Commit 0a8cb8e34149251ad1f280fe099a4f971554639a
Committed by Linus Torvalds
1 parent 57cc083ad9
Exists in master and in 20 other branches
fs/proc: convert to kstrtoX()
Convert fs/proc/ from strict_strto*() to kstrto*() functions.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 2 changed files with 13 additions and 11 deletions Inline Diff
fs/proc/base.c
1 | /* | 1 | /* |
2 | * linux/fs/proc/base.c | 2 | * linux/fs/proc/base.c |
3 | * | 3 | * |
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | * | 5 | * |
6 | * proc base directory handling functions | 6 | * proc base directory handling functions |
7 | * | 7 | * |
8 | * 1999, Al Viro. Rewritten. Now it covers the whole per-process part. | 8 | * 1999, Al Viro. Rewritten. Now it covers the whole per-process part. |
9 | * Instead of using magical inumbers to determine the kind of object | 9 | * Instead of using magical inumbers to determine the kind of object |
10 | * we allocate and fill in-core inodes upon lookup. They don't even | 10 | * we allocate and fill in-core inodes upon lookup. They don't even |
11 | * go into icache. We cache the reference to task_struct upon lookup too. | 11 | * go into icache. We cache the reference to task_struct upon lookup too. |
12 | * Eventually it should become a filesystem in its own. We don't use the | 12 | * Eventually it should become a filesystem in its own. We don't use the |
13 | * rest of procfs anymore. | 13 | * rest of procfs anymore. |
14 | * | 14 | * |
15 | * | 15 | * |
16 | * Changelog: | 16 | * Changelog: |
17 | * 17-Jan-2005 | 17 | * 17-Jan-2005 |
18 | * Allan Bezerra | 18 | * Allan Bezerra |
19 | * Bruna Moreira <bruna.moreira@indt.org.br> | 19 | * Bruna Moreira <bruna.moreira@indt.org.br> |
20 | * Edjard Mota <edjard.mota@indt.org.br> | 20 | * Edjard Mota <edjard.mota@indt.org.br> |
21 | * Ilias Biris <ilias.biris@indt.org.br> | 21 | * Ilias Biris <ilias.biris@indt.org.br> |
22 | * Mauricio Lin <mauricio.lin@indt.org.br> | 22 | * Mauricio Lin <mauricio.lin@indt.org.br> |
23 | * | 23 | * |
24 | * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT | 24 | * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT |
25 | * | 25 | * |
26 | * A new process specific entry (smaps) included in /proc. It shows the | 26 | * A new process specific entry (smaps) included in /proc. It shows the |
27 | * size of rss for each memory area. The maps entry lacks information | 27 | * size of rss for each memory area. The maps entry lacks information |
28 | * about physical memory size (rss) for each mapped file, i.e., | 28 | * about physical memory size (rss) for each mapped file, i.e., |
29 | * rss information for executables and library files. | 29 | * rss information for executables and library files. |
30 | * This additional information is useful for any tools that need to know | 30 | * This additional information is useful for any tools that need to know |
31 | * about physical memory consumption for a process specific library. | 31 | * about physical memory consumption for a process specific library. |
32 | * | 32 | * |
33 | * Changelog: | 33 | * Changelog: |
34 | * 21-Feb-2005 | 34 | * 21-Feb-2005 |
35 | * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT | 35 | * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT |
36 | * Pud inclusion in the page table walking. | 36 | * Pud inclusion in the page table walking. |
37 | * | 37 | * |
38 | * ChangeLog: | 38 | * ChangeLog: |
39 | * 10-Mar-2005 | 39 | * 10-Mar-2005 |
40 | * 10LE Instituto Nokia de Tecnologia - INdT: | 40 | * 10LE Instituto Nokia de Tecnologia - INdT: |
41 | * A better way to walks through the page table as suggested by Hugh Dickins. | 41 | * A better way to walks through the page table as suggested by Hugh Dickins. |
42 | * | 42 | * |
43 | * Simo Piiroinen <simo.piiroinen@nokia.com>: | 43 | * Simo Piiroinen <simo.piiroinen@nokia.com>: |
44 | * Smaps information related to shared, private, clean and dirty pages. | 44 | * Smaps information related to shared, private, clean and dirty pages. |
45 | * | 45 | * |
46 | * Paul Mundt <paul.mundt@nokia.com>: | 46 | * Paul Mundt <paul.mundt@nokia.com>: |
47 | * Overall revision about smaps. | 47 | * Overall revision about smaps. |
48 | */ | 48 | */ |
49 | 49 | ||
50 | #include <asm/uaccess.h> | 50 | #include <asm/uaccess.h> |
51 | 51 | ||
52 | #include <linux/errno.h> | 52 | #include <linux/errno.h> |
53 | #include <linux/time.h> | 53 | #include <linux/time.h> |
54 | #include <linux/proc_fs.h> | 54 | #include <linux/proc_fs.h> |
55 | #include <linux/stat.h> | 55 | #include <linux/stat.h> |
56 | #include <linux/task_io_accounting_ops.h> | 56 | #include <linux/task_io_accounting_ops.h> |
57 | #include <linux/init.h> | 57 | #include <linux/init.h> |
58 | #include <linux/capability.h> | 58 | #include <linux/capability.h> |
59 | #include <linux/file.h> | 59 | #include <linux/file.h> |
60 | #include <linux/fdtable.h> | 60 | #include <linux/fdtable.h> |
61 | #include <linux/string.h> | 61 | #include <linux/string.h> |
62 | #include <linux/seq_file.h> | 62 | #include <linux/seq_file.h> |
63 | #include <linux/namei.h> | 63 | #include <linux/namei.h> |
64 | #include <linux/mnt_namespace.h> | 64 | #include <linux/mnt_namespace.h> |
65 | #include <linux/mm.h> | 65 | #include <linux/mm.h> |
66 | #include <linux/swap.h> | 66 | #include <linux/swap.h> |
67 | #include <linux/rcupdate.h> | 67 | #include <linux/rcupdate.h> |
68 | #include <linux/kallsyms.h> | 68 | #include <linux/kallsyms.h> |
69 | #include <linux/stacktrace.h> | 69 | #include <linux/stacktrace.h> |
70 | #include <linux/resource.h> | 70 | #include <linux/resource.h> |
71 | #include <linux/module.h> | 71 | #include <linux/module.h> |
72 | #include <linux/mount.h> | 72 | #include <linux/mount.h> |
73 | #include <linux/security.h> | 73 | #include <linux/security.h> |
74 | #include <linux/ptrace.h> | 74 | #include <linux/ptrace.h> |
75 | #include <linux/tracehook.h> | 75 | #include <linux/tracehook.h> |
76 | #include <linux/cgroup.h> | 76 | #include <linux/cgroup.h> |
77 | #include <linux/cpuset.h> | 77 | #include <linux/cpuset.h> |
78 | #include <linux/audit.h> | 78 | #include <linux/audit.h> |
79 | #include <linux/poll.h> | 79 | #include <linux/poll.h> |
80 | #include <linux/nsproxy.h> | 80 | #include <linux/nsproxy.h> |
81 | #include <linux/oom.h> | 81 | #include <linux/oom.h> |
82 | #include <linux/elf.h> | 82 | #include <linux/elf.h> |
83 | #include <linux/pid_namespace.h> | 83 | #include <linux/pid_namespace.h> |
84 | #include <linux/fs_struct.h> | 84 | #include <linux/fs_struct.h> |
85 | #include <linux/slab.h> | 85 | #include <linux/slab.h> |
86 | #include "internal.h" | 86 | #include "internal.h" |
87 | 87 | ||
88 | /* NOTE: | 88 | /* NOTE: |
89 | * Implementing inode permission operations in /proc is almost | 89 | * Implementing inode permission operations in /proc is almost |
90 | * certainly an error. Permission checks need to happen during | 90 | * certainly an error. Permission checks need to happen during |
91 | * each system call not at open time. The reason is that most of | 91 | * each system call not at open time. The reason is that most of |
92 | * what we wish to check for permissions in /proc varies at runtime. | 92 | * what we wish to check for permissions in /proc varies at runtime. |
93 | * | 93 | * |
94 | * The classic example of a problem is opening file descriptors | 94 | * The classic example of a problem is opening file descriptors |
95 | * in /proc for a task before it execs a suid executable. | 95 | * in /proc for a task before it execs a suid executable. |
96 | */ | 96 | */ |
97 | 97 | ||
98 | struct pid_entry { | 98 | struct pid_entry { |
99 | char *name; | 99 | char *name; |
100 | int len; | 100 | int len; |
101 | mode_t mode; | 101 | mode_t mode; |
102 | const struct inode_operations *iop; | 102 | const struct inode_operations *iop; |
103 | const struct file_operations *fop; | 103 | const struct file_operations *fop; |
104 | union proc_op op; | 104 | union proc_op op; |
105 | }; | 105 | }; |
106 | 106 | ||
107 | #define NOD(NAME, MODE, IOP, FOP, OP) { \ | 107 | #define NOD(NAME, MODE, IOP, FOP, OP) { \ |
108 | .name = (NAME), \ | 108 | .name = (NAME), \ |
109 | .len = sizeof(NAME) - 1, \ | 109 | .len = sizeof(NAME) - 1, \ |
110 | .mode = MODE, \ | 110 | .mode = MODE, \ |
111 | .iop = IOP, \ | 111 | .iop = IOP, \ |
112 | .fop = FOP, \ | 112 | .fop = FOP, \ |
113 | .op = OP, \ | 113 | .op = OP, \ |
114 | } | 114 | } |
115 | 115 | ||
116 | #define DIR(NAME, MODE, iops, fops) \ | 116 | #define DIR(NAME, MODE, iops, fops) \ |
117 | NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} ) | 117 | NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} ) |
118 | #define LNK(NAME, get_link) \ | 118 | #define LNK(NAME, get_link) \ |
119 | NOD(NAME, (S_IFLNK|S_IRWXUGO), \ | 119 | NOD(NAME, (S_IFLNK|S_IRWXUGO), \ |
120 | &proc_pid_link_inode_operations, NULL, \ | 120 | &proc_pid_link_inode_operations, NULL, \ |
121 | { .proc_get_link = get_link } ) | 121 | { .proc_get_link = get_link } ) |
122 | #define REG(NAME, MODE, fops) \ | 122 | #define REG(NAME, MODE, fops) \ |
123 | NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) | 123 | NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) |
124 | #define INF(NAME, MODE, read) \ | 124 | #define INF(NAME, MODE, read) \ |
125 | NOD(NAME, (S_IFREG|(MODE)), \ | 125 | NOD(NAME, (S_IFREG|(MODE)), \ |
126 | NULL, &proc_info_file_operations, \ | 126 | NULL, &proc_info_file_operations, \ |
127 | { .proc_read = read } ) | 127 | { .proc_read = read } ) |
128 | #define ONE(NAME, MODE, show) \ | 128 | #define ONE(NAME, MODE, show) \ |
129 | NOD(NAME, (S_IFREG|(MODE)), \ | 129 | NOD(NAME, (S_IFREG|(MODE)), \ |
130 | NULL, &proc_single_file_operations, \ | 130 | NULL, &proc_single_file_operations, \ |
131 | { .proc_show = show } ) | 131 | { .proc_show = show } ) |
132 | 132 | ||
133 | /* | 133 | /* |
134 | * Count the number of hardlinks for the pid_entry table, excluding the . | 134 | * Count the number of hardlinks for the pid_entry table, excluding the . |
135 | * and .. links. | 135 | * and .. links. |
136 | */ | 136 | */ |
137 | static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, | 137 | static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, |
138 | unsigned int n) | 138 | unsigned int n) |
139 | { | 139 | { |
140 | unsigned int i; | 140 | unsigned int i; |
141 | unsigned int count; | 141 | unsigned int count; |
142 | 142 | ||
143 | count = 0; | 143 | count = 0; |
144 | for (i = 0; i < n; ++i) { | 144 | for (i = 0; i < n; ++i) { |
145 | if (S_ISDIR(entries[i].mode)) | 145 | if (S_ISDIR(entries[i].mode)) |
146 | ++count; | 146 | ++count; |
147 | } | 147 | } |
148 | 148 | ||
149 | return count; | 149 | return count; |
150 | } | 150 | } |
151 | 151 | ||
152 | static int get_task_root(struct task_struct *task, struct path *root) | 152 | static int get_task_root(struct task_struct *task, struct path *root) |
153 | { | 153 | { |
154 | int result = -ENOENT; | 154 | int result = -ENOENT; |
155 | 155 | ||
156 | task_lock(task); | 156 | task_lock(task); |
157 | if (task->fs) { | 157 | if (task->fs) { |
158 | get_fs_root(task->fs, root); | 158 | get_fs_root(task->fs, root); |
159 | result = 0; | 159 | result = 0; |
160 | } | 160 | } |
161 | task_unlock(task); | 161 | task_unlock(task); |
162 | return result; | 162 | return result; |
163 | } | 163 | } |
164 | 164 | ||
165 | static int proc_cwd_link(struct inode *inode, struct path *path) | 165 | static int proc_cwd_link(struct inode *inode, struct path *path) |
166 | { | 166 | { |
167 | struct task_struct *task = get_proc_task(inode); | 167 | struct task_struct *task = get_proc_task(inode); |
168 | int result = -ENOENT; | 168 | int result = -ENOENT; |
169 | 169 | ||
170 | if (task) { | 170 | if (task) { |
171 | task_lock(task); | 171 | task_lock(task); |
172 | if (task->fs) { | 172 | if (task->fs) { |
173 | get_fs_pwd(task->fs, path); | 173 | get_fs_pwd(task->fs, path); |
174 | result = 0; | 174 | result = 0; |
175 | } | 175 | } |
176 | task_unlock(task); | 176 | task_unlock(task); |
177 | put_task_struct(task); | 177 | put_task_struct(task); |
178 | } | 178 | } |
179 | return result; | 179 | return result; |
180 | } | 180 | } |
181 | 181 | ||
182 | static int proc_root_link(struct inode *inode, struct path *path) | 182 | static int proc_root_link(struct inode *inode, struct path *path) |
183 | { | 183 | { |
184 | struct task_struct *task = get_proc_task(inode); | 184 | struct task_struct *task = get_proc_task(inode); |
185 | int result = -ENOENT; | 185 | int result = -ENOENT; |
186 | 186 | ||
187 | if (task) { | 187 | if (task) { |
188 | result = get_task_root(task, path); | 188 | result = get_task_root(task, path); |
189 | put_task_struct(task); | 189 | put_task_struct(task); |
190 | } | 190 | } |
191 | return result; | 191 | return result; |
192 | } | 192 | } |
193 | 193 | ||
194 | static struct mm_struct *__check_mem_permission(struct task_struct *task) | 194 | static struct mm_struct *__check_mem_permission(struct task_struct *task) |
195 | { | 195 | { |
196 | struct mm_struct *mm; | 196 | struct mm_struct *mm; |
197 | 197 | ||
198 | mm = get_task_mm(task); | 198 | mm = get_task_mm(task); |
199 | if (!mm) | 199 | if (!mm) |
200 | return ERR_PTR(-EINVAL); | 200 | return ERR_PTR(-EINVAL); |
201 | 201 | ||
202 | /* | 202 | /* |
203 | * A task can always look at itself, in case it chooses | 203 | * A task can always look at itself, in case it chooses |
204 | * to use system calls instead of load instructions. | 204 | * to use system calls instead of load instructions. |
205 | */ | 205 | */ |
206 | if (task == current) | 206 | if (task == current) |
207 | return mm; | 207 | return mm; |
208 | 208 | ||
209 | /* | 209 | /* |
210 | * If current is actively ptrace'ing, and would also be | 210 | * If current is actively ptrace'ing, and would also be |
211 | * permitted to freshly attach with ptrace now, permit it. | 211 | * permitted to freshly attach with ptrace now, permit it. |
212 | */ | 212 | */ |
213 | if (task_is_stopped_or_traced(task)) { | 213 | if (task_is_stopped_or_traced(task)) { |
214 | int match; | 214 | int match; |
215 | rcu_read_lock(); | 215 | rcu_read_lock(); |
216 | match = (tracehook_tracer_task(task) == current); | 216 | match = (tracehook_tracer_task(task) == current); |
217 | rcu_read_unlock(); | 217 | rcu_read_unlock(); |
218 | if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) | 218 | if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) |
219 | return mm; | 219 | return mm; |
220 | } | 220 | } |
221 | 221 | ||
222 | /* | 222 | /* |
223 | * No one else is allowed. | 223 | * No one else is allowed. |
224 | */ | 224 | */ |
225 | mmput(mm); | 225 | mmput(mm); |
226 | return ERR_PTR(-EPERM); | 226 | return ERR_PTR(-EPERM); |
227 | } | 227 | } |
228 | 228 | ||
229 | /* | 229 | /* |
230 | * If current may access user memory in @task return a reference to the | 230 | * If current may access user memory in @task return a reference to the |
231 | * corresponding mm, otherwise ERR_PTR. | 231 | * corresponding mm, otherwise ERR_PTR. |
232 | */ | 232 | */ |
233 | static struct mm_struct *check_mem_permission(struct task_struct *task) | 233 | static struct mm_struct *check_mem_permission(struct task_struct *task) |
234 | { | 234 | { |
235 | struct mm_struct *mm; | 235 | struct mm_struct *mm; |
236 | int err; | 236 | int err; |
237 | 237 | ||
238 | /* | 238 | /* |
239 | * Avoid racing if task exec's as we might get a new mm but validate | 239 | * Avoid racing if task exec's as we might get a new mm but validate |
240 | * against old credentials. | 240 | * against old credentials. |
241 | */ | 241 | */ |
242 | err = mutex_lock_killable(&task->signal->cred_guard_mutex); | 242 | err = mutex_lock_killable(&task->signal->cred_guard_mutex); |
243 | if (err) | 243 | if (err) |
244 | return ERR_PTR(err); | 244 | return ERR_PTR(err); |
245 | 245 | ||
246 | mm = __check_mem_permission(task); | 246 | mm = __check_mem_permission(task); |
247 | mutex_unlock(&task->signal->cred_guard_mutex); | 247 | mutex_unlock(&task->signal->cred_guard_mutex); |
248 | 248 | ||
249 | return mm; | 249 | return mm; |
250 | } | 250 | } |
251 | 251 | ||
252 | struct mm_struct *mm_for_maps(struct task_struct *task) | 252 | struct mm_struct *mm_for_maps(struct task_struct *task) |
253 | { | 253 | { |
254 | struct mm_struct *mm; | 254 | struct mm_struct *mm; |
255 | int err; | 255 | int err; |
256 | 256 | ||
257 | err = mutex_lock_killable(&task->signal->cred_guard_mutex); | 257 | err = mutex_lock_killable(&task->signal->cred_guard_mutex); |
258 | if (err) | 258 | if (err) |
259 | return ERR_PTR(err); | 259 | return ERR_PTR(err); |
260 | 260 | ||
261 | mm = get_task_mm(task); | 261 | mm = get_task_mm(task); |
262 | if (mm && mm != current->mm && | 262 | if (mm && mm != current->mm && |
263 | !ptrace_may_access(task, PTRACE_MODE_READ)) { | 263 | !ptrace_may_access(task, PTRACE_MODE_READ)) { |
264 | mmput(mm); | 264 | mmput(mm); |
265 | mm = ERR_PTR(-EACCES); | 265 | mm = ERR_PTR(-EACCES); |
266 | } | 266 | } |
267 | mutex_unlock(&task->signal->cred_guard_mutex); | 267 | mutex_unlock(&task->signal->cred_guard_mutex); |
268 | 268 | ||
269 | return mm; | 269 | return mm; |
270 | } | 270 | } |
271 | 271 | ||
272 | static int proc_pid_cmdline(struct task_struct *task, char * buffer) | 272 | static int proc_pid_cmdline(struct task_struct *task, char * buffer) |
273 | { | 273 | { |
274 | int res = 0; | 274 | int res = 0; |
275 | unsigned int len; | 275 | unsigned int len; |
276 | struct mm_struct *mm = get_task_mm(task); | 276 | struct mm_struct *mm = get_task_mm(task); |
277 | if (!mm) | 277 | if (!mm) |
278 | goto out; | 278 | goto out; |
279 | if (!mm->arg_end) | 279 | if (!mm->arg_end) |
280 | goto out_mm; /* Shh! No looking before we're done */ | 280 | goto out_mm; /* Shh! No looking before we're done */ |
281 | 281 | ||
282 | len = mm->arg_end - mm->arg_start; | 282 | len = mm->arg_end - mm->arg_start; |
283 | 283 | ||
284 | if (len > PAGE_SIZE) | 284 | if (len > PAGE_SIZE) |
285 | len = PAGE_SIZE; | 285 | len = PAGE_SIZE; |
286 | 286 | ||
287 | res = access_process_vm(task, mm->arg_start, buffer, len, 0); | 287 | res = access_process_vm(task, mm->arg_start, buffer, len, 0); |
288 | 288 | ||
289 | // If the nul at the end of args has been overwritten, then | 289 | // If the nul at the end of args has been overwritten, then |
290 | // assume application is using setproctitle(3). | 290 | // assume application is using setproctitle(3). |
291 | if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { | 291 | if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { |
292 | len = strnlen(buffer, res); | 292 | len = strnlen(buffer, res); |
293 | if (len < res) { | 293 | if (len < res) { |
294 | res = len; | 294 | res = len; |
295 | } else { | 295 | } else { |
296 | len = mm->env_end - mm->env_start; | 296 | len = mm->env_end - mm->env_start; |
297 | if (len > PAGE_SIZE - res) | 297 | if (len > PAGE_SIZE - res) |
298 | len = PAGE_SIZE - res; | 298 | len = PAGE_SIZE - res; |
299 | res += access_process_vm(task, mm->env_start, buffer+res, len, 0); | 299 | res += access_process_vm(task, mm->env_start, buffer+res, len, 0); |
300 | res = strnlen(buffer, res); | 300 | res = strnlen(buffer, res); |
301 | } | 301 | } |
302 | } | 302 | } |
303 | out_mm: | 303 | out_mm: |
304 | mmput(mm); | 304 | mmput(mm); |
305 | out: | 305 | out: |
306 | return res; | 306 | return res; |
307 | } | 307 | } |
308 | 308 | ||
309 | static int proc_pid_auxv(struct task_struct *task, char *buffer) | 309 | static int proc_pid_auxv(struct task_struct *task, char *buffer) |
310 | { | 310 | { |
311 | struct mm_struct *mm = mm_for_maps(task); | 311 | struct mm_struct *mm = mm_for_maps(task); |
312 | int res = PTR_ERR(mm); | 312 | int res = PTR_ERR(mm); |
313 | if (mm && !IS_ERR(mm)) { | 313 | if (mm && !IS_ERR(mm)) { |
314 | unsigned int nwords = 0; | 314 | unsigned int nwords = 0; |
315 | do { | 315 | do { |
316 | nwords += 2; | 316 | nwords += 2; |
317 | } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ | 317 | } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ |
318 | res = nwords * sizeof(mm->saved_auxv[0]); | 318 | res = nwords * sizeof(mm->saved_auxv[0]); |
319 | if (res > PAGE_SIZE) | 319 | if (res > PAGE_SIZE) |
320 | res = PAGE_SIZE; | 320 | res = PAGE_SIZE; |
321 | memcpy(buffer, mm->saved_auxv, res); | 321 | memcpy(buffer, mm->saved_auxv, res); |
322 | mmput(mm); | 322 | mmput(mm); |
323 | } | 323 | } |
324 | return res; | 324 | return res; |
325 | } | 325 | } |
326 | 326 | ||
327 | 327 | ||
328 | #ifdef CONFIG_KALLSYMS | 328 | #ifdef CONFIG_KALLSYMS |
329 | /* | 329 | /* |
330 | * Provides a wchan file via kallsyms in a proper one-value-per-file format. | 330 | * Provides a wchan file via kallsyms in a proper one-value-per-file format. |
331 | * Returns the resolved symbol. If that fails, simply return the address. | 331 | * Returns the resolved symbol. If that fails, simply return the address. |
332 | */ | 332 | */ |
333 | static int proc_pid_wchan(struct task_struct *task, char *buffer) | 333 | static int proc_pid_wchan(struct task_struct *task, char *buffer) |
334 | { | 334 | { |
335 | unsigned long wchan; | 335 | unsigned long wchan; |
336 | char symname[KSYM_NAME_LEN]; | 336 | char symname[KSYM_NAME_LEN]; |
337 | 337 | ||
338 | wchan = get_wchan(task); | 338 | wchan = get_wchan(task); |
339 | 339 | ||
340 | if (lookup_symbol_name(wchan, symname) < 0) | 340 | if (lookup_symbol_name(wchan, symname) < 0) |
341 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 341 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
342 | return 0; | 342 | return 0; |
343 | else | 343 | else |
344 | return sprintf(buffer, "%lu", wchan); | 344 | return sprintf(buffer, "%lu", wchan); |
345 | else | 345 | else |
346 | return sprintf(buffer, "%s", symname); | 346 | return sprintf(buffer, "%s", symname); |
347 | } | 347 | } |
348 | #endif /* CONFIG_KALLSYMS */ | 348 | #endif /* CONFIG_KALLSYMS */ |
349 | 349 | ||
350 | static int lock_trace(struct task_struct *task) | 350 | static int lock_trace(struct task_struct *task) |
351 | { | 351 | { |
352 | int err = mutex_lock_killable(&task->signal->cred_guard_mutex); | 352 | int err = mutex_lock_killable(&task->signal->cred_guard_mutex); |
353 | if (err) | 353 | if (err) |
354 | return err; | 354 | return err; |
355 | if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) { | 355 | if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) { |
356 | mutex_unlock(&task->signal->cred_guard_mutex); | 356 | mutex_unlock(&task->signal->cred_guard_mutex); |
357 | return -EPERM; | 357 | return -EPERM; |
358 | } | 358 | } |
359 | return 0; | 359 | return 0; |
360 | } | 360 | } |
361 | 361 | ||
362 | static void unlock_trace(struct task_struct *task) | 362 | static void unlock_trace(struct task_struct *task) |
363 | { | 363 | { |
364 | mutex_unlock(&task->signal->cred_guard_mutex); | 364 | mutex_unlock(&task->signal->cred_guard_mutex); |
365 | } | 365 | } |
366 | 366 | ||
367 | #ifdef CONFIG_STACKTRACE | 367 | #ifdef CONFIG_STACKTRACE |
368 | 368 | ||
369 | #define MAX_STACK_TRACE_DEPTH 64 | 369 | #define MAX_STACK_TRACE_DEPTH 64 |
370 | 370 | ||
371 | static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, | 371 | static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, |
372 | struct pid *pid, struct task_struct *task) | 372 | struct pid *pid, struct task_struct *task) |
373 | { | 373 | { |
374 | struct stack_trace trace; | 374 | struct stack_trace trace; |
375 | unsigned long *entries; | 375 | unsigned long *entries; |
376 | int err; | 376 | int err; |
377 | int i; | 377 | int i; |
378 | 378 | ||
379 | entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); | 379 | entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); |
380 | if (!entries) | 380 | if (!entries) |
381 | return -ENOMEM; | 381 | return -ENOMEM; |
382 | 382 | ||
383 | trace.nr_entries = 0; | 383 | trace.nr_entries = 0; |
384 | trace.max_entries = MAX_STACK_TRACE_DEPTH; | 384 | trace.max_entries = MAX_STACK_TRACE_DEPTH; |
385 | trace.entries = entries; | 385 | trace.entries = entries; |
386 | trace.skip = 0; | 386 | trace.skip = 0; |
387 | 387 | ||
388 | err = lock_trace(task); | 388 | err = lock_trace(task); |
389 | if (!err) { | 389 | if (!err) { |
390 | save_stack_trace_tsk(task, &trace); | 390 | save_stack_trace_tsk(task, &trace); |
391 | 391 | ||
392 | for (i = 0; i < trace.nr_entries; i++) { | 392 | for (i = 0; i < trace.nr_entries; i++) { |
393 | seq_printf(m, "[<%pK>] %pS\n", | 393 | seq_printf(m, "[<%pK>] %pS\n", |
394 | (void *)entries[i], (void *)entries[i]); | 394 | (void *)entries[i], (void *)entries[i]); |
395 | } | 395 | } |
396 | unlock_trace(task); | 396 | unlock_trace(task); |
397 | } | 397 | } |
398 | kfree(entries); | 398 | kfree(entries); |
399 | 399 | ||
400 | return err; | 400 | return err; |
401 | } | 401 | } |
402 | #endif | 402 | #endif |
403 | 403 | ||
404 | #ifdef CONFIG_SCHEDSTATS | 404 | #ifdef CONFIG_SCHEDSTATS |
405 | /* | 405 | /* |
406 | * Provides /proc/PID/schedstat | 406 | * Provides /proc/PID/schedstat |
407 | */ | 407 | */ |
408 | static int proc_pid_schedstat(struct task_struct *task, char *buffer) | 408 | static int proc_pid_schedstat(struct task_struct *task, char *buffer) |
409 | { | 409 | { |
410 | return sprintf(buffer, "%llu %llu %lu\n", | 410 | return sprintf(buffer, "%llu %llu %lu\n", |
411 | (unsigned long long)task->se.sum_exec_runtime, | 411 | (unsigned long long)task->se.sum_exec_runtime, |
412 | (unsigned long long)task->sched_info.run_delay, | 412 | (unsigned long long)task->sched_info.run_delay, |
413 | task->sched_info.pcount); | 413 | task->sched_info.pcount); |
414 | } | 414 | } |
415 | #endif | 415 | #endif |
416 | 416 | ||
417 | #ifdef CONFIG_LATENCYTOP | 417 | #ifdef CONFIG_LATENCYTOP |
418 | static int lstats_show_proc(struct seq_file *m, void *v) | 418 | static int lstats_show_proc(struct seq_file *m, void *v) |
419 | { | 419 | { |
420 | int i; | 420 | int i; |
421 | struct inode *inode = m->private; | 421 | struct inode *inode = m->private; |
422 | struct task_struct *task = get_proc_task(inode); | 422 | struct task_struct *task = get_proc_task(inode); |
423 | 423 | ||
424 | if (!task) | 424 | if (!task) |
425 | return -ESRCH; | 425 | return -ESRCH; |
426 | seq_puts(m, "Latency Top version : v0.1\n"); | 426 | seq_puts(m, "Latency Top version : v0.1\n"); |
427 | for (i = 0; i < 32; i++) { | 427 | for (i = 0; i < 32; i++) { |
428 | struct latency_record *lr = &task->latency_record[i]; | 428 | struct latency_record *lr = &task->latency_record[i]; |
429 | if (lr->backtrace[0]) { | 429 | if (lr->backtrace[0]) { |
430 | int q; | 430 | int q; |
431 | seq_printf(m, "%i %li %li", | 431 | seq_printf(m, "%i %li %li", |
432 | lr->count, lr->time, lr->max); | 432 | lr->count, lr->time, lr->max); |
433 | for (q = 0; q < LT_BACKTRACEDEPTH; q++) { | 433 | for (q = 0; q < LT_BACKTRACEDEPTH; q++) { |
434 | unsigned long bt = lr->backtrace[q]; | 434 | unsigned long bt = lr->backtrace[q]; |
435 | if (!bt) | 435 | if (!bt) |
436 | break; | 436 | break; |
437 | if (bt == ULONG_MAX) | 437 | if (bt == ULONG_MAX) |
438 | break; | 438 | break; |
439 | seq_printf(m, " %ps", (void *)bt); | 439 | seq_printf(m, " %ps", (void *)bt); |
440 | } | 440 | } |
441 | seq_putc(m, '\n'); | 441 | seq_putc(m, '\n'); |
442 | } | 442 | } |
443 | 443 | ||
444 | } | 444 | } |
445 | put_task_struct(task); | 445 | put_task_struct(task); |
446 | return 0; | 446 | return 0; |
447 | } | 447 | } |
448 | 448 | ||
449 | static int lstats_open(struct inode *inode, struct file *file) | 449 | static int lstats_open(struct inode *inode, struct file *file) |
450 | { | 450 | { |
451 | return single_open(file, lstats_show_proc, inode); | 451 | return single_open(file, lstats_show_proc, inode); |
452 | } | 452 | } |
453 | 453 | ||
454 | static ssize_t lstats_write(struct file *file, const char __user *buf, | 454 | static ssize_t lstats_write(struct file *file, const char __user *buf, |
455 | size_t count, loff_t *offs) | 455 | size_t count, loff_t *offs) |
456 | { | 456 | { |
457 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); | 457 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
458 | 458 | ||
459 | if (!task) | 459 | if (!task) |
460 | return -ESRCH; | 460 | return -ESRCH; |
461 | clear_all_latency_tracing(task); | 461 | clear_all_latency_tracing(task); |
462 | put_task_struct(task); | 462 | put_task_struct(task); |
463 | 463 | ||
464 | return count; | 464 | return count; |
465 | } | 465 | } |
466 | 466 | ||
467 | static const struct file_operations proc_lstats_operations = { | 467 | static const struct file_operations proc_lstats_operations = { |
468 | .open = lstats_open, | 468 | .open = lstats_open, |
469 | .read = seq_read, | 469 | .read = seq_read, |
470 | .write = lstats_write, | 470 | .write = lstats_write, |
471 | .llseek = seq_lseek, | 471 | .llseek = seq_lseek, |
472 | .release = single_release, | 472 | .release = single_release, |
473 | }; | 473 | }; |
474 | 474 | ||
475 | #endif | 475 | #endif |
476 | 476 | ||
477 | static int proc_oom_score(struct task_struct *task, char *buffer) | 477 | static int proc_oom_score(struct task_struct *task, char *buffer) |
478 | { | 478 | { |
479 | unsigned long points = 0; | 479 | unsigned long points = 0; |
480 | 480 | ||
481 | read_lock(&tasklist_lock); | 481 | read_lock(&tasklist_lock); |
482 | if (pid_alive(task)) | 482 | if (pid_alive(task)) |
483 | points = oom_badness(task, NULL, NULL, | 483 | points = oom_badness(task, NULL, NULL, |
484 | totalram_pages + total_swap_pages); | 484 | totalram_pages + total_swap_pages); |
485 | read_unlock(&tasklist_lock); | 485 | read_unlock(&tasklist_lock); |
486 | return sprintf(buffer, "%lu\n", points); | 486 | return sprintf(buffer, "%lu\n", points); |
487 | } | 487 | } |
488 | 488 | ||
489 | struct limit_names { | 489 | struct limit_names { |
490 | char *name; | 490 | char *name; |
491 | char *unit; | 491 | char *unit; |
492 | }; | 492 | }; |
493 | 493 | ||
494 | static const struct limit_names lnames[RLIM_NLIMITS] = { | 494 | static const struct limit_names lnames[RLIM_NLIMITS] = { |
495 | [RLIMIT_CPU] = {"Max cpu time", "seconds"}, | 495 | [RLIMIT_CPU] = {"Max cpu time", "seconds"}, |
496 | [RLIMIT_FSIZE] = {"Max file size", "bytes"}, | 496 | [RLIMIT_FSIZE] = {"Max file size", "bytes"}, |
497 | [RLIMIT_DATA] = {"Max data size", "bytes"}, | 497 | [RLIMIT_DATA] = {"Max data size", "bytes"}, |
498 | [RLIMIT_STACK] = {"Max stack size", "bytes"}, | 498 | [RLIMIT_STACK] = {"Max stack size", "bytes"}, |
499 | [RLIMIT_CORE] = {"Max core file size", "bytes"}, | 499 | [RLIMIT_CORE] = {"Max core file size", "bytes"}, |
500 | [RLIMIT_RSS] = {"Max resident set", "bytes"}, | 500 | [RLIMIT_RSS] = {"Max resident set", "bytes"}, |
501 | [RLIMIT_NPROC] = {"Max processes", "processes"}, | 501 | [RLIMIT_NPROC] = {"Max processes", "processes"}, |
502 | [RLIMIT_NOFILE] = {"Max open files", "files"}, | 502 | [RLIMIT_NOFILE] = {"Max open files", "files"}, |
503 | [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, | 503 | [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, |
504 | [RLIMIT_AS] = {"Max address space", "bytes"}, | 504 | [RLIMIT_AS] = {"Max address space", "bytes"}, |
505 | [RLIMIT_LOCKS] = {"Max file locks", "locks"}, | 505 | [RLIMIT_LOCKS] = {"Max file locks", "locks"}, |
506 | [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, | 506 | [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, |
507 | [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, | 507 | [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, |
508 | [RLIMIT_NICE] = {"Max nice priority", NULL}, | 508 | [RLIMIT_NICE] = {"Max nice priority", NULL}, |
509 | [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, | 509 | [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, |
510 | [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, | 510 | [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, |
511 | }; | 511 | }; |
512 | 512 | ||
513 | /* Display limits for a process */ | 513 | /* Display limits for a process */ |
514 | static int proc_pid_limits(struct task_struct *task, char *buffer) | 514 | static int proc_pid_limits(struct task_struct *task, char *buffer) |
515 | { | 515 | { |
516 | unsigned int i; | 516 | unsigned int i; |
517 | int count = 0; | 517 | int count = 0; |
518 | unsigned long flags; | 518 | unsigned long flags; |
519 | char *bufptr = buffer; | 519 | char *bufptr = buffer; |
520 | 520 | ||
521 | struct rlimit rlim[RLIM_NLIMITS]; | 521 | struct rlimit rlim[RLIM_NLIMITS]; |
522 | 522 | ||
523 | if (!lock_task_sighand(task, &flags)) | 523 | if (!lock_task_sighand(task, &flags)) |
524 | return 0; | 524 | return 0; |
525 | memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); | 525 | memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); |
526 | unlock_task_sighand(task, &flags); | 526 | unlock_task_sighand(task, &flags); |
527 | 527 | ||
528 | /* | 528 | /* |
529 | * print the file header | 529 | * print the file header |
530 | */ | 530 | */ |
531 | count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n", | 531 | count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n", |
532 | "Limit", "Soft Limit", "Hard Limit", "Units"); | 532 | "Limit", "Soft Limit", "Hard Limit", "Units"); |
533 | 533 | ||
534 | for (i = 0; i < RLIM_NLIMITS; i++) { | 534 | for (i = 0; i < RLIM_NLIMITS; i++) { |
535 | if (rlim[i].rlim_cur == RLIM_INFINITY) | 535 | if (rlim[i].rlim_cur == RLIM_INFINITY) |
536 | count += sprintf(&bufptr[count], "%-25s %-20s ", | 536 | count += sprintf(&bufptr[count], "%-25s %-20s ", |
537 | lnames[i].name, "unlimited"); | 537 | lnames[i].name, "unlimited"); |
538 | else | 538 | else |
539 | count += sprintf(&bufptr[count], "%-25s %-20lu ", | 539 | count += sprintf(&bufptr[count], "%-25s %-20lu ", |
540 | lnames[i].name, rlim[i].rlim_cur); | 540 | lnames[i].name, rlim[i].rlim_cur); |
541 | 541 | ||
542 | if (rlim[i].rlim_max == RLIM_INFINITY) | 542 | if (rlim[i].rlim_max == RLIM_INFINITY) |
543 | count += sprintf(&bufptr[count], "%-20s ", "unlimited"); | 543 | count += sprintf(&bufptr[count], "%-20s ", "unlimited"); |
544 | else | 544 | else |
545 | count += sprintf(&bufptr[count], "%-20lu ", | 545 | count += sprintf(&bufptr[count], "%-20lu ", |
546 | rlim[i].rlim_max); | 546 | rlim[i].rlim_max); |
547 | 547 | ||
548 | if (lnames[i].unit) | 548 | if (lnames[i].unit) |
549 | count += sprintf(&bufptr[count], "%-10s\n", | 549 | count += sprintf(&bufptr[count], "%-10s\n", |
550 | lnames[i].unit); | 550 | lnames[i].unit); |
551 | else | 551 | else |
552 | count += sprintf(&bufptr[count], "\n"); | 552 | count += sprintf(&bufptr[count], "\n"); |
553 | } | 553 | } |
554 | 554 | ||
555 | return count; | 555 | return count; |
556 | } | 556 | } |
557 | 557 | ||
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
/*
 * Format the result of task_current_syscall() for @task into @buffer.
 *
 * Output is "running" when task_current_syscall() returns non-zero,
 * "<nr> <sp> <pc>" when the reported number is negative, and otherwise
 * "<nr> <six args> <sp> <pc>".
 *
 * Returns the number of bytes written, or the error from lock_trace().
 */
static int proc_pid_syscall(struct task_struct *task, char *buffer)
{
	unsigned long args[6], sp, pc;
	long nr;
	int len;
	int err;

	err = lock_trace(task);
	if (err)
		return err;

	if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) {
		len = sprintf(buffer, "running\n");
	} else if (nr >= 0) {
		len = sprintf(buffer,
		       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
		       nr,
		       args[0], args[1], args[2], args[3], args[4], args[5],
		       sp, pc);
	} else {
		len = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
	}

	unlock_trace(task);
	return len;
}
#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
581 | 581 | ||
582 | /************************************************************************/ | 582 | /************************************************************************/ |
583 | /* Here the fs part begins */ | 583 | /* Here the fs part begins */ |
584 | /************************************************************************/ | 584 | /************************************************************************/ |
585 | 585 | ||
586 | /* permission checks */ | 586 | /* permission checks */ |
587 | static int proc_fd_access_allowed(struct inode *inode) | 587 | static int proc_fd_access_allowed(struct inode *inode) |
588 | { | 588 | { |
589 | struct task_struct *task; | 589 | struct task_struct *task; |
590 | int allowed = 0; | 590 | int allowed = 0; |
591 | /* Allow access to a task's file descriptors if it is us or we | 591 | /* Allow access to a task's file descriptors if it is us or we |
592 | * may use ptrace attach to the process and find out that | 592 | * may use ptrace attach to the process and find out that |
593 | * information. | 593 | * information. |
594 | */ | 594 | */ |
595 | task = get_proc_task(inode); | 595 | task = get_proc_task(inode); |
596 | if (task) { | 596 | if (task) { |
597 | allowed = ptrace_may_access(task, PTRACE_MODE_READ); | 597 | allowed = ptrace_may_access(task, PTRACE_MODE_READ); |
598 | put_task_struct(task); | 598 | put_task_struct(task); |
599 | } | 599 | } |
600 | return allowed; | 600 | return allowed; |
601 | } | 601 | } |
602 | 602 | ||
603 | int proc_setattr(struct dentry *dentry, struct iattr *attr) | 603 | int proc_setattr(struct dentry *dentry, struct iattr *attr) |
604 | { | 604 | { |
605 | int error; | 605 | int error; |
606 | struct inode *inode = dentry->d_inode; | 606 | struct inode *inode = dentry->d_inode; |
607 | 607 | ||
608 | if (attr->ia_valid & ATTR_MODE) | 608 | if (attr->ia_valid & ATTR_MODE) |
609 | return -EPERM; | 609 | return -EPERM; |
610 | 610 | ||
611 | error = inode_change_ok(inode, attr); | 611 | error = inode_change_ok(inode, attr); |
612 | if (error) | 612 | if (error) |
613 | return error; | 613 | return error; |
614 | 614 | ||
615 | if ((attr->ia_valid & ATTR_SIZE) && | 615 | if ((attr->ia_valid & ATTR_SIZE) && |
616 | attr->ia_size != i_size_read(inode)) { | 616 | attr->ia_size != i_size_read(inode)) { |
617 | error = vmtruncate(inode, attr->ia_size); | 617 | error = vmtruncate(inode, attr->ia_size); |
618 | if (error) | 618 | if (error) |
619 | return error; | 619 | return error; |
620 | } | 620 | } |
621 | 621 | ||
622 | setattr_copy(inode, attr); | 622 | setattr_copy(inode, attr); |
623 | mark_inode_dirty(inode); | 623 | mark_inode_dirty(inode); |
624 | return 0; | 624 | return 0; |
625 | } | 625 | } |
626 | 626 | ||
627 | static const struct inode_operations proc_def_inode_operations = { | 627 | static const struct inode_operations proc_def_inode_operations = { |
628 | .setattr = proc_setattr, | 628 | .setattr = proc_setattr, |
629 | }; | 629 | }; |
630 | 630 | ||
631 | static int mounts_open_common(struct inode *inode, struct file *file, | 631 | static int mounts_open_common(struct inode *inode, struct file *file, |
632 | const struct seq_operations *op) | 632 | const struct seq_operations *op) |
633 | { | 633 | { |
634 | struct task_struct *task = get_proc_task(inode); | 634 | struct task_struct *task = get_proc_task(inode); |
635 | struct nsproxy *nsp; | 635 | struct nsproxy *nsp; |
636 | struct mnt_namespace *ns = NULL; | 636 | struct mnt_namespace *ns = NULL; |
637 | struct path root; | 637 | struct path root; |
638 | struct proc_mounts *p; | 638 | struct proc_mounts *p; |
639 | int ret = -EINVAL; | 639 | int ret = -EINVAL; |
640 | 640 | ||
641 | if (task) { | 641 | if (task) { |
642 | rcu_read_lock(); | 642 | rcu_read_lock(); |
643 | nsp = task_nsproxy(task); | 643 | nsp = task_nsproxy(task); |
644 | if (nsp) { | 644 | if (nsp) { |
645 | ns = nsp->mnt_ns; | 645 | ns = nsp->mnt_ns; |
646 | if (ns) | 646 | if (ns) |
647 | get_mnt_ns(ns); | 647 | get_mnt_ns(ns); |
648 | } | 648 | } |
649 | rcu_read_unlock(); | 649 | rcu_read_unlock(); |
650 | if (ns && get_task_root(task, &root) == 0) | 650 | if (ns && get_task_root(task, &root) == 0) |
651 | ret = 0; | 651 | ret = 0; |
652 | put_task_struct(task); | 652 | put_task_struct(task); |
653 | } | 653 | } |
654 | 654 | ||
655 | if (!ns) | 655 | if (!ns) |
656 | goto err; | 656 | goto err; |
657 | if (ret) | 657 | if (ret) |
658 | goto err_put_ns; | 658 | goto err_put_ns; |
659 | 659 | ||
660 | ret = -ENOMEM; | 660 | ret = -ENOMEM; |
661 | p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); | 661 | p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); |
662 | if (!p) | 662 | if (!p) |
663 | goto err_put_path; | 663 | goto err_put_path; |
664 | 664 | ||
665 | file->private_data = &p->m; | 665 | file->private_data = &p->m; |
666 | ret = seq_open(file, op); | 666 | ret = seq_open(file, op); |
667 | if (ret) | 667 | if (ret) |
668 | goto err_free; | 668 | goto err_free; |
669 | 669 | ||
670 | p->m.private = p; | 670 | p->m.private = p; |
671 | p->ns = ns; | 671 | p->ns = ns; |
672 | p->root = root; | 672 | p->root = root; |
673 | p->event = ns->event; | 673 | p->event = ns->event; |
674 | 674 | ||
675 | return 0; | 675 | return 0; |
676 | 676 | ||
677 | err_free: | 677 | err_free: |
678 | kfree(p); | 678 | kfree(p); |
679 | err_put_path: | 679 | err_put_path: |
680 | path_put(&root); | 680 | path_put(&root); |
681 | err_put_ns: | 681 | err_put_ns: |
682 | put_mnt_ns(ns); | 682 | put_mnt_ns(ns); |
683 | err: | 683 | err: |
684 | return ret; | 684 | return ret; |
685 | } | 685 | } |
686 | 686 | ||
687 | static int mounts_release(struct inode *inode, struct file *file) | 687 | static int mounts_release(struct inode *inode, struct file *file) |
688 | { | 688 | { |
689 | struct proc_mounts *p = file->private_data; | 689 | struct proc_mounts *p = file->private_data; |
690 | path_put(&p->root); | 690 | path_put(&p->root); |
691 | put_mnt_ns(p->ns); | 691 | put_mnt_ns(p->ns); |
692 | return seq_release(inode, file); | 692 | return seq_release(inode, file); |
693 | } | 693 | } |
694 | 694 | ||
695 | static unsigned mounts_poll(struct file *file, poll_table *wait) | 695 | static unsigned mounts_poll(struct file *file, poll_table *wait) |
696 | { | 696 | { |
697 | struct proc_mounts *p = file->private_data; | 697 | struct proc_mounts *p = file->private_data; |
698 | unsigned res = POLLIN | POLLRDNORM; | 698 | unsigned res = POLLIN | POLLRDNORM; |
699 | 699 | ||
700 | poll_wait(file, &p->ns->poll, wait); | 700 | poll_wait(file, &p->ns->poll, wait); |
701 | if (mnt_had_events(p)) | 701 | if (mnt_had_events(p)) |
702 | res |= POLLERR | POLLPRI; | 702 | res |= POLLERR | POLLPRI; |
703 | 703 | ||
704 | return res; | 704 | return res; |
705 | } | 705 | } |
706 | 706 | ||
707 | static int mounts_open(struct inode *inode, struct file *file) | 707 | static int mounts_open(struct inode *inode, struct file *file) |
708 | { | 708 | { |
709 | return mounts_open_common(inode, file, &mounts_op); | 709 | return mounts_open_common(inode, file, &mounts_op); |
710 | } | 710 | } |
711 | 711 | ||
712 | static const struct file_operations proc_mounts_operations = { | 712 | static const struct file_operations proc_mounts_operations = { |
713 | .open = mounts_open, | 713 | .open = mounts_open, |
714 | .read = seq_read, | 714 | .read = seq_read, |
715 | .llseek = seq_lseek, | 715 | .llseek = seq_lseek, |
716 | .release = mounts_release, | 716 | .release = mounts_release, |
717 | .poll = mounts_poll, | 717 | .poll = mounts_poll, |
718 | }; | 718 | }; |
719 | 719 | ||
720 | static int mountinfo_open(struct inode *inode, struct file *file) | 720 | static int mountinfo_open(struct inode *inode, struct file *file) |
721 | { | 721 | { |
722 | return mounts_open_common(inode, file, &mountinfo_op); | 722 | return mounts_open_common(inode, file, &mountinfo_op); |
723 | } | 723 | } |
724 | 724 | ||
725 | static const struct file_operations proc_mountinfo_operations = { | 725 | static const struct file_operations proc_mountinfo_operations = { |
726 | .open = mountinfo_open, | 726 | .open = mountinfo_open, |
727 | .read = seq_read, | 727 | .read = seq_read, |
728 | .llseek = seq_lseek, | 728 | .llseek = seq_lseek, |
729 | .release = mounts_release, | 729 | .release = mounts_release, |
730 | .poll = mounts_poll, | 730 | .poll = mounts_poll, |
731 | }; | 731 | }; |
732 | 732 | ||
733 | static int mountstats_open(struct inode *inode, struct file *file) | 733 | static int mountstats_open(struct inode *inode, struct file *file) |
734 | { | 734 | { |
735 | return mounts_open_common(inode, file, &mountstats_op); | 735 | return mounts_open_common(inode, file, &mountstats_op); |
736 | } | 736 | } |
737 | 737 | ||
738 | static const struct file_operations proc_mountstats_operations = { | 738 | static const struct file_operations proc_mountstats_operations = { |
739 | .open = mountstats_open, | 739 | .open = mountstats_open, |
740 | .read = seq_read, | 740 | .read = seq_read, |
741 | .llseek = seq_lseek, | 741 | .llseek = seq_lseek, |
742 | .release = mounts_release, | 742 | .release = mounts_release, |
743 | }; | 743 | }; |
744 | 744 | ||
/* 4K page size but our output routines use some slack for overruns */
#define PROC_BLOCK_SIZE	(3*1024)
746 | 746 | ||
747 | static ssize_t proc_info_read(struct file * file, char __user * buf, | 747 | static ssize_t proc_info_read(struct file * file, char __user * buf, |
748 | size_t count, loff_t *ppos) | 748 | size_t count, loff_t *ppos) |
749 | { | 749 | { |
750 | struct inode * inode = file->f_path.dentry->d_inode; | 750 | struct inode * inode = file->f_path.dentry->d_inode; |
751 | unsigned long page; | 751 | unsigned long page; |
752 | ssize_t length; | 752 | ssize_t length; |
753 | struct task_struct *task = get_proc_task(inode); | 753 | struct task_struct *task = get_proc_task(inode); |
754 | 754 | ||
755 | length = -ESRCH; | 755 | length = -ESRCH; |
756 | if (!task) | 756 | if (!task) |
757 | goto out_no_task; | 757 | goto out_no_task; |
758 | 758 | ||
759 | if (count > PROC_BLOCK_SIZE) | 759 | if (count > PROC_BLOCK_SIZE) |
760 | count = PROC_BLOCK_SIZE; | 760 | count = PROC_BLOCK_SIZE; |
761 | 761 | ||
762 | length = -ENOMEM; | 762 | length = -ENOMEM; |
763 | if (!(page = __get_free_page(GFP_TEMPORARY))) | 763 | if (!(page = __get_free_page(GFP_TEMPORARY))) |
764 | goto out; | 764 | goto out; |
765 | 765 | ||
766 | length = PROC_I(inode)->op.proc_read(task, (char*)page); | 766 | length = PROC_I(inode)->op.proc_read(task, (char*)page); |
767 | 767 | ||
768 | if (length >= 0) | 768 | if (length >= 0) |
769 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); | 769 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); |
770 | free_page(page); | 770 | free_page(page); |
771 | out: | 771 | out: |
772 | put_task_struct(task); | 772 | put_task_struct(task); |
773 | out_no_task: | 773 | out_no_task: |
774 | return length; | 774 | return length; |
775 | } | 775 | } |
776 | 776 | ||
777 | static const struct file_operations proc_info_file_operations = { | 777 | static const struct file_operations proc_info_file_operations = { |
778 | .read = proc_info_read, | 778 | .read = proc_info_read, |
779 | .llseek = generic_file_llseek, | 779 | .llseek = generic_file_llseek, |
780 | }; | 780 | }; |
781 | 781 | ||
782 | static int proc_single_show(struct seq_file *m, void *v) | 782 | static int proc_single_show(struct seq_file *m, void *v) |
783 | { | 783 | { |
784 | struct inode *inode = m->private; | 784 | struct inode *inode = m->private; |
785 | struct pid_namespace *ns; | 785 | struct pid_namespace *ns; |
786 | struct pid *pid; | 786 | struct pid *pid; |
787 | struct task_struct *task; | 787 | struct task_struct *task; |
788 | int ret; | 788 | int ret; |
789 | 789 | ||
790 | ns = inode->i_sb->s_fs_info; | 790 | ns = inode->i_sb->s_fs_info; |
791 | pid = proc_pid(inode); | 791 | pid = proc_pid(inode); |
792 | task = get_pid_task(pid, PIDTYPE_PID); | 792 | task = get_pid_task(pid, PIDTYPE_PID); |
793 | if (!task) | 793 | if (!task) |
794 | return -ESRCH; | 794 | return -ESRCH; |
795 | 795 | ||
796 | ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); | 796 | ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); |
797 | 797 | ||
798 | put_task_struct(task); | 798 | put_task_struct(task); |
799 | return ret; | 799 | return ret; |
800 | } | 800 | } |
801 | 801 | ||
802 | static int proc_single_open(struct inode *inode, struct file *filp) | 802 | static int proc_single_open(struct inode *inode, struct file *filp) |
803 | { | 803 | { |
804 | return single_open(filp, proc_single_show, inode); | 804 | return single_open(filp, proc_single_show, inode); |
805 | } | 805 | } |
806 | 806 | ||
807 | static const struct file_operations proc_single_file_operations = { | 807 | static const struct file_operations proc_single_file_operations = { |
808 | .open = proc_single_open, | 808 | .open = proc_single_open, |
809 | .read = seq_read, | 809 | .read = seq_read, |
810 | .llseek = seq_lseek, | 810 | .llseek = seq_lseek, |
811 | .release = single_release, | 811 | .release = single_release, |
812 | }; | 812 | }; |
813 | 813 | ||
814 | static int mem_open(struct inode* inode, struct file* file) | 814 | static int mem_open(struct inode* inode, struct file* file) |
815 | { | 815 | { |
816 | file->private_data = (void*)((long)current->self_exec_id); | 816 | file->private_data = (void*)((long)current->self_exec_id); |
817 | /* OK to pass negative loff_t, we can catch out-of-range */ | 817 | /* OK to pass negative loff_t, we can catch out-of-range */ |
818 | file->f_mode |= FMODE_UNSIGNED_OFFSET; | 818 | file->f_mode |= FMODE_UNSIGNED_OFFSET; |
819 | return 0; | 819 | return 0; |
820 | } | 820 | } |
821 | 821 | ||
822 | static ssize_t mem_read(struct file * file, char __user * buf, | 822 | static ssize_t mem_read(struct file * file, char __user * buf, |
823 | size_t count, loff_t *ppos) | 823 | size_t count, loff_t *ppos) |
824 | { | 824 | { |
825 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | 825 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
826 | char *page; | 826 | char *page; |
827 | unsigned long src = *ppos; | 827 | unsigned long src = *ppos; |
828 | int ret = -ESRCH; | 828 | int ret = -ESRCH; |
829 | struct mm_struct *mm; | 829 | struct mm_struct *mm; |
830 | 830 | ||
831 | if (!task) | 831 | if (!task) |
832 | goto out_no_task; | 832 | goto out_no_task; |
833 | 833 | ||
834 | ret = -ENOMEM; | 834 | ret = -ENOMEM; |
835 | page = (char *)__get_free_page(GFP_TEMPORARY); | 835 | page = (char *)__get_free_page(GFP_TEMPORARY); |
836 | if (!page) | 836 | if (!page) |
837 | goto out; | 837 | goto out; |
838 | 838 | ||
839 | mm = check_mem_permission(task); | 839 | mm = check_mem_permission(task); |
840 | ret = PTR_ERR(mm); | 840 | ret = PTR_ERR(mm); |
841 | if (IS_ERR(mm)) | 841 | if (IS_ERR(mm)) |
842 | goto out_free; | 842 | goto out_free; |
843 | 843 | ||
844 | ret = -EIO; | 844 | ret = -EIO; |
845 | 845 | ||
846 | if (file->private_data != (void*)((long)current->self_exec_id)) | 846 | if (file->private_data != (void*)((long)current->self_exec_id)) |
847 | goto out_put; | 847 | goto out_put; |
848 | 848 | ||
849 | ret = 0; | 849 | ret = 0; |
850 | 850 | ||
851 | while (count > 0) { | 851 | while (count > 0) { |
852 | int this_len, retval; | 852 | int this_len, retval; |
853 | 853 | ||
854 | this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; | 854 | this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; |
855 | retval = access_remote_vm(mm, src, page, this_len, 0); | 855 | retval = access_remote_vm(mm, src, page, this_len, 0); |
856 | if (!retval) { | 856 | if (!retval) { |
857 | if (!ret) | 857 | if (!ret) |
858 | ret = -EIO; | 858 | ret = -EIO; |
859 | break; | 859 | break; |
860 | } | 860 | } |
861 | 861 | ||
862 | if (copy_to_user(buf, page, retval)) { | 862 | if (copy_to_user(buf, page, retval)) { |
863 | ret = -EFAULT; | 863 | ret = -EFAULT; |
864 | break; | 864 | break; |
865 | } | 865 | } |
866 | 866 | ||
867 | ret += retval; | 867 | ret += retval; |
868 | src += retval; | 868 | src += retval; |
869 | buf += retval; | 869 | buf += retval; |
870 | count -= retval; | 870 | count -= retval; |
871 | } | 871 | } |
872 | *ppos = src; | 872 | *ppos = src; |
873 | 873 | ||
874 | out_put: | 874 | out_put: |
875 | mmput(mm); | 875 | mmput(mm); |
876 | out_free: | 876 | out_free: |
877 | free_page((unsigned long) page); | 877 | free_page((unsigned long) page); |
878 | out: | 878 | out: |
879 | put_task_struct(task); | 879 | put_task_struct(task); |
880 | out_no_task: | 880 | out_no_task: |
881 | return ret; | 881 | return ret; |
882 | } | 882 | } |
883 | 883 | ||
884 | static ssize_t mem_write(struct file * file, const char __user *buf, | 884 | static ssize_t mem_write(struct file * file, const char __user *buf, |
885 | size_t count, loff_t *ppos) | 885 | size_t count, loff_t *ppos) |
886 | { | 886 | { |
887 | int copied; | 887 | int copied; |
888 | char *page; | 888 | char *page; |
889 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | 889 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
890 | unsigned long dst = *ppos; | 890 | unsigned long dst = *ppos; |
891 | struct mm_struct *mm; | 891 | struct mm_struct *mm; |
892 | 892 | ||
893 | copied = -ESRCH; | 893 | copied = -ESRCH; |
894 | if (!task) | 894 | if (!task) |
895 | goto out_no_task; | 895 | goto out_no_task; |
896 | 896 | ||
897 | mm = check_mem_permission(task); | 897 | mm = check_mem_permission(task); |
898 | copied = PTR_ERR(mm); | 898 | copied = PTR_ERR(mm); |
899 | if (IS_ERR(mm)) | 899 | if (IS_ERR(mm)) |
900 | goto out_task; | 900 | goto out_task; |
901 | 901 | ||
902 | copied = -EIO; | 902 | copied = -EIO; |
903 | if (file->private_data != (void *)((long)current->self_exec_id)) | 903 | if (file->private_data != (void *)((long)current->self_exec_id)) |
904 | goto out_mm; | 904 | goto out_mm; |
905 | 905 | ||
906 | copied = -ENOMEM; | 906 | copied = -ENOMEM; |
907 | page = (char *)__get_free_page(GFP_TEMPORARY); | 907 | page = (char *)__get_free_page(GFP_TEMPORARY); |
908 | if (!page) | 908 | if (!page) |
909 | goto out_mm; | 909 | goto out_mm; |
910 | 910 | ||
911 | copied = 0; | 911 | copied = 0; |
912 | while (count > 0) { | 912 | while (count > 0) { |
913 | int this_len, retval; | 913 | int this_len, retval; |
914 | 914 | ||
915 | this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; | 915 | this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; |
916 | if (copy_from_user(page, buf, this_len)) { | 916 | if (copy_from_user(page, buf, this_len)) { |
917 | copied = -EFAULT; | 917 | copied = -EFAULT; |
918 | break; | 918 | break; |
919 | } | 919 | } |
920 | retval = access_remote_vm(mm, dst, page, this_len, 1); | 920 | retval = access_remote_vm(mm, dst, page, this_len, 1); |
921 | if (!retval) { | 921 | if (!retval) { |
922 | if (!copied) | 922 | if (!copied) |
923 | copied = -EIO; | 923 | copied = -EIO; |
924 | break; | 924 | break; |
925 | } | 925 | } |
926 | copied += retval; | 926 | copied += retval; |
927 | buf += retval; | 927 | buf += retval; |
928 | dst += retval; | 928 | dst += retval; |
929 | count -= retval; | 929 | count -= retval; |
930 | } | 930 | } |
931 | *ppos = dst; | 931 | *ppos = dst; |
932 | free_page((unsigned long) page); | 932 | free_page((unsigned long) page); |
933 | out_mm: | 933 | out_mm: |
934 | mmput(mm); | 934 | mmput(mm); |
935 | out_task: | 935 | out_task: |
936 | put_task_struct(task); | 936 | put_task_struct(task); |
937 | out_no_task: | 937 | out_no_task: |
938 | return copied; | 938 | return copied; |
939 | } | 939 | } |
940 | 940 | ||
941 | loff_t mem_lseek(struct file *file, loff_t offset, int orig) | 941 | loff_t mem_lseek(struct file *file, loff_t offset, int orig) |
942 | { | 942 | { |
943 | switch (orig) { | 943 | switch (orig) { |
944 | case 0: | 944 | case 0: |
945 | file->f_pos = offset; | 945 | file->f_pos = offset; |
946 | break; | 946 | break; |
947 | case 1: | 947 | case 1: |
948 | file->f_pos += offset; | 948 | file->f_pos += offset; |
949 | break; | 949 | break; |
950 | default: | 950 | default: |
951 | return -EINVAL; | 951 | return -EINVAL; |
952 | } | 952 | } |
953 | force_successful_syscall_return(); | 953 | force_successful_syscall_return(); |
954 | return file->f_pos; | 954 | return file->f_pos; |
955 | } | 955 | } |
956 | 956 | ||
957 | static const struct file_operations proc_mem_operations = { | 957 | static const struct file_operations proc_mem_operations = { |
958 | .llseek = mem_lseek, | 958 | .llseek = mem_lseek, |
959 | .read = mem_read, | 959 | .read = mem_read, |
960 | .write = mem_write, | 960 | .write = mem_write, |
961 | .open = mem_open, | 961 | .open = mem_open, |
962 | }; | 962 | }; |
963 | 963 | ||
964 | static ssize_t environ_read(struct file *file, char __user *buf, | 964 | static ssize_t environ_read(struct file *file, char __user *buf, |
965 | size_t count, loff_t *ppos) | 965 | size_t count, loff_t *ppos) |
966 | { | 966 | { |
967 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); | 967 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
968 | char *page; | 968 | char *page; |
969 | unsigned long src = *ppos; | 969 | unsigned long src = *ppos; |
970 | int ret = -ESRCH; | 970 | int ret = -ESRCH; |
971 | struct mm_struct *mm; | 971 | struct mm_struct *mm; |
972 | 972 | ||
973 | if (!task) | 973 | if (!task) |
974 | goto out_no_task; | 974 | goto out_no_task; |
975 | 975 | ||
976 | ret = -ENOMEM; | 976 | ret = -ENOMEM; |
977 | page = (char *)__get_free_page(GFP_TEMPORARY); | 977 | page = (char *)__get_free_page(GFP_TEMPORARY); |
978 | if (!page) | 978 | if (!page) |
979 | goto out; | 979 | goto out; |
980 | 980 | ||
981 | 981 | ||
982 | mm = mm_for_maps(task); | 982 | mm = mm_for_maps(task); |
983 | ret = PTR_ERR(mm); | 983 | ret = PTR_ERR(mm); |
984 | if (!mm || IS_ERR(mm)) | 984 | if (!mm || IS_ERR(mm)) |
985 | goto out_free; | 985 | goto out_free; |
986 | 986 | ||
987 | ret = 0; | 987 | ret = 0; |
988 | while (count > 0) { | 988 | while (count > 0) { |
989 | int this_len, retval, max_len; | 989 | int this_len, retval, max_len; |
990 | 990 | ||
991 | this_len = mm->env_end - (mm->env_start + src); | 991 | this_len = mm->env_end - (mm->env_start + src); |
992 | 992 | ||
993 | if (this_len <= 0) | 993 | if (this_len <= 0) |
994 | break; | 994 | break; |
995 | 995 | ||
996 | max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; | 996 | max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; |
997 | this_len = (this_len > max_len) ? max_len : this_len; | 997 | this_len = (this_len > max_len) ? max_len : this_len; |
998 | 998 | ||
999 | retval = access_process_vm(task, (mm->env_start + src), | 999 | retval = access_process_vm(task, (mm->env_start + src), |
1000 | page, this_len, 0); | 1000 | page, this_len, 0); |
1001 | 1001 | ||
1002 | if (retval <= 0) { | 1002 | if (retval <= 0) { |
1003 | ret = retval; | 1003 | ret = retval; |
1004 | break; | 1004 | break; |
1005 | } | 1005 | } |
1006 | 1006 | ||
1007 | if (copy_to_user(buf, page, retval)) { | 1007 | if (copy_to_user(buf, page, retval)) { |
1008 | ret = -EFAULT; | 1008 | ret = -EFAULT; |
1009 | break; | 1009 | break; |
1010 | } | 1010 | } |
1011 | 1011 | ||
1012 | ret += retval; | 1012 | ret += retval; |
1013 | src += retval; | 1013 | src += retval; |
1014 | buf += retval; | 1014 | buf += retval; |
1015 | count -= retval; | 1015 | count -= retval; |
1016 | } | 1016 | } |
1017 | *ppos = src; | 1017 | *ppos = src; |
1018 | 1018 | ||
1019 | mmput(mm); | 1019 | mmput(mm); |
1020 | out_free: | 1020 | out_free: |
1021 | free_page((unsigned long) page); | 1021 | free_page((unsigned long) page); |
1022 | out: | 1022 | out: |
1023 | put_task_struct(task); | 1023 | put_task_struct(task); |
1024 | out_no_task: | 1024 | out_no_task: |
1025 | return ret; | 1025 | return ret; |
1026 | } | 1026 | } |
1027 | 1027 | ||
1028 | static const struct file_operations proc_environ_operations = { | 1028 | static const struct file_operations proc_environ_operations = { |
1029 | .read = environ_read, | 1029 | .read = environ_read, |
1030 | .llseek = generic_file_llseek, | 1030 | .llseek = generic_file_llseek, |
1031 | }; | 1031 | }; |
1032 | 1032 | ||
1033 | static ssize_t oom_adjust_read(struct file *file, char __user *buf, | 1033 | static ssize_t oom_adjust_read(struct file *file, char __user *buf, |
1034 | size_t count, loff_t *ppos) | 1034 | size_t count, loff_t *ppos) |
1035 | { | 1035 | { |
1036 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | 1036 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
1037 | char buffer[PROC_NUMBUF]; | 1037 | char buffer[PROC_NUMBUF]; |
1038 | size_t len; | 1038 | size_t len; |
1039 | int oom_adjust = OOM_DISABLE; | 1039 | int oom_adjust = OOM_DISABLE; |
1040 | unsigned long flags; | 1040 | unsigned long flags; |
1041 | 1041 | ||
1042 | if (!task) | 1042 | if (!task) |
1043 | return -ESRCH; | 1043 | return -ESRCH; |
1044 | 1044 | ||
1045 | if (lock_task_sighand(task, &flags)) { | 1045 | if (lock_task_sighand(task, &flags)) { |
1046 | oom_adjust = task->signal->oom_adj; | 1046 | oom_adjust = task->signal->oom_adj; |
1047 | unlock_task_sighand(task, &flags); | 1047 | unlock_task_sighand(task, &flags); |
1048 | } | 1048 | } |
1049 | 1049 | ||
1050 | put_task_struct(task); | 1050 | put_task_struct(task); |
1051 | 1051 | ||
1052 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); | 1052 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); |
1053 | 1053 | ||
1054 | return simple_read_from_buffer(buf, count, ppos, buffer, len); | 1054 | return simple_read_from_buffer(buf, count, ppos, buffer, len); |
1055 | } | 1055 | } |
1056 | 1056 | ||
1057 | static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | 1057 | static ssize_t oom_adjust_write(struct file *file, const char __user *buf, |
1058 | size_t count, loff_t *ppos) | 1058 | size_t count, loff_t *ppos) |
1059 | { | 1059 | { |
1060 | struct task_struct *task; | 1060 | struct task_struct *task; |
1061 | char buffer[PROC_NUMBUF]; | 1061 | char buffer[PROC_NUMBUF]; |
1062 | long oom_adjust; | 1062 | int oom_adjust; |
1063 | unsigned long flags; | 1063 | unsigned long flags; |
1064 | int err; | 1064 | int err; |
1065 | 1065 | ||
1066 | memset(buffer, 0, sizeof(buffer)); | 1066 | memset(buffer, 0, sizeof(buffer)); |
1067 | if (count > sizeof(buffer) - 1) | 1067 | if (count > sizeof(buffer) - 1) |
1068 | count = sizeof(buffer) - 1; | 1068 | count = sizeof(buffer) - 1; |
1069 | if (copy_from_user(buffer, buf, count)) { | 1069 | if (copy_from_user(buffer, buf, count)) { |
1070 | err = -EFAULT; | 1070 | err = -EFAULT; |
1071 | goto out; | 1071 | goto out; |
1072 | } | 1072 | } |
1073 | 1073 | ||
1074 | err = strict_strtol(strstrip(buffer), 0, &oom_adjust); | 1074 | err = kstrtoint(strstrip(buffer), 0, &oom_adjust); |
1075 | if (err) | 1075 | if (err) |
1076 | goto out; | 1076 | goto out; |
1077 | if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && | 1077 | if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && |
1078 | oom_adjust != OOM_DISABLE) { | 1078 | oom_adjust != OOM_DISABLE) { |
1079 | err = -EINVAL; | 1079 | err = -EINVAL; |
1080 | goto out; | 1080 | goto out; |
1081 | } | 1081 | } |
1082 | 1082 | ||
1083 | task = get_proc_task(file->f_path.dentry->d_inode); | 1083 | task = get_proc_task(file->f_path.dentry->d_inode); |
1084 | if (!task) { | 1084 | if (!task) { |
1085 | err = -ESRCH; | 1085 | err = -ESRCH; |
1086 | goto out; | 1086 | goto out; |
1087 | } | 1087 | } |
1088 | 1088 | ||
1089 | task_lock(task); | 1089 | task_lock(task); |
1090 | if (!task->mm) { | 1090 | if (!task->mm) { |
1091 | err = -EINVAL; | 1091 | err = -EINVAL; |
1092 | goto err_task_lock; | 1092 | goto err_task_lock; |
1093 | } | 1093 | } |
1094 | 1094 | ||
1095 | if (!lock_task_sighand(task, &flags)) { | 1095 | if (!lock_task_sighand(task, &flags)) { |
1096 | err = -ESRCH; | 1096 | err = -ESRCH; |
1097 | goto err_task_lock; | 1097 | goto err_task_lock; |
1098 | } | 1098 | } |
1099 | 1099 | ||
1100 | if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { | 1100 | if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { |
1101 | err = -EACCES; | 1101 | err = -EACCES; |
1102 | goto err_sighand; | 1102 | goto err_sighand; |
1103 | } | 1103 | } |
1104 | 1104 | ||
1105 | if (oom_adjust != task->signal->oom_adj) { | 1105 | if (oom_adjust != task->signal->oom_adj) { |
1106 | if (oom_adjust == OOM_DISABLE) | 1106 | if (oom_adjust == OOM_DISABLE) |
1107 | atomic_inc(&task->mm->oom_disable_count); | 1107 | atomic_inc(&task->mm->oom_disable_count); |
1108 | if (task->signal->oom_adj == OOM_DISABLE) | 1108 | if (task->signal->oom_adj == OOM_DISABLE) |
1109 | atomic_dec(&task->mm->oom_disable_count); | 1109 | atomic_dec(&task->mm->oom_disable_count); |
1110 | } | 1110 | } |
1111 | 1111 | ||
1112 | /* | 1112 | /* |
1113 | * Warn that /proc/pid/oom_adj is deprecated, see | 1113 | * Warn that /proc/pid/oom_adj is deprecated, see |
1114 | * Documentation/feature-removal-schedule.txt. | 1114 | * Documentation/feature-removal-schedule.txt. |
1115 | */ | 1115 | */ |
1116 | printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, " | 1116 | printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, " |
1117 | "please use /proc/%d/oom_score_adj instead.\n", | 1117 | "please use /proc/%d/oom_score_adj instead.\n", |
1118 | current->comm, task_pid_nr(current), | 1118 | current->comm, task_pid_nr(current), |
1119 | task_pid_nr(task), task_pid_nr(task)); | 1119 | task_pid_nr(task), task_pid_nr(task)); |
1120 | task->signal->oom_adj = oom_adjust; | 1120 | task->signal->oom_adj = oom_adjust; |
1121 | /* | 1121 | /* |
1122 | * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum | 1122 | * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum |
1123 | * value is always attainable. | 1123 | * value is always attainable. |
1124 | */ | 1124 | */ |
1125 | if (task->signal->oom_adj == OOM_ADJUST_MAX) | 1125 | if (task->signal->oom_adj == OOM_ADJUST_MAX) |
1126 | task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX; | 1126 | task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX; |
1127 | else | 1127 | else |
1128 | task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / | 1128 | task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / |
1129 | -OOM_DISABLE; | 1129 | -OOM_DISABLE; |
1130 | err_sighand: | 1130 | err_sighand: |
1131 | unlock_task_sighand(task, &flags); | 1131 | unlock_task_sighand(task, &flags); |
1132 | err_task_lock: | 1132 | err_task_lock: |
1133 | task_unlock(task); | 1133 | task_unlock(task); |
1134 | put_task_struct(task); | 1134 | put_task_struct(task); |
1135 | out: | 1135 | out: |
1136 | return err < 0 ? err : count; | 1136 | return err < 0 ? err : count; |
1137 | } | 1137 | } |
1138 | 1138 | ||
1139 | static const struct file_operations proc_oom_adjust_operations = { | 1139 | static const struct file_operations proc_oom_adjust_operations = { |
1140 | .read = oom_adjust_read, | 1140 | .read = oom_adjust_read, |
1141 | .write = oom_adjust_write, | 1141 | .write = oom_adjust_write, |
1142 | .llseek = generic_file_llseek, | 1142 | .llseek = generic_file_llseek, |
1143 | }; | 1143 | }; |
1144 | 1144 | ||
1145 | static ssize_t oom_score_adj_read(struct file *file, char __user *buf, | 1145 | static ssize_t oom_score_adj_read(struct file *file, char __user *buf, |
1146 | size_t count, loff_t *ppos) | 1146 | size_t count, loff_t *ppos) |
1147 | { | 1147 | { |
1148 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | 1148 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
1149 | char buffer[PROC_NUMBUF]; | 1149 | char buffer[PROC_NUMBUF]; |
1150 | int oom_score_adj = OOM_SCORE_ADJ_MIN; | 1150 | int oom_score_adj = OOM_SCORE_ADJ_MIN; |
1151 | unsigned long flags; | 1151 | unsigned long flags; |
1152 | size_t len; | 1152 | size_t len; |
1153 | 1153 | ||
1154 | if (!task) | 1154 | if (!task) |
1155 | return -ESRCH; | 1155 | return -ESRCH; |
1156 | if (lock_task_sighand(task, &flags)) { | 1156 | if (lock_task_sighand(task, &flags)) { |
1157 | oom_score_adj = task->signal->oom_score_adj; | 1157 | oom_score_adj = task->signal->oom_score_adj; |
1158 | unlock_task_sighand(task, &flags); | 1158 | unlock_task_sighand(task, &flags); |
1159 | } | 1159 | } |
1160 | put_task_struct(task); | 1160 | put_task_struct(task); |
1161 | len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj); | 1161 | len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj); |
1162 | return simple_read_from_buffer(buf, count, ppos, buffer, len); | 1162 | return simple_read_from_buffer(buf, count, ppos, buffer, len); |
1163 | } | 1163 | } |
1164 | 1164 | ||
1165 | static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, | 1165 | static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, |
1166 | size_t count, loff_t *ppos) | 1166 | size_t count, loff_t *ppos) |
1167 | { | 1167 | { |
1168 | struct task_struct *task; | 1168 | struct task_struct *task; |
1169 | char buffer[PROC_NUMBUF]; | 1169 | char buffer[PROC_NUMBUF]; |
1170 | unsigned long flags; | 1170 | unsigned long flags; |
1171 | long oom_score_adj; | 1171 | int oom_score_adj; |
1172 | int err; | 1172 | int err; |
1173 | 1173 | ||
1174 | memset(buffer, 0, sizeof(buffer)); | 1174 | memset(buffer, 0, sizeof(buffer)); |
1175 | if (count > sizeof(buffer) - 1) | 1175 | if (count > sizeof(buffer) - 1) |
1176 | count = sizeof(buffer) - 1; | 1176 | count = sizeof(buffer) - 1; |
1177 | if (copy_from_user(buffer, buf, count)) { | 1177 | if (copy_from_user(buffer, buf, count)) { |
1178 | err = -EFAULT; | 1178 | err = -EFAULT; |
1179 | goto out; | 1179 | goto out; |
1180 | } | 1180 | } |
1181 | 1181 | ||
1182 | err = strict_strtol(strstrip(buffer), 0, &oom_score_adj); | 1182 | err = kstrtoint(strstrip(buffer), 0, &oom_score_adj); |
1183 | if (err) | 1183 | if (err) |
1184 | goto out; | 1184 | goto out; |
1185 | if (oom_score_adj < OOM_SCORE_ADJ_MIN || | 1185 | if (oom_score_adj < OOM_SCORE_ADJ_MIN || |
1186 | oom_score_adj > OOM_SCORE_ADJ_MAX) { | 1186 | oom_score_adj > OOM_SCORE_ADJ_MAX) { |
1187 | err = -EINVAL; | 1187 | err = -EINVAL; |
1188 | goto out; | 1188 | goto out; |
1189 | } | 1189 | } |
1190 | 1190 | ||
1191 | task = get_proc_task(file->f_path.dentry->d_inode); | 1191 | task = get_proc_task(file->f_path.dentry->d_inode); |
1192 | if (!task) { | 1192 | if (!task) { |
1193 | err = -ESRCH; | 1193 | err = -ESRCH; |
1194 | goto out; | 1194 | goto out; |
1195 | } | 1195 | } |
1196 | 1196 | ||
1197 | task_lock(task); | 1197 | task_lock(task); |
1198 | if (!task->mm) { | 1198 | if (!task->mm) { |
1199 | err = -EINVAL; | 1199 | err = -EINVAL; |
1200 | goto err_task_lock; | 1200 | goto err_task_lock; |
1201 | } | 1201 | } |
1202 | 1202 | ||
1203 | if (!lock_task_sighand(task, &flags)) { | 1203 | if (!lock_task_sighand(task, &flags)) { |
1204 | err = -ESRCH; | 1204 | err = -ESRCH; |
1205 | goto err_task_lock; | 1205 | goto err_task_lock; |
1206 | } | 1206 | } |
1207 | 1207 | ||
1208 | if (oom_score_adj < task->signal->oom_score_adj_min && | 1208 | if (oom_score_adj < task->signal->oom_score_adj_min && |
1209 | !capable(CAP_SYS_RESOURCE)) { | 1209 | !capable(CAP_SYS_RESOURCE)) { |
1210 | err = -EACCES; | 1210 | err = -EACCES; |
1211 | goto err_sighand; | 1211 | goto err_sighand; |
1212 | } | 1212 | } |
1213 | 1213 | ||
1214 | if (oom_score_adj != task->signal->oom_score_adj) { | 1214 | if (oom_score_adj != task->signal->oom_score_adj) { |
1215 | if (oom_score_adj == OOM_SCORE_ADJ_MIN) | 1215 | if (oom_score_adj == OOM_SCORE_ADJ_MIN) |
1216 | atomic_inc(&task->mm->oom_disable_count); | 1216 | atomic_inc(&task->mm->oom_disable_count); |
1217 | if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | 1217 | if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) |
1218 | atomic_dec(&task->mm->oom_disable_count); | 1218 | atomic_dec(&task->mm->oom_disable_count); |
1219 | } | 1219 | } |
1220 | task->signal->oom_score_adj = oom_score_adj; | 1220 | task->signal->oom_score_adj = oom_score_adj; |
1221 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) | 1221 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) |
1222 | task->signal->oom_score_adj_min = oom_score_adj; | 1222 | task->signal->oom_score_adj_min = oom_score_adj; |
1223 | /* | 1223 | /* |
1224 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is | 1224 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is |
1225 | * always attainable. | 1225 | * always attainable. |
1226 | */ | 1226 | */ |
1227 | if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | 1227 | if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) |
1228 | task->signal->oom_adj = OOM_DISABLE; | 1228 | task->signal->oom_adj = OOM_DISABLE; |
1229 | else | 1229 | else |
1230 | task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / | 1230 | task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / |
1231 | OOM_SCORE_ADJ_MAX; | 1231 | OOM_SCORE_ADJ_MAX; |
1232 | err_sighand: | 1232 | err_sighand: |
1233 | unlock_task_sighand(task, &flags); | 1233 | unlock_task_sighand(task, &flags); |
1234 | err_task_lock: | 1234 | err_task_lock: |
1235 | task_unlock(task); | 1235 | task_unlock(task); |
1236 | put_task_struct(task); | 1236 | put_task_struct(task); |
1237 | out: | 1237 | out: |
1238 | return err < 0 ? err : count; | 1238 | return err < 0 ? err : count; |
1239 | } | 1239 | } |
1240 | 1240 | ||
1241 | static const struct file_operations proc_oom_score_adj_operations = { | 1241 | static const struct file_operations proc_oom_score_adj_operations = { |
1242 | .read = oom_score_adj_read, | 1242 | .read = oom_score_adj_read, |
1243 | .write = oom_score_adj_write, | 1243 | .write = oom_score_adj_write, |
1244 | .llseek = default_llseek, | 1244 | .llseek = default_llseek, |
1245 | }; | 1245 | }; |
1246 | 1246 | ||
1247 | #ifdef CONFIG_AUDITSYSCALL | 1247 | #ifdef CONFIG_AUDITSYSCALL |
1248 | #define TMPBUFLEN 21 | 1248 | #define TMPBUFLEN 21 |
1249 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, | 1249 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, |
1250 | size_t count, loff_t *ppos) | 1250 | size_t count, loff_t *ppos) |
1251 | { | 1251 | { |
1252 | struct inode * inode = file->f_path.dentry->d_inode; | 1252 | struct inode * inode = file->f_path.dentry->d_inode; |
1253 | struct task_struct *task = get_proc_task(inode); | 1253 | struct task_struct *task = get_proc_task(inode); |
1254 | ssize_t length; | 1254 | ssize_t length; |
1255 | char tmpbuf[TMPBUFLEN]; | 1255 | char tmpbuf[TMPBUFLEN]; |
1256 | 1256 | ||
1257 | if (!task) | 1257 | if (!task) |
1258 | return -ESRCH; | 1258 | return -ESRCH; |
1259 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", | 1259 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", |
1260 | audit_get_loginuid(task)); | 1260 | audit_get_loginuid(task)); |
1261 | put_task_struct(task); | 1261 | put_task_struct(task); |
1262 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); | 1262 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); |
1263 | } | 1263 | } |
1264 | 1264 | ||
1265 | static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | 1265 | static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, |
1266 | size_t count, loff_t *ppos) | 1266 | size_t count, loff_t *ppos) |
1267 | { | 1267 | { |
1268 | struct inode * inode = file->f_path.dentry->d_inode; | 1268 | struct inode * inode = file->f_path.dentry->d_inode; |
1269 | char *page, *tmp; | 1269 | char *page, *tmp; |
1270 | ssize_t length; | 1270 | ssize_t length; |
1271 | uid_t loginuid; | 1271 | uid_t loginuid; |
1272 | 1272 | ||
1273 | if (!capable(CAP_AUDIT_CONTROL)) | 1273 | if (!capable(CAP_AUDIT_CONTROL)) |
1274 | return -EPERM; | 1274 | return -EPERM; |
1275 | 1275 | ||
1276 | rcu_read_lock(); | 1276 | rcu_read_lock(); |
1277 | if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { | 1277 | if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { |
1278 | rcu_read_unlock(); | 1278 | rcu_read_unlock(); |
1279 | return -EPERM; | 1279 | return -EPERM; |
1280 | } | 1280 | } |
1281 | rcu_read_unlock(); | 1281 | rcu_read_unlock(); |
1282 | 1282 | ||
1283 | if (count >= PAGE_SIZE) | 1283 | if (count >= PAGE_SIZE) |
1284 | count = PAGE_SIZE - 1; | 1284 | count = PAGE_SIZE - 1; |
1285 | 1285 | ||
1286 | if (*ppos != 0) { | 1286 | if (*ppos != 0) { |
1287 | /* No partial writes. */ | 1287 | /* No partial writes. */ |
1288 | return -EINVAL; | 1288 | return -EINVAL; |
1289 | } | 1289 | } |
1290 | page = (char*)__get_free_page(GFP_TEMPORARY); | 1290 | page = (char*)__get_free_page(GFP_TEMPORARY); |
1291 | if (!page) | 1291 | if (!page) |
1292 | return -ENOMEM; | 1292 | return -ENOMEM; |
1293 | length = -EFAULT; | 1293 | length = -EFAULT; |
1294 | if (copy_from_user(page, buf, count)) | 1294 | if (copy_from_user(page, buf, count)) |
1295 | goto out_free_page; | 1295 | goto out_free_page; |
1296 | 1296 | ||
1297 | page[count] = '\0'; | 1297 | page[count] = '\0'; |
1298 | loginuid = simple_strtoul(page, &tmp, 10); | 1298 | loginuid = simple_strtoul(page, &tmp, 10); |
1299 | if (tmp == page) { | 1299 | if (tmp == page) { |
1300 | length = -EINVAL; | 1300 | length = -EINVAL; |
1301 | goto out_free_page; | 1301 | goto out_free_page; |
1302 | 1302 | ||
1303 | } | 1303 | } |
1304 | length = audit_set_loginuid(current, loginuid); | 1304 | length = audit_set_loginuid(current, loginuid); |
1305 | if (likely(length == 0)) | 1305 | if (likely(length == 0)) |
1306 | length = count; | 1306 | length = count; |
1307 | 1307 | ||
1308 | out_free_page: | 1308 | out_free_page: |
1309 | free_page((unsigned long) page); | 1309 | free_page((unsigned long) page); |
1310 | return length; | 1310 | return length; |
1311 | } | 1311 | } |
1312 | 1312 | ||
1313 | static const struct file_operations proc_loginuid_operations = { | 1313 | static const struct file_operations proc_loginuid_operations = { |
1314 | .read = proc_loginuid_read, | 1314 | .read = proc_loginuid_read, |
1315 | .write = proc_loginuid_write, | 1315 | .write = proc_loginuid_write, |
1316 | .llseek = generic_file_llseek, | 1316 | .llseek = generic_file_llseek, |
1317 | }; | 1317 | }; |
1318 | 1318 | ||
1319 | static ssize_t proc_sessionid_read(struct file * file, char __user * buf, | 1319 | static ssize_t proc_sessionid_read(struct file * file, char __user * buf, |
1320 | size_t count, loff_t *ppos) | 1320 | size_t count, loff_t *ppos) |
1321 | { | 1321 | { |
1322 | struct inode * inode = file->f_path.dentry->d_inode; | 1322 | struct inode * inode = file->f_path.dentry->d_inode; |
1323 | struct task_struct *task = get_proc_task(inode); | 1323 | struct task_struct *task = get_proc_task(inode); |
1324 | ssize_t length; | 1324 | ssize_t length; |
1325 | char tmpbuf[TMPBUFLEN]; | 1325 | char tmpbuf[TMPBUFLEN]; |
1326 | 1326 | ||
1327 | if (!task) | 1327 | if (!task) |
1328 | return -ESRCH; | 1328 | return -ESRCH; |
1329 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", | 1329 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", |
1330 | audit_get_sessionid(task)); | 1330 | audit_get_sessionid(task)); |
1331 | put_task_struct(task); | 1331 | put_task_struct(task); |
1332 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); | 1332 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); |
1333 | } | 1333 | } |
1334 | 1334 | ||
1335 | static const struct file_operations proc_sessionid_operations = { | 1335 | static const struct file_operations proc_sessionid_operations = { |
1336 | .read = proc_sessionid_read, | 1336 | .read = proc_sessionid_read, |
1337 | .llseek = generic_file_llseek, | 1337 | .llseek = generic_file_llseek, |
1338 | }; | 1338 | }; |
1339 | #endif | 1339 | #endif |
1340 | 1340 | ||
1341 | #ifdef CONFIG_FAULT_INJECTION | 1341 | #ifdef CONFIG_FAULT_INJECTION |
1342 | static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, | 1342 | static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, |
1343 | size_t count, loff_t *ppos) | 1343 | size_t count, loff_t *ppos) |
1344 | { | 1344 | { |
1345 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); | 1345 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
1346 | char buffer[PROC_NUMBUF]; | 1346 | char buffer[PROC_NUMBUF]; |
1347 | size_t len; | 1347 | size_t len; |
1348 | int make_it_fail; | 1348 | int make_it_fail; |
1349 | 1349 | ||
1350 | if (!task) | 1350 | if (!task) |
1351 | return -ESRCH; | 1351 | return -ESRCH; |
1352 | make_it_fail = task->make_it_fail; | 1352 | make_it_fail = task->make_it_fail; |
1353 | put_task_struct(task); | 1353 | put_task_struct(task); |
1354 | 1354 | ||
1355 | len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); | 1355 | len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); |
1356 | 1356 | ||
1357 | return simple_read_from_buffer(buf, count, ppos, buffer, len); | 1357 | return simple_read_from_buffer(buf, count, ppos, buffer, len); |
1358 | } | 1358 | } |
1359 | 1359 | ||
1360 | static ssize_t proc_fault_inject_write(struct file * file, | 1360 | static ssize_t proc_fault_inject_write(struct file * file, |
1361 | const char __user * buf, size_t count, loff_t *ppos) | 1361 | const char __user * buf, size_t count, loff_t *ppos) |
1362 | { | 1362 | { |
1363 | struct task_struct *task; | 1363 | struct task_struct *task; |
1364 | char buffer[PROC_NUMBUF], *end; | 1364 | char buffer[PROC_NUMBUF], *end; |
1365 | int make_it_fail; | 1365 | int make_it_fail; |
1366 | 1366 | ||
1367 | if (!capable(CAP_SYS_RESOURCE)) | 1367 | if (!capable(CAP_SYS_RESOURCE)) |
1368 | return -EPERM; | 1368 | return -EPERM; |
1369 | memset(buffer, 0, sizeof(buffer)); | 1369 | memset(buffer, 0, sizeof(buffer)); |
1370 | if (count > sizeof(buffer) - 1) | 1370 | if (count > sizeof(buffer) - 1) |
1371 | count = sizeof(buffer) - 1; | 1371 | count = sizeof(buffer) - 1; |
1372 | if (copy_from_user(buffer, buf, count)) | 1372 | if (copy_from_user(buffer, buf, count)) |
1373 | return -EFAULT; | 1373 | return -EFAULT; |
1374 | make_it_fail = simple_strtol(strstrip(buffer), &end, 0); | 1374 | make_it_fail = simple_strtol(strstrip(buffer), &end, 0); |
1375 | if (*end) | 1375 | if (*end) |
1376 | return -EINVAL; | 1376 | return -EINVAL; |
1377 | task = get_proc_task(file->f_dentry->d_inode); | 1377 | task = get_proc_task(file->f_dentry->d_inode); |
1378 | if (!task) | 1378 | if (!task) |
1379 | return -ESRCH; | 1379 | return -ESRCH; |
1380 | task->make_it_fail = make_it_fail; | 1380 | task->make_it_fail = make_it_fail; |
1381 | put_task_struct(task); | 1381 | put_task_struct(task); |
1382 | 1382 | ||
1383 | return count; | 1383 | return count; |
1384 | } | 1384 | } |
1385 | 1385 | ||
1386 | static const struct file_operations proc_fault_inject_operations = { | 1386 | static const struct file_operations proc_fault_inject_operations = { |
1387 | .read = proc_fault_inject_read, | 1387 | .read = proc_fault_inject_read, |
1388 | .write = proc_fault_inject_write, | 1388 | .write = proc_fault_inject_write, |
1389 | .llseek = generic_file_llseek, | 1389 | .llseek = generic_file_llseek, |
1390 | }; | 1390 | }; |
1391 | #endif | 1391 | #endif |
1392 | 1392 | ||
1393 | 1393 | ||
1394 | #ifdef CONFIG_SCHED_DEBUG | 1394 | #ifdef CONFIG_SCHED_DEBUG |
1395 | /* | 1395 | /* |
1396 | * Print out various scheduling related per-task fields: | 1396 | * Print out various scheduling related per-task fields: |
1397 | */ | 1397 | */ |
1398 | static int sched_show(struct seq_file *m, void *v) | 1398 | static int sched_show(struct seq_file *m, void *v) |
1399 | { | 1399 | { |
1400 | struct inode *inode = m->private; | 1400 | struct inode *inode = m->private; |
1401 | struct task_struct *p; | 1401 | struct task_struct *p; |
1402 | 1402 | ||
1403 | p = get_proc_task(inode); | 1403 | p = get_proc_task(inode); |
1404 | if (!p) | 1404 | if (!p) |
1405 | return -ESRCH; | 1405 | return -ESRCH; |
1406 | proc_sched_show_task(p, m); | 1406 | proc_sched_show_task(p, m); |
1407 | 1407 | ||
1408 | put_task_struct(p); | 1408 | put_task_struct(p); |
1409 | 1409 | ||
1410 | return 0; | 1410 | return 0; |
1411 | } | 1411 | } |
1412 | 1412 | ||
1413 | static ssize_t | 1413 | static ssize_t |
1414 | sched_write(struct file *file, const char __user *buf, | 1414 | sched_write(struct file *file, const char __user *buf, |
1415 | size_t count, loff_t *offset) | 1415 | size_t count, loff_t *offset) |
1416 | { | 1416 | { |
1417 | struct inode *inode = file->f_path.dentry->d_inode; | 1417 | struct inode *inode = file->f_path.dentry->d_inode; |
1418 | struct task_struct *p; | 1418 | struct task_struct *p; |
1419 | 1419 | ||
1420 | p = get_proc_task(inode); | 1420 | p = get_proc_task(inode); |
1421 | if (!p) | 1421 | if (!p) |
1422 | return -ESRCH; | 1422 | return -ESRCH; |
1423 | proc_sched_set_task(p); | 1423 | proc_sched_set_task(p); |
1424 | 1424 | ||
1425 | put_task_struct(p); | 1425 | put_task_struct(p); |
1426 | 1426 | ||
1427 | return count; | 1427 | return count; |
1428 | } | 1428 | } |
1429 | 1429 | ||
1430 | static int sched_open(struct inode *inode, struct file *filp) | 1430 | static int sched_open(struct inode *inode, struct file *filp) |
1431 | { | 1431 | { |
1432 | return single_open(filp, sched_show, inode); | 1432 | return single_open(filp, sched_show, inode); |
1433 | } | 1433 | } |
1434 | 1434 | ||
1435 | static const struct file_operations proc_pid_sched_operations = { | 1435 | static const struct file_operations proc_pid_sched_operations = { |
1436 | .open = sched_open, | 1436 | .open = sched_open, |
1437 | .read = seq_read, | 1437 | .read = seq_read, |
1438 | .write = sched_write, | 1438 | .write = sched_write, |
1439 | .llseek = seq_lseek, | 1439 | .llseek = seq_lseek, |
1440 | .release = single_release, | 1440 | .release = single_release, |
1441 | }; | 1441 | }; |
1442 | 1442 | ||
1443 | #endif | 1443 | #endif |
1444 | 1444 | ||
1445 | #ifdef CONFIG_SCHED_AUTOGROUP | 1445 | #ifdef CONFIG_SCHED_AUTOGROUP |
1446 | /* | 1446 | /* |
1447 | * Print out autogroup related information: | 1447 | * Print out autogroup related information: |
1448 | */ | 1448 | */ |
1449 | static int sched_autogroup_show(struct seq_file *m, void *v) | 1449 | static int sched_autogroup_show(struct seq_file *m, void *v) |
1450 | { | 1450 | { |
1451 | struct inode *inode = m->private; | 1451 | struct inode *inode = m->private; |
1452 | struct task_struct *p; | 1452 | struct task_struct *p; |
1453 | 1453 | ||
1454 | p = get_proc_task(inode); | 1454 | p = get_proc_task(inode); |
1455 | if (!p) | 1455 | if (!p) |
1456 | return -ESRCH; | 1456 | return -ESRCH; |
1457 | proc_sched_autogroup_show_task(p, m); | 1457 | proc_sched_autogroup_show_task(p, m); |
1458 | 1458 | ||
1459 | put_task_struct(p); | 1459 | put_task_struct(p); |
1460 | 1460 | ||
1461 | return 0; | 1461 | return 0; |
1462 | } | 1462 | } |
1463 | 1463 | ||
1464 | static ssize_t | 1464 | static ssize_t |
1465 | sched_autogroup_write(struct file *file, const char __user *buf, | 1465 | sched_autogroup_write(struct file *file, const char __user *buf, |
1466 | size_t count, loff_t *offset) | 1466 | size_t count, loff_t *offset) |
1467 | { | 1467 | { |
1468 | struct inode *inode = file->f_path.dentry->d_inode; | 1468 | struct inode *inode = file->f_path.dentry->d_inode; |
1469 | struct task_struct *p; | 1469 | struct task_struct *p; |
1470 | char buffer[PROC_NUMBUF]; | 1470 | char buffer[PROC_NUMBUF]; |
1471 | long nice; | 1471 | int nice; |
1472 | int err; | 1472 | int err; |
1473 | 1473 | ||
1474 | memset(buffer, 0, sizeof(buffer)); | 1474 | memset(buffer, 0, sizeof(buffer)); |
1475 | if (count > sizeof(buffer) - 1) | 1475 | if (count > sizeof(buffer) - 1) |
1476 | count = sizeof(buffer) - 1; | 1476 | count = sizeof(buffer) - 1; |
1477 | if (copy_from_user(buffer, buf, count)) | 1477 | if (copy_from_user(buffer, buf, count)) |
1478 | return -EFAULT; | 1478 | return -EFAULT; |
1479 | 1479 | ||
1480 | err = strict_strtol(strstrip(buffer), 0, &nice); | 1480 | err = kstrtoint(strstrip(buffer), 0, &nice); |
1481 | if (err) | 1481 | if (err < 0) |
1482 | return -EINVAL; | 1482 | return err; |
1483 | 1483 | ||
1484 | p = get_proc_task(inode); | 1484 | p = get_proc_task(inode); |
1485 | if (!p) | 1485 | if (!p) |
1486 | return -ESRCH; | 1486 | return -ESRCH; |
1487 | 1487 | ||
1488 | err = nice; | 1488 | err = nice; |
1489 | err = proc_sched_autogroup_set_nice(p, &err); | 1489 | err = proc_sched_autogroup_set_nice(p, &err); |
1490 | if (err) | 1490 | if (err) |
1491 | count = err; | 1491 | count = err; |
1492 | 1492 | ||
1493 | put_task_struct(p); | 1493 | put_task_struct(p); |
1494 | 1494 | ||
1495 | return count; | 1495 | return count; |
1496 | } | 1496 | } |
1497 | 1497 | ||
1498 | static int sched_autogroup_open(struct inode *inode, struct file *filp) | 1498 | static int sched_autogroup_open(struct inode *inode, struct file *filp) |
1499 | { | 1499 | { |
1500 | int ret; | 1500 | int ret; |
1501 | 1501 | ||
1502 | ret = single_open(filp, sched_autogroup_show, NULL); | 1502 | ret = single_open(filp, sched_autogroup_show, NULL); |
1503 | if (!ret) { | 1503 | if (!ret) { |
1504 | struct seq_file *m = filp->private_data; | 1504 | struct seq_file *m = filp->private_data; |
1505 | 1505 | ||
1506 | m->private = inode; | 1506 | m->private = inode; |
1507 | } | 1507 | } |
1508 | return ret; | 1508 | return ret; |
1509 | } | 1509 | } |
1510 | 1510 | ||
1511 | static const struct file_operations proc_pid_sched_autogroup_operations = { | 1511 | static const struct file_operations proc_pid_sched_autogroup_operations = { |
1512 | .open = sched_autogroup_open, | 1512 | .open = sched_autogroup_open, |
1513 | .read = seq_read, | 1513 | .read = seq_read, |
1514 | .write = sched_autogroup_write, | 1514 | .write = sched_autogroup_write, |
1515 | .llseek = seq_lseek, | 1515 | .llseek = seq_lseek, |
1516 | .release = single_release, | 1516 | .release = single_release, |
1517 | }; | 1517 | }; |
1518 | 1518 | ||
1519 | #endif /* CONFIG_SCHED_AUTOGROUP */ | 1519 | #endif /* CONFIG_SCHED_AUTOGROUP */ |
1520 | 1520 | ||
1521 | static ssize_t comm_write(struct file *file, const char __user *buf, | 1521 | static ssize_t comm_write(struct file *file, const char __user *buf, |
1522 | size_t count, loff_t *offset) | 1522 | size_t count, loff_t *offset) |
1523 | { | 1523 | { |
1524 | struct inode *inode = file->f_path.dentry->d_inode; | 1524 | struct inode *inode = file->f_path.dentry->d_inode; |
1525 | struct task_struct *p; | 1525 | struct task_struct *p; |
1526 | char buffer[TASK_COMM_LEN]; | 1526 | char buffer[TASK_COMM_LEN]; |
1527 | 1527 | ||
1528 | memset(buffer, 0, sizeof(buffer)); | 1528 | memset(buffer, 0, sizeof(buffer)); |
1529 | if (count > sizeof(buffer) - 1) | 1529 | if (count > sizeof(buffer) - 1) |
1530 | count = sizeof(buffer) - 1; | 1530 | count = sizeof(buffer) - 1; |
1531 | if (copy_from_user(buffer, buf, count)) | 1531 | if (copy_from_user(buffer, buf, count)) |
1532 | return -EFAULT; | 1532 | return -EFAULT; |
1533 | 1533 | ||
1534 | p = get_proc_task(inode); | 1534 | p = get_proc_task(inode); |
1535 | if (!p) | 1535 | if (!p) |
1536 | return -ESRCH; | 1536 | return -ESRCH; |
1537 | 1537 | ||
1538 | if (same_thread_group(current, p)) | 1538 | if (same_thread_group(current, p)) |
1539 | set_task_comm(p, buffer); | 1539 | set_task_comm(p, buffer); |
1540 | else | 1540 | else |
1541 | count = -EINVAL; | 1541 | count = -EINVAL; |
1542 | 1542 | ||
1543 | put_task_struct(p); | 1543 | put_task_struct(p); |
1544 | 1544 | ||
1545 | return count; | 1545 | return count; |
1546 | } | 1546 | } |
1547 | 1547 | ||
1548 | static int comm_show(struct seq_file *m, void *v) | 1548 | static int comm_show(struct seq_file *m, void *v) |
1549 | { | 1549 | { |
1550 | struct inode *inode = m->private; | 1550 | struct inode *inode = m->private; |
1551 | struct task_struct *p; | 1551 | struct task_struct *p; |
1552 | 1552 | ||
1553 | p = get_proc_task(inode); | 1553 | p = get_proc_task(inode); |
1554 | if (!p) | 1554 | if (!p) |
1555 | return -ESRCH; | 1555 | return -ESRCH; |
1556 | 1556 | ||
1557 | task_lock(p); | 1557 | task_lock(p); |
1558 | seq_printf(m, "%s\n", p->comm); | 1558 | seq_printf(m, "%s\n", p->comm); |
1559 | task_unlock(p); | 1559 | task_unlock(p); |
1560 | 1560 | ||
1561 | put_task_struct(p); | 1561 | put_task_struct(p); |
1562 | 1562 | ||
1563 | return 0; | 1563 | return 0; |
1564 | } | 1564 | } |
1565 | 1565 | ||
1566 | static int comm_open(struct inode *inode, struct file *filp) | 1566 | static int comm_open(struct inode *inode, struct file *filp) |
1567 | { | 1567 | { |
1568 | return single_open(filp, comm_show, inode); | 1568 | return single_open(filp, comm_show, inode); |
1569 | } | 1569 | } |
1570 | 1570 | ||
1571 | static const struct file_operations proc_pid_set_comm_operations = { | 1571 | static const struct file_operations proc_pid_set_comm_operations = { |
1572 | .open = comm_open, | 1572 | .open = comm_open, |
1573 | .read = seq_read, | 1573 | .read = seq_read, |
1574 | .write = comm_write, | 1574 | .write = comm_write, |
1575 | .llseek = seq_lseek, | 1575 | .llseek = seq_lseek, |
1576 | .release = single_release, | 1576 | .release = single_release, |
1577 | }; | 1577 | }; |
1578 | 1578 | ||
1579 | static int proc_exe_link(struct inode *inode, struct path *exe_path) | 1579 | static int proc_exe_link(struct inode *inode, struct path *exe_path) |
1580 | { | 1580 | { |
1581 | struct task_struct *task; | 1581 | struct task_struct *task; |
1582 | struct mm_struct *mm; | 1582 | struct mm_struct *mm; |
1583 | struct file *exe_file; | 1583 | struct file *exe_file; |
1584 | 1584 | ||
1585 | task = get_proc_task(inode); | 1585 | task = get_proc_task(inode); |
1586 | if (!task) | 1586 | if (!task) |
1587 | return -ENOENT; | 1587 | return -ENOENT; |
1588 | mm = get_task_mm(task); | 1588 | mm = get_task_mm(task); |
1589 | put_task_struct(task); | 1589 | put_task_struct(task); |
1590 | if (!mm) | 1590 | if (!mm) |
1591 | return -ENOENT; | 1591 | return -ENOENT; |
1592 | exe_file = get_mm_exe_file(mm); | 1592 | exe_file = get_mm_exe_file(mm); |
1593 | mmput(mm); | 1593 | mmput(mm); |
1594 | if (exe_file) { | 1594 | if (exe_file) { |
1595 | *exe_path = exe_file->f_path; | 1595 | *exe_path = exe_file->f_path; |
1596 | path_get(&exe_file->f_path); | 1596 | path_get(&exe_file->f_path); |
1597 | fput(exe_file); | 1597 | fput(exe_file); |
1598 | return 0; | 1598 | return 0; |
1599 | } else | 1599 | } else |
1600 | return -ENOENT; | 1600 | return -ENOENT; |
1601 | } | 1601 | } |
1602 | 1602 | ||
1603 | static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) | 1603 | static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) |
1604 | { | 1604 | { |
1605 | struct inode *inode = dentry->d_inode; | 1605 | struct inode *inode = dentry->d_inode; |
1606 | int error = -EACCES; | 1606 | int error = -EACCES; |
1607 | 1607 | ||
1608 | /* We don't need a base pointer in the /proc filesystem */ | 1608 | /* We don't need a base pointer in the /proc filesystem */ |
1609 | path_put(&nd->path); | 1609 | path_put(&nd->path); |
1610 | 1610 | ||
1611 | /* Are we allowed to snoop on the tasks file descriptors? */ | 1611 | /* Are we allowed to snoop on the tasks file descriptors? */ |
1612 | if (!proc_fd_access_allowed(inode)) | 1612 | if (!proc_fd_access_allowed(inode)) |
1613 | goto out; | 1613 | goto out; |
1614 | 1614 | ||
1615 | error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); | 1615 | error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); |
1616 | out: | 1616 | out: |
1617 | return ERR_PTR(error); | 1617 | return ERR_PTR(error); |
1618 | } | 1618 | } |
1619 | 1619 | ||
1620 | static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) | 1620 | static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) |
1621 | { | 1621 | { |
1622 | char *tmp = (char*)__get_free_page(GFP_TEMPORARY); | 1622 | char *tmp = (char*)__get_free_page(GFP_TEMPORARY); |
1623 | char *pathname; | 1623 | char *pathname; |
1624 | int len; | 1624 | int len; |
1625 | 1625 | ||
1626 | if (!tmp) | 1626 | if (!tmp) |
1627 | return -ENOMEM; | 1627 | return -ENOMEM; |
1628 | 1628 | ||
1629 | pathname = d_path(path, tmp, PAGE_SIZE); | 1629 | pathname = d_path(path, tmp, PAGE_SIZE); |
1630 | len = PTR_ERR(pathname); | 1630 | len = PTR_ERR(pathname); |
1631 | if (IS_ERR(pathname)) | 1631 | if (IS_ERR(pathname)) |
1632 | goto out; | 1632 | goto out; |
1633 | len = tmp + PAGE_SIZE - 1 - pathname; | 1633 | len = tmp + PAGE_SIZE - 1 - pathname; |
1634 | 1634 | ||
1635 | if (len > buflen) | 1635 | if (len > buflen) |
1636 | len = buflen; | 1636 | len = buflen; |
1637 | if (copy_to_user(buffer, pathname, len)) | 1637 | if (copy_to_user(buffer, pathname, len)) |
1638 | len = -EFAULT; | 1638 | len = -EFAULT; |
1639 | out: | 1639 | out: |
1640 | free_page((unsigned long)tmp); | 1640 | free_page((unsigned long)tmp); |
1641 | return len; | 1641 | return len; |
1642 | } | 1642 | } |
1643 | 1643 | ||
1644 | static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) | 1644 | static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) |
1645 | { | 1645 | { |
1646 | int error = -EACCES; | 1646 | int error = -EACCES; |
1647 | struct inode *inode = dentry->d_inode; | 1647 | struct inode *inode = dentry->d_inode; |
1648 | struct path path; | 1648 | struct path path; |
1649 | 1649 | ||
1650 | /* Are we allowed to snoop on the tasks file descriptors? */ | 1650 | /* Are we allowed to snoop on the tasks file descriptors? */ |
1651 | if (!proc_fd_access_allowed(inode)) | 1651 | if (!proc_fd_access_allowed(inode)) |
1652 | goto out; | 1652 | goto out; |
1653 | 1653 | ||
1654 | error = PROC_I(inode)->op.proc_get_link(inode, &path); | 1654 | error = PROC_I(inode)->op.proc_get_link(inode, &path); |
1655 | if (error) | 1655 | if (error) |
1656 | goto out; | 1656 | goto out; |
1657 | 1657 | ||
1658 | error = do_proc_readlink(&path, buffer, buflen); | 1658 | error = do_proc_readlink(&path, buffer, buflen); |
1659 | path_put(&path); | 1659 | path_put(&path); |
1660 | out: | 1660 | out: |
1661 | return error; | 1661 | return error; |
1662 | } | 1662 | } |
1663 | 1663 | ||
1664 | static const struct inode_operations proc_pid_link_inode_operations = { | 1664 | static const struct inode_operations proc_pid_link_inode_operations = { |
1665 | .readlink = proc_pid_readlink, | 1665 | .readlink = proc_pid_readlink, |
1666 | .follow_link = proc_pid_follow_link, | 1666 | .follow_link = proc_pid_follow_link, |
1667 | .setattr = proc_setattr, | 1667 | .setattr = proc_setattr, |
1668 | }; | 1668 | }; |
1669 | 1669 | ||
1670 | 1670 | ||
1671 | /* building an inode */ | 1671 | /* building an inode */ |
1672 | 1672 | ||
1673 | static int task_dumpable(struct task_struct *task) | 1673 | static int task_dumpable(struct task_struct *task) |
1674 | { | 1674 | { |
1675 | int dumpable = 0; | 1675 | int dumpable = 0; |
1676 | struct mm_struct *mm; | 1676 | struct mm_struct *mm; |
1677 | 1677 | ||
1678 | task_lock(task); | 1678 | task_lock(task); |
1679 | mm = task->mm; | 1679 | mm = task->mm; |
1680 | if (mm) | 1680 | if (mm) |
1681 | dumpable = get_dumpable(mm); | 1681 | dumpable = get_dumpable(mm); |
1682 | task_unlock(task); | 1682 | task_unlock(task); |
1683 | if(dumpable == 1) | 1683 | if(dumpable == 1) |
1684 | return 1; | 1684 | return 1; |
1685 | return 0; | 1685 | return 0; |
1686 | } | 1686 | } |
1687 | 1687 | ||
1688 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) | 1688 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) |
1689 | { | 1689 | { |
1690 | struct inode * inode; | 1690 | struct inode * inode; |
1691 | struct proc_inode *ei; | 1691 | struct proc_inode *ei; |
1692 | const struct cred *cred; | 1692 | const struct cred *cred; |
1693 | 1693 | ||
1694 | /* We need a new inode */ | 1694 | /* We need a new inode */ |
1695 | 1695 | ||
1696 | inode = new_inode(sb); | 1696 | inode = new_inode(sb); |
1697 | if (!inode) | 1697 | if (!inode) |
1698 | goto out; | 1698 | goto out; |
1699 | 1699 | ||
1700 | /* Common stuff */ | 1700 | /* Common stuff */ |
1701 | ei = PROC_I(inode); | 1701 | ei = PROC_I(inode); |
1702 | inode->i_ino = get_next_ino(); | 1702 | inode->i_ino = get_next_ino(); |
1703 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 1703 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
1704 | inode->i_op = &proc_def_inode_operations; | 1704 | inode->i_op = &proc_def_inode_operations; |
1705 | 1705 | ||
1706 | /* | 1706 | /* |
1707 | * grab the reference to task. | 1707 | * grab the reference to task. |
1708 | */ | 1708 | */ |
1709 | ei->pid = get_task_pid(task, PIDTYPE_PID); | 1709 | ei->pid = get_task_pid(task, PIDTYPE_PID); |
1710 | if (!ei->pid) | 1710 | if (!ei->pid) |
1711 | goto out_unlock; | 1711 | goto out_unlock; |
1712 | 1712 | ||
1713 | if (task_dumpable(task)) { | 1713 | if (task_dumpable(task)) { |
1714 | rcu_read_lock(); | 1714 | rcu_read_lock(); |
1715 | cred = __task_cred(task); | 1715 | cred = __task_cred(task); |
1716 | inode->i_uid = cred->euid; | 1716 | inode->i_uid = cred->euid; |
1717 | inode->i_gid = cred->egid; | 1717 | inode->i_gid = cred->egid; |
1718 | rcu_read_unlock(); | 1718 | rcu_read_unlock(); |
1719 | } | 1719 | } |
1720 | security_task_to_inode(task, inode); | 1720 | security_task_to_inode(task, inode); |
1721 | 1721 | ||
1722 | out: | 1722 | out: |
1723 | return inode; | 1723 | return inode; |
1724 | 1724 | ||
1725 | out_unlock: | 1725 | out_unlock: |
1726 | iput(inode); | 1726 | iput(inode); |
1727 | return NULL; | 1727 | return NULL; |
1728 | } | 1728 | } |
1729 | 1729 | ||
1730 | int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 1730 | int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
1731 | { | 1731 | { |
1732 | struct inode *inode = dentry->d_inode; | 1732 | struct inode *inode = dentry->d_inode; |
1733 | struct task_struct *task; | 1733 | struct task_struct *task; |
1734 | const struct cred *cred; | 1734 | const struct cred *cred; |
1735 | 1735 | ||
1736 | generic_fillattr(inode, stat); | 1736 | generic_fillattr(inode, stat); |
1737 | 1737 | ||
1738 | rcu_read_lock(); | 1738 | rcu_read_lock(); |
1739 | stat->uid = 0; | 1739 | stat->uid = 0; |
1740 | stat->gid = 0; | 1740 | stat->gid = 0; |
1741 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | 1741 | task = pid_task(proc_pid(inode), PIDTYPE_PID); |
1742 | if (task) { | 1742 | if (task) { |
1743 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || | 1743 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || |
1744 | task_dumpable(task)) { | 1744 | task_dumpable(task)) { |
1745 | cred = __task_cred(task); | 1745 | cred = __task_cred(task); |
1746 | stat->uid = cred->euid; | 1746 | stat->uid = cred->euid; |
1747 | stat->gid = cred->egid; | 1747 | stat->gid = cred->egid; |
1748 | } | 1748 | } |
1749 | } | 1749 | } |
1750 | rcu_read_unlock(); | 1750 | rcu_read_unlock(); |
1751 | return 0; | 1751 | return 0; |
1752 | } | 1752 | } |
1753 | 1753 | ||
1754 | /* dentry stuff */ | 1754 | /* dentry stuff */ |
1755 | 1755 | ||
1756 | /* | 1756 | /* |
1757 | * Exceptional case: normally we are not allowed to unhash a busy | 1757 | * Exceptional case: normally we are not allowed to unhash a busy |
1758 | * directory. In this case, however, we can do it - no aliasing problems | 1758 | * directory. In this case, however, we can do it - no aliasing problems |
1759 | * due to the way we treat inodes. | 1759 | * due to the way we treat inodes. |
1760 | * | 1760 | * |
1761 | * Rewrite the inode's ownerships here because the owning task may have | 1761 | * Rewrite the inode's ownerships here because the owning task may have |
1762 | * performed a setuid(), etc. | 1762 | * performed a setuid(), etc. |
1763 | * | 1763 | * |
1764 | * Before the /proc/pid/status file was created the only way to read | 1764 | * Before the /proc/pid/status file was created the only way to read |
1765 | * the effective uid of a /process was to stat /proc/pid. Reading | 1765 | * the effective uid of a /process was to stat /proc/pid. Reading |
1766 | * /proc/pid/status is slow enough that procps and other packages | 1766 | * /proc/pid/status is slow enough that procps and other packages |
1767 | * kept stating /proc/pid. To keep the rules in /proc simple I have | 1767 | * kept stating /proc/pid. To keep the rules in /proc simple I have |
1768 | * made this apply to all per process world readable and executable | 1768 | * made this apply to all per process world readable and executable |
1769 | * directories. | 1769 | * directories. |
1770 | */ | 1770 | */ |
1771 | int pid_revalidate(struct dentry *dentry, struct nameidata *nd) | 1771 | int pid_revalidate(struct dentry *dentry, struct nameidata *nd) |
1772 | { | 1772 | { |
1773 | struct inode *inode; | 1773 | struct inode *inode; |
1774 | struct task_struct *task; | 1774 | struct task_struct *task; |
1775 | const struct cred *cred; | 1775 | const struct cred *cred; |
1776 | 1776 | ||
1777 | if (nd && nd->flags & LOOKUP_RCU) | 1777 | if (nd && nd->flags & LOOKUP_RCU) |
1778 | return -ECHILD; | 1778 | return -ECHILD; |
1779 | 1779 | ||
1780 | inode = dentry->d_inode; | 1780 | inode = dentry->d_inode; |
1781 | task = get_proc_task(inode); | 1781 | task = get_proc_task(inode); |
1782 | 1782 | ||
1783 | if (task) { | 1783 | if (task) { |
1784 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || | 1784 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || |
1785 | task_dumpable(task)) { | 1785 | task_dumpable(task)) { |
1786 | rcu_read_lock(); | 1786 | rcu_read_lock(); |
1787 | cred = __task_cred(task); | 1787 | cred = __task_cred(task); |
1788 | inode->i_uid = cred->euid; | 1788 | inode->i_uid = cred->euid; |
1789 | inode->i_gid = cred->egid; | 1789 | inode->i_gid = cred->egid; |
1790 | rcu_read_unlock(); | 1790 | rcu_read_unlock(); |
1791 | } else { | 1791 | } else { |
1792 | inode->i_uid = 0; | 1792 | inode->i_uid = 0; |
1793 | inode->i_gid = 0; | 1793 | inode->i_gid = 0; |
1794 | } | 1794 | } |
1795 | inode->i_mode &= ~(S_ISUID | S_ISGID); | 1795 | inode->i_mode &= ~(S_ISUID | S_ISGID); |
1796 | security_task_to_inode(task, inode); | 1796 | security_task_to_inode(task, inode); |
1797 | put_task_struct(task); | 1797 | put_task_struct(task); |
1798 | return 1; | 1798 | return 1; |
1799 | } | 1799 | } |
1800 | d_drop(dentry); | 1800 | d_drop(dentry); |
1801 | return 0; | 1801 | return 0; |
1802 | } | 1802 | } |
1803 | 1803 | ||
1804 | static int pid_delete_dentry(const struct dentry * dentry) | 1804 | static int pid_delete_dentry(const struct dentry * dentry) |
1805 | { | 1805 | { |
1806 | /* Is the task we represent dead? | 1806 | /* Is the task we represent dead? |
1807 | * If so, then don't put the dentry on the lru list, | 1807 | * If so, then don't put the dentry on the lru list, |
1808 | * kill it immediately. | 1808 | * kill it immediately. |
1809 | */ | 1809 | */ |
1810 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | 1810 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; |
1811 | } | 1811 | } |
1812 | 1812 | ||
1813 | const struct dentry_operations pid_dentry_operations = | 1813 | const struct dentry_operations pid_dentry_operations = |
1814 | { | 1814 | { |
1815 | .d_revalidate = pid_revalidate, | 1815 | .d_revalidate = pid_revalidate, |
1816 | .d_delete = pid_delete_dentry, | 1816 | .d_delete = pid_delete_dentry, |
1817 | }; | 1817 | }; |
1818 | 1818 | ||
1819 | /* Lookups */ | 1819 | /* Lookups */ |
1820 | 1820 | ||
1821 | /* | 1821 | /* |
1822 | * Fill a directory entry. | 1822 | * Fill a directory entry. |
1823 | * | 1823 | * |
1824 | * If possible create the dcache entry and derive our inode number and | 1824 | * If possible create the dcache entry and derive our inode number and |
1825 | * file type from dcache entry. | 1825 | * file type from dcache entry. |
1826 | * | 1826 | * |
1827 | * Since all of the proc inode numbers are dynamically generated, the inode | 1827 | * Since all of the proc inode numbers are dynamically generated, the inode |
1828 | * numbers do not exist until the inode is cache. This means creating the | 1828 | * numbers do not exist until the inode is cache. This means creating the |
1829 | * the dcache entry in readdir is necessary to keep the inode numbers | 1829 | * the dcache entry in readdir is necessary to keep the inode numbers |
1830 | * reported by readdir in sync with the inode numbers reported | 1830 | * reported by readdir in sync with the inode numbers reported |
1831 | * by stat. | 1831 | * by stat. |
1832 | */ | 1832 | */ |
1833 | int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 1833 | int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, |
1834 | const char *name, int len, | 1834 | const char *name, int len, |
1835 | instantiate_t instantiate, struct task_struct *task, const void *ptr) | 1835 | instantiate_t instantiate, struct task_struct *task, const void *ptr) |
1836 | { | 1836 | { |
1837 | struct dentry *child, *dir = filp->f_path.dentry; | 1837 | struct dentry *child, *dir = filp->f_path.dentry; |
1838 | struct inode *inode; | 1838 | struct inode *inode; |
1839 | struct qstr qname; | 1839 | struct qstr qname; |
1840 | ino_t ino = 0; | 1840 | ino_t ino = 0; |
1841 | unsigned type = DT_UNKNOWN; | 1841 | unsigned type = DT_UNKNOWN; |
1842 | 1842 | ||
1843 | qname.name = name; | 1843 | qname.name = name; |
1844 | qname.len = len; | 1844 | qname.len = len; |
1845 | qname.hash = full_name_hash(name, len); | 1845 | qname.hash = full_name_hash(name, len); |
1846 | 1846 | ||
1847 | child = d_lookup(dir, &qname); | 1847 | child = d_lookup(dir, &qname); |
1848 | if (!child) { | 1848 | if (!child) { |
1849 | struct dentry *new; | 1849 | struct dentry *new; |
1850 | new = d_alloc(dir, &qname); | 1850 | new = d_alloc(dir, &qname); |
1851 | if (new) { | 1851 | if (new) { |
1852 | child = instantiate(dir->d_inode, new, task, ptr); | 1852 | child = instantiate(dir->d_inode, new, task, ptr); |
1853 | if (child) | 1853 | if (child) |
1854 | dput(new); | 1854 | dput(new); |
1855 | else | 1855 | else |
1856 | child = new; | 1856 | child = new; |
1857 | } | 1857 | } |
1858 | } | 1858 | } |
1859 | if (!child || IS_ERR(child) || !child->d_inode) | 1859 | if (!child || IS_ERR(child) || !child->d_inode) |
1860 | goto end_instantiate; | 1860 | goto end_instantiate; |
1861 | inode = child->d_inode; | 1861 | inode = child->d_inode; |
1862 | if (inode) { | 1862 | if (inode) { |
1863 | ino = inode->i_ino; | 1863 | ino = inode->i_ino; |
1864 | type = inode->i_mode >> 12; | 1864 | type = inode->i_mode >> 12; |
1865 | } | 1865 | } |
1866 | dput(child); | 1866 | dput(child); |
1867 | end_instantiate: | 1867 | end_instantiate: |
1868 | if (!ino) | 1868 | if (!ino) |
1869 | ino = find_inode_number(dir, &qname); | 1869 | ino = find_inode_number(dir, &qname); |
1870 | if (!ino) | 1870 | if (!ino) |
1871 | ino = 1; | 1871 | ino = 1; |
1872 | return filldir(dirent, name, len, filp->f_pos, ino, type); | 1872 | return filldir(dirent, name, len, filp->f_pos, ino, type); |
1873 | } | 1873 | } |
1874 | 1874 | ||
1875 | static unsigned name_to_int(struct dentry *dentry) | 1875 | static unsigned name_to_int(struct dentry *dentry) |
1876 | { | 1876 | { |
1877 | const char *name = dentry->d_name.name; | 1877 | const char *name = dentry->d_name.name; |
1878 | int len = dentry->d_name.len; | 1878 | int len = dentry->d_name.len; |
1879 | unsigned n = 0; | 1879 | unsigned n = 0; |
1880 | 1880 | ||
1881 | if (len > 1 && *name == '0') | 1881 | if (len > 1 && *name == '0') |
1882 | goto out; | 1882 | goto out; |
1883 | while (len-- > 0) { | 1883 | while (len-- > 0) { |
1884 | unsigned c = *name++ - '0'; | 1884 | unsigned c = *name++ - '0'; |
1885 | if (c > 9) | 1885 | if (c > 9) |
1886 | goto out; | 1886 | goto out; |
1887 | if (n >= (~0U-9)/10) | 1887 | if (n >= (~0U-9)/10) |
1888 | goto out; | 1888 | goto out; |
1889 | n *= 10; | 1889 | n *= 10; |
1890 | n += c; | 1890 | n += c; |
1891 | } | 1891 | } |
1892 | return n; | 1892 | return n; |
1893 | out: | 1893 | out: |
1894 | return ~0U; | 1894 | return ~0U; |
1895 | } | 1895 | } |
1896 | 1896 | ||
1897 | #define PROC_FDINFO_MAX 64 | 1897 | #define PROC_FDINFO_MAX 64 |
1898 | 1898 | ||
1899 | static int proc_fd_info(struct inode *inode, struct path *path, char *info) | 1899 | static int proc_fd_info(struct inode *inode, struct path *path, char *info) |
1900 | { | 1900 | { |
1901 | struct task_struct *task = get_proc_task(inode); | 1901 | struct task_struct *task = get_proc_task(inode); |
1902 | struct files_struct *files = NULL; | 1902 | struct files_struct *files = NULL; |
1903 | struct file *file; | 1903 | struct file *file; |
1904 | int fd = proc_fd(inode); | 1904 | int fd = proc_fd(inode); |
1905 | 1905 | ||
1906 | if (task) { | 1906 | if (task) { |
1907 | files = get_files_struct(task); | 1907 | files = get_files_struct(task); |
1908 | put_task_struct(task); | 1908 | put_task_struct(task); |
1909 | } | 1909 | } |
1910 | if (files) { | 1910 | if (files) { |
1911 | /* | 1911 | /* |
1912 | * We are not taking a ref to the file structure, so we must | 1912 | * We are not taking a ref to the file structure, so we must |
1913 | * hold ->file_lock. | 1913 | * hold ->file_lock. |
1914 | */ | 1914 | */ |
1915 | spin_lock(&files->file_lock); | 1915 | spin_lock(&files->file_lock); |
1916 | file = fcheck_files(files, fd); | 1916 | file = fcheck_files(files, fd); |
1917 | if (file) { | 1917 | if (file) { |
1918 | if (path) { | 1918 | if (path) { |
1919 | *path = file->f_path; | 1919 | *path = file->f_path; |
1920 | path_get(&file->f_path); | 1920 | path_get(&file->f_path); |
1921 | } | 1921 | } |
1922 | if (info) | 1922 | if (info) |
1923 | snprintf(info, PROC_FDINFO_MAX, | 1923 | snprintf(info, PROC_FDINFO_MAX, |
1924 | "pos:\t%lli\n" | 1924 | "pos:\t%lli\n" |
1925 | "flags:\t0%o\n", | 1925 | "flags:\t0%o\n", |
1926 | (long long) file->f_pos, | 1926 | (long long) file->f_pos, |
1927 | file->f_flags); | 1927 | file->f_flags); |
1928 | spin_unlock(&files->file_lock); | 1928 | spin_unlock(&files->file_lock); |
1929 | put_files_struct(files); | 1929 | put_files_struct(files); |
1930 | return 0; | 1930 | return 0; |
1931 | } | 1931 | } |
1932 | spin_unlock(&files->file_lock); | 1932 | spin_unlock(&files->file_lock); |
1933 | put_files_struct(files); | 1933 | put_files_struct(files); |
1934 | } | 1934 | } |
1935 | return -ENOENT; | 1935 | return -ENOENT; |
1936 | } | 1936 | } |
1937 | 1937 | ||
1938 | static int proc_fd_link(struct inode *inode, struct path *path) | 1938 | static int proc_fd_link(struct inode *inode, struct path *path) |
1939 | { | 1939 | { |
1940 | return proc_fd_info(inode, path, NULL); | 1940 | return proc_fd_info(inode, path, NULL); |
1941 | } | 1941 | } |
1942 | 1942 | ||
1943 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | 1943 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) |
1944 | { | 1944 | { |
1945 | struct inode *inode; | 1945 | struct inode *inode; |
1946 | struct task_struct *task; | 1946 | struct task_struct *task; |
1947 | int fd; | 1947 | int fd; |
1948 | struct files_struct *files; | 1948 | struct files_struct *files; |
1949 | const struct cred *cred; | 1949 | const struct cred *cred; |
1950 | 1950 | ||
1951 | if (nd && nd->flags & LOOKUP_RCU) | 1951 | if (nd && nd->flags & LOOKUP_RCU) |
1952 | return -ECHILD; | 1952 | return -ECHILD; |
1953 | 1953 | ||
1954 | inode = dentry->d_inode; | 1954 | inode = dentry->d_inode; |
1955 | task = get_proc_task(inode); | 1955 | task = get_proc_task(inode); |
1956 | fd = proc_fd(inode); | 1956 | fd = proc_fd(inode); |
1957 | 1957 | ||
1958 | if (task) { | 1958 | if (task) { |
1959 | files = get_files_struct(task); | 1959 | files = get_files_struct(task); |
1960 | if (files) { | 1960 | if (files) { |
1961 | rcu_read_lock(); | 1961 | rcu_read_lock(); |
1962 | if (fcheck_files(files, fd)) { | 1962 | if (fcheck_files(files, fd)) { |
1963 | rcu_read_unlock(); | 1963 | rcu_read_unlock(); |
1964 | put_files_struct(files); | 1964 | put_files_struct(files); |
1965 | if (task_dumpable(task)) { | 1965 | if (task_dumpable(task)) { |
1966 | rcu_read_lock(); | 1966 | rcu_read_lock(); |
1967 | cred = __task_cred(task); | 1967 | cred = __task_cred(task); |
1968 | inode->i_uid = cred->euid; | 1968 | inode->i_uid = cred->euid; |
1969 | inode->i_gid = cred->egid; | 1969 | inode->i_gid = cred->egid; |
1970 | rcu_read_unlock(); | 1970 | rcu_read_unlock(); |
1971 | } else { | 1971 | } else { |
1972 | inode->i_uid = 0; | 1972 | inode->i_uid = 0; |
1973 | inode->i_gid = 0; | 1973 | inode->i_gid = 0; |
1974 | } | 1974 | } |
1975 | inode->i_mode &= ~(S_ISUID | S_ISGID); | 1975 | inode->i_mode &= ~(S_ISUID | S_ISGID); |
1976 | security_task_to_inode(task, inode); | 1976 | security_task_to_inode(task, inode); |
1977 | put_task_struct(task); | 1977 | put_task_struct(task); |
1978 | return 1; | 1978 | return 1; |
1979 | } | 1979 | } |
1980 | rcu_read_unlock(); | 1980 | rcu_read_unlock(); |
1981 | put_files_struct(files); | 1981 | put_files_struct(files); |
1982 | } | 1982 | } |
1983 | put_task_struct(task); | 1983 | put_task_struct(task); |
1984 | } | 1984 | } |
1985 | d_drop(dentry); | 1985 | d_drop(dentry); |
1986 | return 0; | 1986 | return 0; |
1987 | } | 1987 | } |
1988 | 1988 | ||
1989 | static const struct dentry_operations tid_fd_dentry_operations = | 1989 | static const struct dentry_operations tid_fd_dentry_operations = |
1990 | { | 1990 | { |
1991 | .d_revalidate = tid_fd_revalidate, | 1991 | .d_revalidate = tid_fd_revalidate, |
1992 | .d_delete = pid_delete_dentry, | 1992 | .d_delete = pid_delete_dentry, |
1993 | }; | 1993 | }; |
1994 | 1994 | ||
1995 | static struct dentry *proc_fd_instantiate(struct inode *dir, | 1995 | static struct dentry *proc_fd_instantiate(struct inode *dir, |
1996 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 1996 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
1997 | { | 1997 | { |
1998 | unsigned fd = *(const unsigned *)ptr; | 1998 | unsigned fd = *(const unsigned *)ptr; |
1999 | struct file *file; | 1999 | struct file *file; |
2000 | struct files_struct *files; | 2000 | struct files_struct *files; |
2001 | struct inode *inode; | 2001 | struct inode *inode; |
2002 | struct proc_inode *ei; | 2002 | struct proc_inode *ei; |
2003 | struct dentry *error = ERR_PTR(-ENOENT); | 2003 | struct dentry *error = ERR_PTR(-ENOENT); |
2004 | 2004 | ||
2005 | inode = proc_pid_make_inode(dir->i_sb, task); | 2005 | inode = proc_pid_make_inode(dir->i_sb, task); |
2006 | if (!inode) | 2006 | if (!inode) |
2007 | goto out; | 2007 | goto out; |
2008 | ei = PROC_I(inode); | 2008 | ei = PROC_I(inode); |
2009 | ei->fd = fd; | 2009 | ei->fd = fd; |
2010 | files = get_files_struct(task); | 2010 | files = get_files_struct(task); |
2011 | if (!files) | 2011 | if (!files) |
2012 | goto out_iput; | 2012 | goto out_iput; |
2013 | inode->i_mode = S_IFLNK; | 2013 | inode->i_mode = S_IFLNK; |
2014 | 2014 | ||
2015 | /* | 2015 | /* |
2016 | * We are not taking a ref to the file structure, so we must | 2016 | * We are not taking a ref to the file structure, so we must |
2017 | * hold ->file_lock. | 2017 | * hold ->file_lock. |
2018 | */ | 2018 | */ |
2019 | spin_lock(&files->file_lock); | 2019 | spin_lock(&files->file_lock); |
2020 | file = fcheck_files(files, fd); | 2020 | file = fcheck_files(files, fd); |
2021 | if (!file) | 2021 | if (!file) |
2022 | goto out_unlock; | 2022 | goto out_unlock; |
2023 | if (file->f_mode & FMODE_READ) | 2023 | if (file->f_mode & FMODE_READ) |
2024 | inode->i_mode |= S_IRUSR | S_IXUSR; | 2024 | inode->i_mode |= S_IRUSR | S_IXUSR; |
2025 | if (file->f_mode & FMODE_WRITE) | 2025 | if (file->f_mode & FMODE_WRITE) |
2026 | inode->i_mode |= S_IWUSR | S_IXUSR; | 2026 | inode->i_mode |= S_IWUSR | S_IXUSR; |
2027 | spin_unlock(&files->file_lock); | 2027 | spin_unlock(&files->file_lock); |
2028 | put_files_struct(files); | 2028 | put_files_struct(files); |
2029 | 2029 | ||
2030 | inode->i_op = &proc_pid_link_inode_operations; | 2030 | inode->i_op = &proc_pid_link_inode_operations; |
2031 | inode->i_size = 64; | 2031 | inode->i_size = 64; |
2032 | ei->op.proc_get_link = proc_fd_link; | 2032 | ei->op.proc_get_link = proc_fd_link; |
2033 | d_set_d_op(dentry, &tid_fd_dentry_operations); | 2033 | d_set_d_op(dentry, &tid_fd_dentry_operations); |
2034 | d_add(dentry, inode); | 2034 | d_add(dentry, inode); |
2035 | /* Close the race of the process dying before we return the dentry */ | 2035 | /* Close the race of the process dying before we return the dentry */ |
2036 | if (tid_fd_revalidate(dentry, NULL)) | 2036 | if (tid_fd_revalidate(dentry, NULL)) |
2037 | error = NULL; | 2037 | error = NULL; |
2038 | 2038 | ||
2039 | out: | 2039 | out: |
2040 | return error; | 2040 | return error; |
2041 | out_unlock: | 2041 | out_unlock: |
2042 | spin_unlock(&files->file_lock); | 2042 | spin_unlock(&files->file_lock); |
2043 | put_files_struct(files); | 2043 | put_files_struct(files); |
2044 | out_iput: | 2044 | out_iput: |
2045 | iput(inode); | 2045 | iput(inode); |
2046 | goto out; | 2046 | goto out; |
2047 | } | 2047 | } |
2048 | 2048 | ||
2049 | static struct dentry *proc_lookupfd_common(struct inode *dir, | 2049 | static struct dentry *proc_lookupfd_common(struct inode *dir, |
2050 | struct dentry *dentry, | 2050 | struct dentry *dentry, |
2051 | instantiate_t instantiate) | 2051 | instantiate_t instantiate) |
2052 | { | 2052 | { |
2053 | struct task_struct *task = get_proc_task(dir); | 2053 | struct task_struct *task = get_proc_task(dir); |
2054 | unsigned fd = name_to_int(dentry); | 2054 | unsigned fd = name_to_int(dentry); |
2055 | struct dentry *result = ERR_PTR(-ENOENT); | 2055 | struct dentry *result = ERR_PTR(-ENOENT); |
2056 | 2056 | ||
2057 | if (!task) | 2057 | if (!task) |
2058 | goto out_no_task; | 2058 | goto out_no_task; |
2059 | if (fd == ~0U) | 2059 | if (fd == ~0U) |
2060 | goto out; | 2060 | goto out; |
2061 | 2061 | ||
2062 | result = instantiate(dir, dentry, task, &fd); | 2062 | result = instantiate(dir, dentry, task, &fd); |
2063 | out: | 2063 | out: |
2064 | put_task_struct(task); | 2064 | put_task_struct(task); |
2065 | out_no_task: | 2065 | out_no_task: |
2066 | return result; | 2066 | return result; |
2067 | } | 2067 | } |
2068 | 2068 | ||
2069 | static int proc_readfd_common(struct file * filp, void * dirent, | 2069 | static int proc_readfd_common(struct file * filp, void * dirent, |
2070 | filldir_t filldir, instantiate_t instantiate) | 2070 | filldir_t filldir, instantiate_t instantiate) |
2071 | { | 2071 | { |
2072 | struct dentry *dentry = filp->f_path.dentry; | 2072 | struct dentry *dentry = filp->f_path.dentry; |
2073 | struct inode *inode = dentry->d_inode; | 2073 | struct inode *inode = dentry->d_inode; |
2074 | struct task_struct *p = get_proc_task(inode); | 2074 | struct task_struct *p = get_proc_task(inode); |
2075 | unsigned int fd, ino; | 2075 | unsigned int fd, ino; |
2076 | int retval; | 2076 | int retval; |
2077 | struct files_struct * files; | 2077 | struct files_struct * files; |
2078 | 2078 | ||
2079 | retval = -ENOENT; | 2079 | retval = -ENOENT; |
2080 | if (!p) | 2080 | if (!p) |
2081 | goto out_no_task; | 2081 | goto out_no_task; |
2082 | retval = 0; | 2082 | retval = 0; |
2083 | 2083 | ||
2084 | fd = filp->f_pos; | 2084 | fd = filp->f_pos; |
2085 | switch (fd) { | 2085 | switch (fd) { |
2086 | case 0: | 2086 | case 0: |
2087 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | 2087 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) |
2088 | goto out; | 2088 | goto out; |
2089 | filp->f_pos++; | 2089 | filp->f_pos++; |
2090 | case 1: | 2090 | case 1: |
2091 | ino = parent_ino(dentry); | 2091 | ino = parent_ino(dentry); |
2092 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | 2092 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) |
2093 | goto out; | 2093 | goto out; |
2094 | filp->f_pos++; | 2094 | filp->f_pos++; |
2095 | default: | 2095 | default: |
2096 | files = get_files_struct(p); | 2096 | files = get_files_struct(p); |
2097 | if (!files) | 2097 | if (!files) |
2098 | goto out; | 2098 | goto out; |
2099 | rcu_read_lock(); | 2099 | rcu_read_lock(); |
2100 | for (fd = filp->f_pos-2; | 2100 | for (fd = filp->f_pos-2; |
2101 | fd < files_fdtable(files)->max_fds; | 2101 | fd < files_fdtable(files)->max_fds; |
2102 | fd++, filp->f_pos++) { | 2102 | fd++, filp->f_pos++) { |
2103 | char name[PROC_NUMBUF]; | 2103 | char name[PROC_NUMBUF]; |
2104 | int len; | 2104 | int len; |
2105 | 2105 | ||
2106 | if (!fcheck_files(files, fd)) | 2106 | if (!fcheck_files(files, fd)) |
2107 | continue; | 2107 | continue; |
2108 | rcu_read_unlock(); | 2108 | rcu_read_unlock(); |
2109 | 2109 | ||
2110 | len = snprintf(name, sizeof(name), "%d", fd); | 2110 | len = snprintf(name, sizeof(name), "%d", fd); |
2111 | if (proc_fill_cache(filp, dirent, filldir, | 2111 | if (proc_fill_cache(filp, dirent, filldir, |
2112 | name, len, instantiate, | 2112 | name, len, instantiate, |
2113 | p, &fd) < 0) { | 2113 | p, &fd) < 0) { |
2114 | rcu_read_lock(); | 2114 | rcu_read_lock(); |
2115 | break; | 2115 | break; |
2116 | } | 2116 | } |
2117 | rcu_read_lock(); | 2117 | rcu_read_lock(); |
2118 | } | 2118 | } |
2119 | rcu_read_unlock(); | 2119 | rcu_read_unlock(); |
2120 | put_files_struct(files); | 2120 | put_files_struct(files); |
2121 | } | 2121 | } |
2122 | out: | 2122 | out: |
2123 | put_task_struct(p); | 2123 | put_task_struct(p); |
2124 | out_no_task: | 2124 | out_no_task: |
2125 | return retval; | 2125 | return retval; |
2126 | } | 2126 | } |
2127 | 2127 | ||
2128 | static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, | 2128 | static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, |
2129 | struct nameidata *nd) | 2129 | struct nameidata *nd) |
2130 | { | 2130 | { |
2131 | return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); | 2131 | return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); |
2132 | } | 2132 | } |
2133 | 2133 | ||
2134 | static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) | 2134 | static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) |
2135 | { | 2135 | { |
2136 | return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); | 2136 | return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); |
2137 | } | 2137 | } |
2138 | 2138 | ||
2139 | static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, | 2139 | static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, |
2140 | size_t len, loff_t *ppos) | 2140 | size_t len, loff_t *ppos) |
2141 | { | 2141 | { |
2142 | char tmp[PROC_FDINFO_MAX]; | 2142 | char tmp[PROC_FDINFO_MAX]; |
2143 | int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); | 2143 | int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); |
2144 | if (!err) | 2144 | if (!err) |
2145 | err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); | 2145 | err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); |
2146 | return err; | 2146 | return err; |
2147 | } | 2147 | } |
2148 | 2148 | ||
2149 | static const struct file_operations proc_fdinfo_file_operations = { | 2149 | static const struct file_operations proc_fdinfo_file_operations = { |
2150 | .open = nonseekable_open, | 2150 | .open = nonseekable_open, |
2151 | .read = proc_fdinfo_read, | 2151 | .read = proc_fdinfo_read, |
2152 | .llseek = no_llseek, | 2152 | .llseek = no_llseek, |
2153 | }; | 2153 | }; |
2154 | 2154 | ||
2155 | static const struct file_operations proc_fd_operations = { | 2155 | static const struct file_operations proc_fd_operations = { |
2156 | .read = generic_read_dir, | 2156 | .read = generic_read_dir, |
2157 | .readdir = proc_readfd, | 2157 | .readdir = proc_readfd, |
2158 | .llseek = default_llseek, | 2158 | .llseek = default_llseek, |
2159 | }; | 2159 | }; |
2160 | 2160 | ||
2161 | /* | 2161 | /* |
2162 | * /proc/pid/fd needs a special permission handler so that a process can still | 2162 | * /proc/pid/fd needs a special permission handler so that a process can still |
2163 | * access /proc/self/fd after it has executed a setuid(). | 2163 | * access /proc/self/fd after it has executed a setuid(). |
2164 | */ | 2164 | */ |
2165 | static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags) | 2165 | static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags) |
2166 | { | 2166 | { |
2167 | int rv; | 2167 | int rv; |
2168 | 2168 | ||
2169 | if (flags & IPERM_FLAG_RCU) | 2169 | if (flags & IPERM_FLAG_RCU) |
2170 | return -ECHILD; | 2170 | return -ECHILD; |
2171 | rv = generic_permission(inode, mask, flags, NULL); | 2171 | rv = generic_permission(inode, mask, flags, NULL); |
2172 | if (rv == 0) | 2172 | if (rv == 0) |
2173 | return 0; | 2173 | return 0; |
2174 | if (task_pid(current) == proc_pid(inode)) | 2174 | if (task_pid(current) == proc_pid(inode)) |
2175 | rv = 0; | 2175 | rv = 0; |
2176 | return rv; | 2176 | return rv; |
2177 | } | 2177 | } |
2178 | 2178 | ||
2179 | /* | 2179 | /* |
2180 | * proc directories can do almost nothing.. | 2180 | * proc directories can do almost nothing.. |
2181 | */ | 2181 | */ |
2182 | static const struct inode_operations proc_fd_inode_operations = { | 2182 | static const struct inode_operations proc_fd_inode_operations = { |
2183 | .lookup = proc_lookupfd, | 2183 | .lookup = proc_lookupfd, |
2184 | .permission = proc_fd_permission, | 2184 | .permission = proc_fd_permission, |
2185 | .setattr = proc_setattr, | 2185 | .setattr = proc_setattr, |
2186 | }; | 2186 | }; |
2187 | 2187 | ||
2188 | static struct dentry *proc_fdinfo_instantiate(struct inode *dir, | 2188 | static struct dentry *proc_fdinfo_instantiate(struct inode *dir, |
2189 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 2189 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
2190 | { | 2190 | { |
2191 | unsigned fd = *(unsigned *)ptr; | 2191 | unsigned fd = *(unsigned *)ptr; |
2192 | struct inode *inode; | 2192 | struct inode *inode; |
2193 | struct proc_inode *ei; | 2193 | struct proc_inode *ei; |
2194 | struct dentry *error = ERR_PTR(-ENOENT); | 2194 | struct dentry *error = ERR_PTR(-ENOENT); |
2195 | 2195 | ||
2196 | inode = proc_pid_make_inode(dir->i_sb, task); | 2196 | inode = proc_pid_make_inode(dir->i_sb, task); |
2197 | if (!inode) | 2197 | if (!inode) |
2198 | goto out; | 2198 | goto out; |
2199 | ei = PROC_I(inode); | 2199 | ei = PROC_I(inode); |
2200 | ei->fd = fd; | 2200 | ei->fd = fd; |
2201 | inode->i_mode = S_IFREG | S_IRUSR; | 2201 | inode->i_mode = S_IFREG | S_IRUSR; |
2202 | inode->i_fop = &proc_fdinfo_file_operations; | 2202 | inode->i_fop = &proc_fdinfo_file_operations; |
2203 | d_set_d_op(dentry, &tid_fd_dentry_operations); | 2203 | d_set_d_op(dentry, &tid_fd_dentry_operations); |
2204 | d_add(dentry, inode); | 2204 | d_add(dentry, inode); |
2205 | /* Close the race of the process dying before we return the dentry */ | 2205 | /* Close the race of the process dying before we return the dentry */ |
2206 | if (tid_fd_revalidate(dentry, NULL)) | 2206 | if (tid_fd_revalidate(dentry, NULL)) |
2207 | error = NULL; | 2207 | error = NULL; |
2208 | 2208 | ||
2209 | out: | 2209 | out: |
2210 | return error; | 2210 | return error; |
2211 | } | 2211 | } |
2212 | 2212 | ||
2213 | static struct dentry *proc_lookupfdinfo(struct inode *dir, | 2213 | static struct dentry *proc_lookupfdinfo(struct inode *dir, |
2214 | struct dentry *dentry, | 2214 | struct dentry *dentry, |
2215 | struct nameidata *nd) | 2215 | struct nameidata *nd) |
2216 | { | 2216 | { |
2217 | return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); | 2217 | return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); |
2218 | } | 2218 | } |
2219 | 2219 | ||
2220 | static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) | 2220 | static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) |
2221 | { | 2221 | { |
2222 | return proc_readfd_common(filp, dirent, filldir, | 2222 | return proc_readfd_common(filp, dirent, filldir, |
2223 | proc_fdinfo_instantiate); | 2223 | proc_fdinfo_instantiate); |
2224 | } | 2224 | } |
2225 | 2225 | ||
2226 | static const struct file_operations proc_fdinfo_operations = { | 2226 | static const struct file_operations proc_fdinfo_operations = { |
2227 | .read = generic_read_dir, | 2227 | .read = generic_read_dir, |
2228 | .readdir = proc_readfdinfo, | 2228 | .readdir = proc_readfdinfo, |
2229 | .llseek = default_llseek, | 2229 | .llseek = default_llseek, |
2230 | }; | 2230 | }; |
2231 | 2231 | ||
2232 | /* | 2232 | /* |
2233 | * proc directories can do almost nothing.. | 2233 | * proc directories can do almost nothing.. |
2234 | */ | 2234 | */ |
2235 | static const struct inode_operations proc_fdinfo_inode_operations = { | 2235 | static const struct inode_operations proc_fdinfo_inode_operations = { |
2236 | .lookup = proc_lookupfdinfo, | 2236 | .lookup = proc_lookupfdinfo, |
2237 | .setattr = proc_setattr, | 2237 | .setattr = proc_setattr, |
2238 | }; | 2238 | }; |
2239 | 2239 | ||
2240 | 2240 | ||
2241 | static struct dentry *proc_pident_instantiate(struct inode *dir, | 2241 | static struct dentry *proc_pident_instantiate(struct inode *dir, |
2242 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 2242 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
2243 | { | 2243 | { |
2244 | const struct pid_entry *p = ptr; | 2244 | const struct pid_entry *p = ptr; |
2245 | struct inode *inode; | 2245 | struct inode *inode; |
2246 | struct proc_inode *ei; | 2246 | struct proc_inode *ei; |
2247 | struct dentry *error = ERR_PTR(-ENOENT); | 2247 | struct dentry *error = ERR_PTR(-ENOENT); |
2248 | 2248 | ||
2249 | inode = proc_pid_make_inode(dir->i_sb, task); | 2249 | inode = proc_pid_make_inode(dir->i_sb, task); |
2250 | if (!inode) | 2250 | if (!inode) |
2251 | goto out; | 2251 | goto out; |
2252 | 2252 | ||
2253 | ei = PROC_I(inode); | 2253 | ei = PROC_I(inode); |
2254 | inode->i_mode = p->mode; | 2254 | inode->i_mode = p->mode; |
2255 | if (S_ISDIR(inode->i_mode)) | 2255 | if (S_ISDIR(inode->i_mode)) |
2256 | inode->i_nlink = 2; /* Use getattr to fix if necessary */ | 2256 | inode->i_nlink = 2; /* Use getattr to fix if necessary */ |
2257 | if (p->iop) | 2257 | if (p->iop) |
2258 | inode->i_op = p->iop; | 2258 | inode->i_op = p->iop; |
2259 | if (p->fop) | 2259 | if (p->fop) |
2260 | inode->i_fop = p->fop; | 2260 | inode->i_fop = p->fop; |
2261 | ei->op = p->op; | 2261 | ei->op = p->op; |
2262 | d_set_d_op(dentry, &pid_dentry_operations); | 2262 | d_set_d_op(dentry, &pid_dentry_operations); |
2263 | d_add(dentry, inode); | 2263 | d_add(dentry, inode); |
2264 | /* Close the race of the process dying before we return the dentry */ | 2264 | /* Close the race of the process dying before we return the dentry */ |
2265 | if (pid_revalidate(dentry, NULL)) | 2265 | if (pid_revalidate(dentry, NULL)) |
2266 | error = NULL; | 2266 | error = NULL; |
2267 | out: | 2267 | out: |
2268 | return error; | 2268 | return error; |
2269 | } | 2269 | } |
2270 | 2270 | ||
2271 | static struct dentry *proc_pident_lookup(struct inode *dir, | 2271 | static struct dentry *proc_pident_lookup(struct inode *dir, |
2272 | struct dentry *dentry, | 2272 | struct dentry *dentry, |
2273 | const struct pid_entry *ents, | 2273 | const struct pid_entry *ents, |
2274 | unsigned int nents) | 2274 | unsigned int nents) |
2275 | { | 2275 | { |
2276 | struct dentry *error; | 2276 | struct dentry *error; |
2277 | struct task_struct *task = get_proc_task(dir); | 2277 | struct task_struct *task = get_proc_task(dir); |
2278 | const struct pid_entry *p, *last; | 2278 | const struct pid_entry *p, *last; |
2279 | 2279 | ||
2280 | error = ERR_PTR(-ENOENT); | 2280 | error = ERR_PTR(-ENOENT); |
2281 | 2281 | ||
2282 | if (!task) | 2282 | if (!task) |
2283 | goto out_no_task; | 2283 | goto out_no_task; |
2284 | 2284 | ||
2285 | /* | 2285 | /* |
2286 | * Yes, it does not scale. And it should not. Don't add | 2286 | * Yes, it does not scale. And it should not. Don't add |
2287 | * new entries into /proc/<tgid>/ without very good reasons. | 2287 | * new entries into /proc/<tgid>/ without very good reasons. |
2288 | */ | 2288 | */ |
2289 | last = &ents[nents - 1]; | 2289 | last = &ents[nents - 1]; |
2290 | for (p = ents; p <= last; p++) { | 2290 | for (p = ents; p <= last; p++) { |
2291 | if (p->len != dentry->d_name.len) | 2291 | if (p->len != dentry->d_name.len) |
2292 | continue; | 2292 | continue; |
2293 | if (!memcmp(dentry->d_name.name, p->name, p->len)) | 2293 | if (!memcmp(dentry->d_name.name, p->name, p->len)) |
2294 | break; | 2294 | break; |
2295 | } | 2295 | } |
2296 | if (p > last) | 2296 | if (p > last) |
2297 | goto out; | 2297 | goto out; |
2298 | 2298 | ||
2299 | error = proc_pident_instantiate(dir, dentry, task, p); | 2299 | error = proc_pident_instantiate(dir, dentry, task, p); |
2300 | out: | 2300 | out: |
2301 | put_task_struct(task); | 2301 | put_task_struct(task); |
2302 | out_no_task: | 2302 | out_no_task: |
2303 | return error; | 2303 | return error; |
2304 | } | 2304 | } |
2305 | 2305 | ||
2306 | static int proc_pident_fill_cache(struct file *filp, void *dirent, | 2306 | static int proc_pident_fill_cache(struct file *filp, void *dirent, |
2307 | filldir_t filldir, struct task_struct *task, const struct pid_entry *p) | 2307 | filldir_t filldir, struct task_struct *task, const struct pid_entry *p) |
2308 | { | 2308 | { |
2309 | return proc_fill_cache(filp, dirent, filldir, p->name, p->len, | 2309 | return proc_fill_cache(filp, dirent, filldir, p->name, p->len, |
2310 | proc_pident_instantiate, task, p); | 2310 | proc_pident_instantiate, task, p); |
2311 | } | 2311 | } |
2312 | 2312 | ||
2313 | static int proc_pident_readdir(struct file *filp, | 2313 | static int proc_pident_readdir(struct file *filp, |
2314 | void *dirent, filldir_t filldir, | 2314 | void *dirent, filldir_t filldir, |
2315 | const struct pid_entry *ents, unsigned int nents) | 2315 | const struct pid_entry *ents, unsigned int nents) |
2316 | { | 2316 | { |
2317 | int i; | 2317 | int i; |
2318 | struct dentry *dentry = filp->f_path.dentry; | 2318 | struct dentry *dentry = filp->f_path.dentry; |
2319 | struct inode *inode = dentry->d_inode; | 2319 | struct inode *inode = dentry->d_inode; |
2320 | struct task_struct *task = get_proc_task(inode); | 2320 | struct task_struct *task = get_proc_task(inode); |
2321 | const struct pid_entry *p, *last; | 2321 | const struct pid_entry *p, *last; |
2322 | ino_t ino; | 2322 | ino_t ino; |
2323 | int ret; | 2323 | int ret; |
2324 | 2324 | ||
2325 | ret = -ENOENT; | 2325 | ret = -ENOENT; |
2326 | if (!task) | 2326 | if (!task) |
2327 | goto out_no_task; | 2327 | goto out_no_task; |
2328 | 2328 | ||
2329 | ret = 0; | 2329 | ret = 0; |
2330 | i = filp->f_pos; | 2330 | i = filp->f_pos; |
2331 | switch (i) { | 2331 | switch (i) { |
2332 | case 0: | 2332 | case 0: |
2333 | ino = inode->i_ino; | 2333 | ino = inode->i_ino; |
2334 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | 2334 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) |
2335 | goto out; | 2335 | goto out; |
2336 | i++; | 2336 | i++; |
2337 | filp->f_pos++; | 2337 | filp->f_pos++; |
2338 | /* fall through */ | 2338 | /* fall through */ |
2339 | case 1: | 2339 | case 1: |
2340 | ino = parent_ino(dentry); | 2340 | ino = parent_ino(dentry); |
2341 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | 2341 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) |
2342 | goto out; | 2342 | goto out; |
2343 | i++; | 2343 | i++; |
2344 | filp->f_pos++; | 2344 | filp->f_pos++; |
2345 | /* fall through */ | 2345 | /* fall through */ |
2346 | default: | 2346 | default: |
2347 | i -= 2; | 2347 | i -= 2; |
2348 | if (i >= nents) { | 2348 | if (i >= nents) { |
2349 | ret = 1; | 2349 | ret = 1; |
2350 | goto out; | 2350 | goto out; |
2351 | } | 2351 | } |
2352 | p = ents + i; | 2352 | p = ents + i; |
2353 | last = &ents[nents - 1]; | 2353 | last = &ents[nents - 1]; |
2354 | while (p <= last) { | 2354 | while (p <= last) { |
2355 | if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0) | 2355 | if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0) |
2356 | goto out; | 2356 | goto out; |
2357 | filp->f_pos++; | 2357 | filp->f_pos++; |
2358 | p++; | 2358 | p++; |
2359 | } | 2359 | } |
2360 | } | 2360 | } |
2361 | 2361 | ||
2362 | ret = 1; | 2362 | ret = 1; |
2363 | out: | 2363 | out: |
2364 | put_task_struct(task); | 2364 | put_task_struct(task); |
2365 | out_no_task: | 2365 | out_no_task: |
2366 | return ret; | 2366 | return ret; |
2367 | } | 2367 | } |
2368 | 2368 | ||
2369 | #ifdef CONFIG_SECURITY | 2369 | #ifdef CONFIG_SECURITY |
2370 | static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, | 2370 | static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, |
2371 | size_t count, loff_t *ppos) | 2371 | size_t count, loff_t *ppos) |
2372 | { | 2372 | { |
2373 | struct inode * inode = file->f_path.dentry->d_inode; | 2373 | struct inode * inode = file->f_path.dentry->d_inode; |
2374 | char *p = NULL; | 2374 | char *p = NULL; |
2375 | ssize_t length; | 2375 | ssize_t length; |
2376 | struct task_struct *task = get_proc_task(inode); | 2376 | struct task_struct *task = get_proc_task(inode); |
2377 | 2377 | ||
2378 | if (!task) | 2378 | if (!task) |
2379 | return -ESRCH; | 2379 | return -ESRCH; |
2380 | 2380 | ||
2381 | length = security_getprocattr(task, | 2381 | length = security_getprocattr(task, |
2382 | (char*)file->f_path.dentry->d_name.name, | 2382 | (char*)file->f_path.dentry->d_name.name, |
2383 | &p); | 2383 | &p); |
2384 | put_task_struct(task); | 2384 | put_task_struct(task); |
2385 | if (length > 0) | 2385 | if (length > 0) |
2386 | length = simple_read_from_buffer(buf, count, ppos, p, length); | 2386 | length = simple_read_from_buffer(buf, count, ppos, p, length); |
2387 | kfree(p); | 2387 | kfree(p); |
2388 | return length; | 2388 | return length; |
2389 | } | 2389 | } |
2390 | 2390 | ||
2391 | static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, | 2391 | static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, |
2392 | size_t count, loff_t *ppos) | 2392 | size_t count, loff_t *ppos) |
2393 | { | 2393 | { |
2394 | struct inode * inode = file->f_path.dentry->d_inode; | 2394 | struct inode * inode = file->f_path.dentry->d_inode; |
2395 | char *page; | 2395 | char *page; |
2396 | ssize_t length; | 2396 | ssize_t length; |
2397 | struct task_struct *task = get_proc_task(inode); | 2397 | struct task_struct *task = get_proc_task(inode); |
2398 | 2398 | ||
2399 | length = -ESRCH; | 2399 | length = -ESRCH; |
2400 | if (!task) | 2400 | if (!task) |
2401 | goto out_no_task; | 2401 | goto out_no_task; |
2402 | if (count > PAGE_SIZE) | 2402 | if (count > PAGE_SIZE) |
2403 | count = PAGE_SIZE; | 2403 | count = PAGE_SIZE; |
2404 | 2404 | ||
2405 | /* No partial writes. */ | 2405 | /* No partial writes. */ |
2406 | length = -EINVAL; | 2406 | length = -EINVAL; |
2407 | if (*ppos != 0) | 2407 | if (*ppos != 0) |
2408 | goto out; | 2408 | goto out; |
2409 | 2409 | ||
2410 | length = -ENOMEM; | 2410 | length = -ENOMEM; |
2411 | page = (char*)__get_free_page(GFP_TEMPORARY); | 2411 | page = (char*)__get_free_page(GFP_TEMPORARY); |
2412 | if (!page) | 2412 | if (!page) |
2413 | goto out; | 2413 | goto out; |
2414 | 2414 | ||
2415 | length = -EFAULT; | 2415 | length = -EFAULT; |
2416 | if (copy_from_user(page, buf, count)) | 2416 | if (copy_from_user(page, buf, count)) |
2417 | goto out_free; | 2417 | goto out_free; |
2418 | 2418 | ||
2419 | /* Guard against adverse ptrace interaction */ | 2419 | /* Guard against adverse ptrace interaction */ |
2420 | length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); | 2420 | length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); |
2421 | if (length < 0) | 2421 | if (length < 0) |
2422 | goto out_free; | 2422 | goto out_free; |
2423 | 2423 | ||
2424 | length = security_setprocattr(task, | 2424 | length = security_setprocattr(task, |
2425 | (char*)file->f_path.dentry->d_name.name, | 2425 | (char*)file->f_path.dentry->d_name.name, |
2426 | (void*)page, count); | 2426 | (void*)page, count); |
2427 | mutex_unlock(&task->signal->cred_guard_mutex); | 2427 | mutex_unlock(&task->signal->cred_guard_mutex); |
2428 | out_free: | 2428 | out_free: |
2429 | free_page((unsigned long) page); | 2429 | free_page((unsigned long) page); |
2430 | out: | 2430 | out: |
2431 | put_task_struct(task); | 2431 | put_task_struct(task); |
2432 | out_no_task: | 2432 | out_no_task: |
2433 | return length; | 2433 | return length; |
2434 | } | 2434 | } |
2435 | 2435 | ||
2436 | static const struct file_operations proc_pid_attr_operations = { | 2436 | static const struct file_operations proc_pid_attr_operations = { |
2437 | .read = proc_pid_attr_read, | 2437 | .read = proc_pid_attr_read, |
2438 | .write = proc_pid_attr_write, | 2438 | .write = proc_pid_attr_write, |
2439 | .llseek = generic_file_llseek, | 2439 | .llseek = generic_file_llseek, |
2440 | }; | 2440 | }; |
2441 | 2441 | ||
2442 | static const struct pid_entry attr_dir_stuff[] = { | 2442 | static const struct pid_entry attr_dir_stuff[] = { |
2443 | REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations), | 2443 | REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations), |
2444 | REG("prev", S_IRUGO, proc_pid_attr_operations), | 2444 | REG("prev", S_IRUGO, proc_pid_attr_operations), |
2445 | REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations), | 2445 | REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations), |
2446 | REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), | 2446 | REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), |
2447 | REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), | 2447 | REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), |
2448 | REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), | 2448 | REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), |
2449 | }; | 2449 | }; |
2450 | 2450 | ||
2451 | static int proc_attr_dir_readdir(struct file * filp, | 2451 | static int proc_attr_dir_readdir(struct file * filp, |
2452 | void * dirent, filldir_t filldir) | 2452 | void * dirent, filldir_t filldir) |
2453 | { | 2453 | { |
2454 | return proc_pident_readdir(filp,dirent,filldir, | 2454 | return proc_pident_readdir(filp,dirent,filldir, |
2455 | attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff)); | 2455 | attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff)); |
2456 | } | 2456 | } |
2457 | 2457 | ||
2458 | static const struct file_operations proc_attr_dir_operations = { | 2458 | static const struct file_operations proc_attr_dir_operations = { |
2459 | .read = generic_read_dir, | 2459 | .read = generic_read_dir, |
2460 | .readdir = proc_attr_dir_readdir, | 2460 | .readdir = proc_attr_dir_readdir, |
2461 | .llseek = default_llseek, | 2461 | .llseek = default_llseek, |
2462 | }; | 2462 | }; |
2463 | 2463 | ||
2464 | static struct dentry *proc_attr_dir_lookup(struct inode *dir, | 2464 | static struct dentry *proc_attr_dir_lookup(struct inode *dir, |
2465 | struct dentry *dentry, struct nameidata *nd) | 2465 | struct dentry *dentry, struct nameidata *nd) |
2466 | { | 2466 | { |
2467 | return proc_pident_lookup(dir, dentry, | 2467 | return proc_pident_lookup(dir, dentry, |
2468 | attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); | 2468 | attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); |
2469 | } | 2469 | } |
2470 | 2470 | ||
2471 | static const struct inode_operations proc_attr_dir_inode_operations = { | 2471 | static const struct inode_operations proc_attr_dir_inode_operations = { |
2472 | .lookup = proc_attr_dir_lookup, | 2472 | .lookup = proc_attr_dir_lookup, |
2473 | .getattr = pid_getattr, | 2473 | .getattr = pid_getattr, |
2474 | .setattr = proc_setattr, | 2474 | .setattr = proc_setattr, |
2475 | }; | 2475 | }; |
2476 | 2476 | ||
2477 | #endif | 2477 | #endif |
2478 | 2478 | ||
2479 | #ifdef CONFIG_ELF_CORE | 2479 | #ifdef CONFIG_ELF_CORE |
2480 | static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, | 2480 | static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, |
2481 | size_t count, loff_t *ppos) | 2481 | size_t count, loff_t *ppos) |
2482 | { | 2482 | { |
2483 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); | 2483 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
2484 | struct mm_struct *mm; | 2484 | struct mm_struct *mm; |
2485 | char buffer[PROC_NUMBUF]; | 2485 | char buffer[PROC_NUMBUF]; |
2486 | size_t len; | 2486 | size_t len; |
2487 | int ret; | 2487 | int ret; |
2488 | 2488 | ||
2489 | if (!task) | 2489 | if (!task) |
2490 | return -ESRCH; | 2490 | return -ESRCH; |
2491 | 2491 | ||
2492 | ret = 0; | 2492 | ret = 0; |
2493 | mm = get_task_mm(task); | 2493 | mm = get_task_mm(task); |
2494 | if (mm) { | 2494 | if (mm) { |
2495 | len = snprintf(buffer, sizeof(buffer), "%08lx\n", | 2495 | len = snprintf(buffer, sizeof(buffer), "%08lx\n", |
2496 | ((mm->flags & MMF_DUMP_FILTER_MASK) >> | 2496 | ((mm->flags & MMF_DUMP_FILTER_MASK) >> |
2497 | MMF_DUMP_FILTER_SHIFT)); | 2497 | MMF_DUMP_FILTER_SHIFT)); |
2498 | mmput(mm); | 2498 | mmput(mm); |
2499 | ret = simple_read_from_buffer(buf, count, ppos, buffer, len); | 2499 | ret = simple_read_from_buffer(buf, count, ppos, buffer, len); |
2500 | } | 2500 | } |
2501 | 2501 | ||
2502 | put_task_struct(task); | 2502 | put_task_struct(task); |
2503 | 2503 | ||
2504 | return ret; | 2504 | return ret; |
2505 | } | 2505 | } |
2506 | 2506 | ||
2507 | static ssize_t proc_coredump_filter_write(struct file *file, | 2507 | static ssize_t proc_coredump_filter_write(struct file *file, |
2508 | const char __user *buf, | 2508 | const char __user *buf, |
2509 | size_t count, | 2509 | size_t count, |
2510 | loff_t *ppos) | 2510 | loff_t *ppos) |
2511 | { | 2511 | { |
2512 | struct task_struct *task; | 2512 | struct task_struct *task; |
2513 | struct mm_struct *mm; | 2513 | struct mm_struct *mm; |
2514 | char buffer[PROC_NUMBUF], *end; | 2514 | char buffer[PROC_NUMBUF], *end; |
2515 | unsigned int val; | 2515 | unsigned int val; |
2516 | int ret; | 2516 | int ret; |
2517 | int i; | 2517 | int i; |
2518 | unsigned long mask; | 2518 | unsigned long mask; |
2519 | 2519 | ||
2520 | ret = -EFAULT; | 2520 | ret = -EFAULT; |
2521 | memset(buffer, 0, sizeof(buffer)); | 2521 | memset(buffer, 0, sizeof(buffer)); |
2522 | if (count > sizeof(buffer) - 1) | 2522 | if (count > sizeof(buffer) - 1) |
2523 | count = sizeof(buffer) - 1; | 2523 | count = sizeof(buffer) - 1; |
2524 | if (copy_from_user(buffer, buf, count)) | 2524 | if (copy_from_user(buffer, buf, count)) |
2525 | goto out_no_task; | 2525 | goto out_no_task; |
2526 | 2526 | ||
2527 | ret = -EINVAL; | 2527 | ret = -EINVAL; |
2528 | val = (unsigned int)simple_strtoul(buffer, &end, 0); | 2528 | val = (unsigned int)simple_strtoul(buffer, &end, 0); |
2529 | if (*end == '\n') | 2529 | if (*end == '\n') |
2530 | end++; | 2530 | end++; |
2531 | if (end - buffer == 0) | 2531 | if (end - buffer == 0) |
2532 | goto out_no_task; | 2532 | goto out_no_task; |
2533 | 2533 | ||
2534 | ret = -ESRCH; | 2534 | ret = -ESRCH; |
2535 | task = get_proc_task(file->f_dentry->d_inode); | 2535 | task = get_proc_task(file->f_dentry->d_inode); |
2536 | if (!task) | 2536 | if (!task) |
2537 | goto out_no_task; | 2537 | goto out_no_task; |
2538 | 2538 | ||
2539 | ret = end - buffer; | 2539 | ret = end - buffer; |
2540 | mm = get_task_mm(task); | 2540 | mm = get_task_mm(task); |
2541 | if (!mm) | 2541 | if (!mm) |
2542 | goto out_no_mm; | 2542 | goto out_no_mm; |
2543 | 2543 | ||
2544 | for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { | 2544 | for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { |
2545 | if (val & mask) | 2545 | if (val & mask) |
2546 | set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); | 2546 | set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); |
2547 | else | 2547 | else |
2548 | clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); | 2548 | clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); |
2549 | } | 2549 | } |
2550 | 2550 | ||
2551 | mmput(mm); | 2551 | mmput(mm); |
2552 | out_no_mm: | 2552 | out_no_mm: |
2553 | put_task_struct(task); | 2553 | put_task_struct(task); |
2554 | out_no_task: | 2554 | out_no_task: |
2555 | return ret; | 2555 | return ret; |
2556 | } | 2556 | } |
2557 | 2557 | ||
2558 | static const struct file_operations proc_coredump_filter_operations = { | 2558 | static const struct file_operations proc_coredump_filter_operations = { |
2559 | .read = proc_coredump_filter_read, | 2559 | .read = proc_coredump_filter_read, |
2560 | .write = proc_coredump_filter_write, | 2560 | .write = proc_coredump_filter_write, |
2561 | .llseek = generic_file_llseek, | 2561 | .llseek = generic_file_llseek, |
2562 | }; | 2562 | }; |
2563 | #endif | 2563 | #endif |
2564 | 2564 | ||
2565 | /* | 2565 | /* |
2566 | * /proc/self: | 2566 | * /proc/self: |
2567 | */ | 2567 | */ |
2568 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, | 2568 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, |
2569 | int buflen) | 2569 | int buflen) |
2570 | { | 2570 | { |
2571 | struct pid_namespace *ns = dentry->d_sb->s_fs_info; | 2571 | struct pid_namespace *ns = dentry->d_sb->s_fs_info; |
2572 | pid_t tgid = task_tgid_nr_ns(current, ns); | 2572 | pid_t tgid = task_tgid_nr_ns(current, ns); |
2573 | char tmp[PROC_NUMBUF]; | 2573 | char tmp[PROC_NUMBUF]; |
2574 | if (!tgid) | 2574 | if (!tgid) |
2575 | return -ENOENT; | 2575 | return -ENOENT; |
2576 | sprintf(tmp, "%d", tgid); | 2576 | sprintf(tmp, "%d", tgid); |
2577 | return vfs_readlink(dentry,buffer,buflen,tmp); | 2577 | return vfs_readlink(dentry,buffer,buflen,tmp); |
2578 | } | 2578 | } |
2579 | 2579 | ||
2580 | static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) | 2580 | static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) |
2581 | { | 2581 | { |
2582 | struct pid_namespace *ns = dentry->d_sb->s_fs_info; | 2582 | struct pid_namespace *ns = dentry->d_sb->s_fs_info; |
2583 | pid_t tgid = task_tgid_nr_ns(current, ns); | 2583 | pid_t tgid = task_tgid_nr_ns(current, ns); |
2584 | char *name = ERR_PTR(-ENOENT); | 2584 | char *name = ERR_PTR(-ENOENT); |
2585 | if (tgid) { | 2585 | if (tgid) { |
2586 | name = __getname(); | 2586 | name = __getname(); |
2587 | if (!name) | 2587 | if (!name) |
2588 | name = ERR_PTR(-ENOMEM); | 2588 | name = ERR_PTR(-ENOMEM); |
2589 | else | 2589 | else |
2590 | sprintf(name, "%d", tgid); | 2590 | sprintf(name, "%d", tgid); |
2591 | } | 2591 | } |
2592 | nd_set_link(nd, name); | 2592 | nd_set_link(nd, name); |
2593 | return NULL; | 2593 | return NULL; |
2594 | } | 2594 | } |
2595 | 2595 | ||
2596 | static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, | 2596 | static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, |
2597 | void *cookie) | 2597 | void *cookie) |
2598 | { | 2598 | { |
2599 | char *s = nd_get_link(nd); | 2599 | char *s = nd_get_link(nd); |
2600 | if (!IS_ERR(s)) | 2600 | if (!IS_ERR(s)) |
2601 | __putname(s); | 2601 | __putname(s); |
2602 | } | 2602 | } |
2603 | 2603 | ||
2604 | static const struct inode_operations proc_self_inode_operations = { | 2604 | static const struct inode_operations proc_self_inode_operations = { |
2605 | .readlink = proc_self_readlink, | 2605 | .readlink = proc_self_readlink, |
2606 | .follow_link = proc_self_follow_link, | 2606 | .follow_link = proc_self_follow_link, |
2607 | .put_link = proc_self_put_link, | 2607 | .put_link = proc_self_put_link, |
2608 | }; | 2608 | }; |
2609 | 2609 | ||
2610 | /* | 2610 | /* |
2611 | * proc base | 2611 | * proc base |
2612 | * | 2612 | * |
2613 | * These are the directory entries in the root directory of /proc | 2613 | * These are the directory entries in the root directory of /proc |
2614 | * that properly belong to the /proc filesystem, as they describe | 2614 | * that properly belong to the /proc filesystem, as they describe |
2615 | * describe something that is process related. | 2615 | * describe something that is process related. |
2616 | */ | 2616 | */ |
2617 | static const struct pid_entry proc_base_stuff[] = { | 2617 | static const struct pid_entry proc_base_stuff[] = { |
2618 | NOD("self", S_IFLNK|S_IRWXUGO, | 2618 | NOD("self", S_IFLNK|S_IRWXUGO, |
2619 | &proc_self_inode_operations, NULL, {}), | 2619 | &proc_self_inode_operations, NULL, {}), |
2620 | }; | 2620 | }; |
2621 | 2621 | ||
2622 | static struct dentry *proc_base_instantiate(struct inode *dir, | 2622 | static struct dentry *proc_base_instantiate(struct inode *dir, |
2623 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 2623 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
2624 | { | 2624 | { |
2625 | const struct pid_entry *p = ptr; | 2625 | const struct pid_entry *p = ptr; |
2626 | struct inode *inode; | 2626 | struct inode *inode; |
2627 | struct proc_inode *ei; | 2627 | struct proc_inode *ei; |
2628 | struct dentry *error; | 2628 | struct dentry *error; |
2629 | 2629 | ||
2630 | /* Allocate the inode */ | 2630 | /* Allocate the inode */ |
2631 | error = ERR_PTR(-ENOMEM); | 2631 | error = ERR_PTR(-ENOMEM); |
2632 | inode = new_inode(dir->i_sb); | 2632 | inode = new_inode(dir->i_sb); |
2633 | if (!inode) | 2633 | if (!inode) |
2634 | goto out; | 2634 | goto out; |
2635 | 2635 | ||
2636 | /* Initialize the inode */ | 2636 | /* Initialize the inode */ |
2637 | ei = PROC_I(inode); | 2637 | ei = PROC_I(inode); |
2638 | inode->i_ino = get_next_ino(); | 2638 | inode->i_ino = get_next_ino(); |
2639 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 2639 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
2640 | 2640 | ||
2641 | /* | 2641 | /* |
2642 | * grab the reference to the task. | 2642 | * grab the reference to the task. |
2643 | */ | 2643 | */ |
2644 | ei->pid = get_task_pid(task, PIDTYPE_PID); | 2644 | ei->pid = get_task_pid(task, PIDTYPE_PID); |
2645 | if (!ei->pid) | 2645 | if (!ei->pid) |
2646 | goto out_iput; | 2646 | goto out_iput; |
2647 | 2647 | ||
2648 | inode->i_mode = p->mode; | 2648 | inode->i_mode = p->mode; |
2649 | if (S_ISDIR(inode->i_mode)) | 2649 | if (S_ISDIR(inode->i_mode)) |
2650 | inode->i_nlink = 2; | 2650 | inode->i_nlink = 2; |
2651 | if (S_ISLNK(inode->i_mode)) | 2651 | if (S_ISLNK(inode->i_mode)) |
2652 | inode->i_size = 64; | 2652 | inode->i_size = 64; |
2653 | if (p->iop) | 2653 | if (p->iop) |
2654 | inode->i_op = p->iop; | 2654 | inode->i_op = p->iop; |
2655 | if (p->fop) | 2655 | if (p->fop) |
2656 | inode->i_fop = p->fop; | 2656 | inode->i_fop = p->fop; |
2657 | ei->op = p->op; | 2657 | ei->op = p->op; |
2658 | d_add(dentry, inode); | 2658 | d_add(dentry, inode); |
2659 | error = NULL; | 2659 | error = NULL; |
2660 | out: | 2660 | out: |
2661 | return error; | 2661 | return error; |
2662 | out_iput: | 2662 | out_iput: |
2663 | iput(inode); | 2663 | iput(inode); |
2664 | goto out; | 2664 | goto out; |
2665 | } | 2665 | } |
2666 | 2666 | ||
2667 | static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) | 2667 | static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) |
2668 | { | 2668 | { |
2669 | struct dentry *error; | 2669 | struct dentry *error; |
2670 | struct task_struct *task = get_proc_task(dir); | 2670 | struct task_struct *task = get_proc_task(dir); |
2671 | const struct pid_entry *p, *last; | 2671 | const struct pid_entry *p, *last; |
2672 | 2672 | ||
2673 | error = ERR_PTR(-ENOENT); | 2673 | error = ERR_PTR(-ENOENT); |
2674 | 2674 | ||
2675 | if (!task) | 2675 | if (!task) |
2676 | goto out_no_task; | 2676 | goto out_no_task; |
2677 | 2677 | ||
2678 | /* Lookup the directory entry */ | 2678 | /* Lookup the directory entry */ |
2679 | last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1]; | 2679 | last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1]; |
2680 | for (p = proc_base_stuff; p <= last; p++) { | 2680 | for (p = proc_base_stuff; p <= last; p++) { |
2681 | if (p->len != dentry->d_name.len) | 2681 | if (p->len != dentry->d_name.len) |
2682 | continue; | 2682 | continue; |
2683 | if (!memcmp(dentry->d_name.name, p->name, p->len)) | 2683 | if (!memcmp(dentry->d_name.name, p->name, p->len)) |
2684 | break; | 2684 | break; |
2685 | } | 2685 | } |
2686 | if (p > last) | 2686 | if (p > last) |
2687 | goto out; | 2687 | goto out; |
2688 | 2688 | ||
2689 | error = proc_base_instantiate(dir, dentry, task, p); | 2689 | error = proc_base_instantiate(dir, dentry, task, p); |
2690 | 2690 | ||
2691 | out: | 2691 | out: |
2692 | put_task_struct(task); | 2692 | put_task_struct(task); |
2693 | out_no_task: | 2693 | out_no_task: |
2694 | return error; | 2694 | return error; |
2695 | } | 2695 | } |
2696 | 2696 | ||
2697 | static int proc_base_fill_cache(struct file *filp, void *dirent, | 2697 | static int proc_base_fill_cache(struct file *filp, void *dirent, |
2698 | filldir_t filldir, struct task_struct *task, const struct pid_entry *p) | 2698 | filldir_t filldir, struct task_struct *task, const struct pid_entry *p) |
2699 | { | 2699 | { |
2700 | return proc_fill_cache(filp, dirent, filldir, p->name, p->len, | 2700 | return proc_fill_cache(filp, dirent, filldir, p->name, p->len, |
2701 | proc_base_instantiate, task, p); | 2701 | proc_base_instantiate, task, p); |
2702 | } | 2702 | } |
2703 | 2703 | ||
2704 | #ifdef CONFIG_TASK_IO_ACCOUNTING | 2704 | #ifdef CONFIG_TASK_IO_ACCOUNTING |
2705 | static int do_io_accounting(struct task_struct *task, char *buffer, int whole) | 2705 | static int do_io_accounting(struct task_struct *task, char *buffer, int whole) |
2706 | { | 2706 | { |
2707 | struct task_io_accounting acct = task->ioac; | 2707 | struct task_io_accounting acct = task->ioac; |
2708 | unsigned long flags; | 2708 | unsigned long flags; |
2709 | 2709 | ||
2710 | if (whole && lock_task_sighand(task, &flags)) { | 2710 | if (whole && lock_task_sighand(task, &flags)) { |
2711 | struct task_struct *t = task; | 2711 | struct task_struct *t = task; |
2712 | 2712 | ||
2713 | task_io_accounting_add(&acct, &task->signal->ioac); | 2713 | task_io_accounting_add(&acct, &task->signal->ioac); |
2714 | while_each_thread(task, t) | 2714 | while_each_thread(task, t) |
2715 | task_io_accounting_add(&acct, &t->ioac); | 2715 | task_io_accounting_add(&acct, &t->ioac); |
2716 | 2716 | ||
2717 | unlock_task_sighand(task, &flags); | 2717 | unlock_task_sighand(task, &flags); |
2718 | } | 2718 | } |
2719 | return sprintf(buffer, | 2719 | return sprintf(buffer, |
2720 | "rchar: %llu\n" | 2720 | "rchar: %llu\n" |
2721 | "wchar: %llu\n" | 2721 | "wchar: %llu\n" |
2722 | "syscr: %llu\n" | 2722 | "syscr: %llu\n" |
2723 | "syscw: %llu\n" | 2723 | "syscw: %llu\n" |
2724 | "read_bytes: %llu\n" | 2724 | "read_bytes: %llu\n" |
2725 | "write_bytes: %llu\n" | 2725 | "write_bytes: %llu\n" |
2726 | "cancelled_write_bytes: %llu\n", | 2726 | "cancelled_write_bytes: %llu\n", |
2727 | (unsigned long long)acct.rchar, | 2727 | (unsigned long long)acct.rchar, |
2728 | (unsigned long long)acct.wchar, | 2728 | (unsigned long long)acct.wchar, |
2729 | (unsigned long long)acct.syscr, | 2729 | (unsigned long long)acct.syscr, |
2730 | (unsigned long long)acct.syscw, | 2730 | (unsigned long long)acct.syscw, |
2731 | (unsigned long long)acct.read_bytes, | 2731 | (unsigned long long)acct.read_bytes, |
2732 | (unsigned long long)acct.write_bytes, | 2732 | (unsigned long long)acct.write_bytes, |
2733 | (unsigned long long)acct.cancelled_write_bytes); | 2733 | (unsigned long long)acct.cancelled_write_bytes); |
2734 | } | 2734 | } |
2735 | 2735 | ||
/* Report the I/O counters of @task alone (do_io_accounting() with whole == 0). */
static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
{
	return do_io_accounting(task, buffer, /* whole */ 0);
}
2740 | 2740 | ||
/*
 * Report the I/O counters summed over @task's thread group
 * (do_io_accounting() with whole == 1).
 */
static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
{
	return do_io_accounting(task, buffer, /* whole */ 1);
}
2745 | #endif /* CONFIG_TASK_IO_ACCOUNTING */ | 2745 | #endif /* CONFIG_TASK_IO_ACCOUNTING */ |
2746 | 2746 | ||
2747 | static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, | 2747 | static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, |
2748 | struct pid *pid, struct task_struct *task) | 2748 | struct pid *pid, struct task_struct *task) |
2749 | { | 2749 | { |
2750 | int err = lock_trace(task); | 2750 | int err = lock_trace(task); |
2751 | if (!err) { | 2751 | if (!err) { |
2752 | seq_printf(m, "%08x\n", task->personality); | 2752 | seq_printf(m, "%08x\n", task->personality); |
2753 | unlock_trace(task); | 2753 | unlock_trace(task); |
2754 | } | 2754 | } |
2755 | return err; | 2755 | return err; |
2756 | } | 2756 | } |
2757 | 2757 | ||
2758 | /* | 2758 | /* |
2759 | * Thread groups | 2759 | * Thread groups |
2760 | */ | 2760 | */ |
2761 | static const struct file_operations proc_task_operations; | 2761 | static const struct file_operations proc_task_operations; |
2762 | static const struct inode_operations proc_task_inode_operations; | 2762 | static const struct inode_operations proc_task_inode_operations; |
2763 | 2763 | ||
2764 | static const struct pid_entry tgid_base_stuff[] = { | 2764 | static const struct pid_entry tgid_base_stuff[] = { |
2765 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), | 2765 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), |
2766 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), | 2766 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), |
2767 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), | 2767 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), |
2768 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), | 2768 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), |
2769 | #ifdef CONFIG_NET | 2769 | #ifdef CONFIG_NET |
2770 | DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), | 2770 | DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), |
2771 | #endif | 2771 | #endif |
2772 | REG("environ", S_IRUSR, proc_environ_operations), | 2772 | REG("environ", S_IRUSR, proc_environ_operations), |
2773 | INF("auxv", S_IRUSR, proc_pid_auxv), | 2773 | INF("auxv", S_IRUSR, proc_pid_auxv), |
2774 | ONE("status", S_IRUGO, proc_pid_status), | 2774 | ONE("status", S_IRUGO, proc_pid_status), |
2775 | ONE("personality", S_IRUGO, proc_pid_personality), | 2775 | ONE("personality", S_IRUGO, proc_pid_personality), |
2776 | INF("limits", S_IRUGO, proc_pid_limits), | 2776 | INF("limits", S_IRUGO, proc_pid_limits), |
2777 | #ifdef CONFIG_SCHED_DEBUG | 2777 | #ifdef CONFIG_SCHED_DEBUG |
2778 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), | 2778 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), |
2779 | #endif | 2779 | #endif |
2780 | #ifdef CONFIG_SCHED_AUTOGROUP | 2780 | #ifdef CONFIG_SCHED_AUTOGROUP |
2781 | REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), | 2781 | REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), |
2782 | #endif | 2782 | #endif |
2783 | REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), | 2783 | REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), |
2784 | #ifdef CONFIG_HAVE_ARCH_TRACEHOOK | 2784 | #ifdef CONFIG_HAVE_ARCH_TRACEHOOK |
2785 | INF("syscall", S_IRUGO, proc_pid_syscall), | 2785 | INF("syscall", S_IRUGO, proc_pid_syscall), |
2786 | #endif | 2786 | #endif |
2787 | INF("cmdline", S_IRUGO, proc_pid_cmdline), | 2787 | INF("cmdline", S_IRUGO, proc_pid_cmdline), |
2788 | ONE("stat", S_IRUGO, proc_tgid_stat), | 2788 | ONE("stat", S_IRUGO, proc_tgid_stat), |
2789 | ONE("statm", S_IRUGO, proc_pid_statm), | 2789 | ONE("statm", S_IRUGO, proc_pid_statm), |
2790 | REG("maps", S_IRUGO, proc_maps_operations), | 2790 | REG("maps", S_IRUGO, proc_maps_operations), |
2791 | #ifdef CONFIG_NUMA | 2791 | #ifdef CONFIG_NUMA |
2792 | REG("numa_maps", S_IRUGO, proc_numa_maps_operations), | 2792 | REG("numa_maps", S_IRUGO, proc_numa_maps_operations), |
2793 | #endif | 2793 | #endif |
2794 | REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), | 2794 | REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), |
2795 | LNK("cwd", proc_cwd_link), | 2795 | LNK("cwd", proc_cwd_link), |
2796 | LNK("root", proc_root_link), | 2796 | LNK("root", proc_root_link), |
2797 | LNK("exe", proc_exe_link), | 2797 | LNK("exe", proc_exe_link), |
2798 | REG("mounts", S_IRUGO, proc_mounts_operations), | 2798 | REG("mounts", S_IRUGO, proc_mounts_operations), |
2799 | REG("mountinfo", S_IRUGO, proc_mountinfo_operations), | 2799 | REG("mountinfo", S_IRUGO, proc_mountinfo_operations), |
2800 | REG("mountstats", S_IRUSR, proc_mountstats_operations), | 2800 | REG("mountstats", S_IRUSR, proc_mountstats_operations), |
2801 | #ifdef CONFIG_PROC_PAGE_MONITOR | 2801 | #ifdef CONFIG_PROC_PAGE_MONITOR |
2802 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), | 2802 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), |
2803 | REG("smaps", S_IRUGO, proc_smaps_operations), | 2803 | REG("smaps", S_IRUGO, proc_smaps_operations), |
2804 | REG("pagemap", S_IRUGO, proc_pagemap_operations), | 2804 | REG("pagemap", S_IRUGO, proc_pagemap_operations), |
2805 | #endif | 2805 | #endif |
2806 | #ifdef CONFIG_SECURITY | 2806 | #ifdef CONFIG_SECURITY |
2807 | DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), | 2807 | DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), |
2808 | #endif | 2808 | #endif |
2809 | #ifdef CONFIG_KALLSYMS | 2809 | #ifdef CONFIG_KALLSYMS |
2810 | INF("wchan", S_IRUGO, proc_pid_wchan), | 2810 | INF("wchan", S_IRUGO, proc_pid_wchan), |
2811 | #endif | 2811 | #endif |
2812 | #ifdef CONFIG_STACKTRACE | 2812 | #ifdef CONFIG_STACKTRACE |
2813 | ONE("stack", S_IRUGO, proc_pid_stack), | 2813 | ONE("stack", S_IRUGO, proc_pid_stack), |
2814 | #endif | 2814 | #endif |
2815 | #ifdef CONFIG_SCHEDSTATS | 2815 | #ifdef CONFIG_SCHEDSTATS |
2816 | INF("schedstat", S_IRUGO, proc_pid_schedstat), | 2816 | INF("schedstat", S_IRUGO, proc_pid_schedstat), |
2817 | #endif | 2817 | #endif |
2818 | #ifdef CONFIG_LATENCYTOP | 2818 | #ifdef CONFIG_LATENCYTOP |
2819 | REG("latency", S_IRUGO, proc_lstats_operations), | 2819 | REG("latency", S_IRUGO, proc_lstats_operations), |
2820 | #endif | 2820 | #endif |
2821 | #ifdef CONFIG_PROC_PID_CPUSET | 2821 | #ifdef CONFIG_PROC_PID_CPUSET |
2822 | REG("cpuset", S_IRUGO, proc_cpuset_operations), | 2822 | REG("cpuset", S_IRUGO, proc_cpuset_operations), |
2823 | #endif | 2823 | #endif |
2824 | #ifdef CONFIG_CGROUPS | 2824 | #ifdef CONFIG_CGROUPS |
2825 | REG("cgroup", S_IRUGO, proc_cgroup_operations), | 2825 | REG("cgroup", S_IRUGO, proc_cgroup_operations), |
2826 | #endif | 2826 | #endif |
2827 | INF("oom_score", S_IRUGO, proc_oom_score), | 2827 | INF("oom_score", S_IRUGO, proc_oom_score), |
2828 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), | 2828 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), |
2829 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), | 2829 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), |
2830 | #ifdef CONFIG_AUDITSYSCALL | 2830 | #ifdef CONFIG_AUDITSYSCALL |
2831 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), | 2831 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), |
2832 | REG("sessionid", S_IRUGO, proc_sessionid_operations), | 2832 | REG("sessionid", S_IRUGO, proc_sessionid_operations), |
2833 | #endif | 2833 | #endif |
2834 | #ifdef CONFIG_FAULT_INJECTION | 2834 | #ifdef CONFIG_FAULT_INJECTION |
2835 | REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), | 2835 | REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), |
2836 | #endif | 2836 | #endif |
2837 | #ifdef CONFIG_ELF_CORE | 2837 | #ifdef CONFIG_ELF_CORE |
2838 | REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), | 2838 | REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), |
2839 | #endif | 2839 | #endif |
2840 | #ifdef CONFIG_TASK_IO_ACCOUNTING | 2840 | #ifdef CONFIG_TASK_IO_ACCOUNTING |
2841 | INF("io", S_IRUGO, proc_tgid_io_accounting), | 2841 | INF("io", S_IRUGO, proc_tgid_io_accounting), |
2842 | #endif | 2842 | #endif |
2843 | }; | 2843 | }; |
2844 | 2844 | ||
2845 | static int proc_tgid_base_readdir(struct file * filp, | 2845 | static int proc_tgid_base_readdir(struct file * filp, |
2846 | void * dirent, filldir_t filldir) | 2846 | void * dirent, filldir_t filldir) |
2847 | { | 2847 | { |
2848 | return proc_pident_readdir(filp,dirent,filldir, | 2848 | return proc_pident_readdir(filp,dirent,filldir, |
2849 | tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); | 2849 | tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); |
2850 | } | 2850 | } |
2851 | 2851 | ||
2852 | static const struct file_operations proc_tgid_base_operations = { | 2852 | static const struct file_operations proc_tgid_base_operations = { |
2853 | .read = generic_read_dir, | 2853 | .read = generic_read_dir, |
2854 | .readdir = proc_tgid_base_readdir, | 2854 | .readdir = proc_tgid_base_readdir, |
2855 | .llseek = default_llseek, | 2855 | .llseek = default_llseek, |
2856 | }; | 2856 | }; |
2857 | 2857 | ||
2858 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ | 2858 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ |
2859 | return proc_pident_lookup(dir, dentry, | 2859 | return proc_pident_lookup(dir, dentry, |
2860 | tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); | 2860 | tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); |
2861 | } | 2861 | } |
2862 | 2862 | ||
2863 | static const struct inode_operations proc_tgid_base_inode_operations = { | 2863 | static const struct inode_operations proc_tgid_base_inode_operations = { |
2864 | .lookup = proc_tgid_base_lookup, | 2864 | .lookup = proc_tgid_base_lookup, |
2865 | .getattr = pid_getattr, | 2865 | .getattr = pid_getattr, |
2866 | .setattr = proc_setattr, | 2866 | .setattr = proc_setattr, |
2867 | }; | 2867 | }; |
2868 | 2868 | ||
2869 | static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) | 2869 | static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) |
2870 | { | 2870 | { |
2871 | struct dentry *dentry, *leader, *dir; | 2871 | struct dentry *dentry, *leader, *dir; |
2872 | char buf[PROC_NUMBUF]; | 2872 | char buf[PROC_NUMBUF]; |
2873 | struct qstr name; | 2873 | struct qstr name; |
2874 | 2874 | ||
2875 | name.name = buf; | 2875 | name.name = buf; |
2876 | name.len = snprintf(buf, sizeof(buf), "%d", pid); | 2876 | name.len = snprintf(buf, sizeof(buf), "%d", pid); |
2877 | dentry = d_hash_and_lookup(mnt->mnt_root, &name); | 2877 | dentry = d_hash_and_lookup(mnt->mnt_root, &name); |
2878 | if (dentry) { | 2878 | if (dentry) { |
2879 | shrink_dcache_parent(dentry); | 2879 | shrink_dcache_parent(dentry); |
2880 | d_drop(dentry); | 2880 | d_drop(dentry); |
2881 | dput(dentry); | 2881 | dput(dentry); |
2882 | } | 2882 | } |
2883 | 2883 | ||
2884 | name.name = buf; | 2884 | name.name = buf; |
2885 | name.len = snprintf(buf, sizeof(buf), "%d", tgid); | 2885 | name.len = snprintf(buf, sizeof(buf), "%d", tgid); |
2886 | leader = d_hash_and_lookup(mnt->mnt_root, &name); | 2886 | leader = d_hash_and_lookup(mnt->mnt_root, &name); |
2887 | if (!leader) | 2887 | if (!leader) |
2888 | goto out; | 2888 | goto out; |
2889 | 2889 | ||
2890 | name.name = "task"; | 2890 | name.name = "task"; |
2891 | name.len = strlen(name.name); | 2891 | name.len = strlen(name.name); |
2892 | dir = d_hash_and_lookup(leader, &name); | 2892 | dir = d_hash_and_lookup(leader, &name); |
2893 | if (!dir) | 2893 | if (!dir) |
2894 | goto out_put_leader; | 2894 | goto out_put_leader; |
2895 | 2895 | ||
2896 | name.name = buf; | 2896 | name.name = buf; |
2897 | name.len = snprintf(buf, sizeof(buf), "%d", pid); | 2897 | name.len = snprintf(buf, sizeof(buf), "%d", pid); |
2898 | dentry = d_hash_and_lookup(dir, &name); | 2898 | dentry = d_hash_and_lookup(dir, &name); |
2899 | if (dentry) { | 2899 | if (dentry) { |
2900 | shrink_dcache_parent(dentry); | 2900 | shrink_dcache_parent(dentry); |
2901 | d_drop(dentry); | 2901 | d_drop(dentry); |
2902 | dput(dentry); | 2902 | dput(dentry); |
2903 | } | 2903 | } |
2904 | 2904 | ||
2905 | dput(dir); | 2905 | dput(dir); |
2906 | out_put_leader: | 2906 | out_put_leader: |
2907 | dput(leader); | 2907 | dput(leader); |
2908 | out: | 2908 | out: |
2909 | return; | 2909 | return; |
2910 | } | 2910 | } |
2911 | 2911 | ||
2912 | /** | 2912 | /** |
2913 | * proc_flush_task - Remove dcache entries for @task from the /proc dcache. | 2913 | * proc_flush_task - Remove dcache entries for @task from the /proc dcache. |
2914 | * @task: task that should be flushed. | 2914 | * @task: task that should be flushed. |
2915 | * | 2915 | * |
2916 | * When flushing dentries from proc, one needs to flush them from global | 2916 | * When flushing dentries from proc, one needs to flush them from global |
2917 | * proc (proc_mnt) and from all the namespaces' procs this task was seen | 2917 | * proc (proc_mnt) and from all the namespaces' procs this task was seen |
2918 | * in. This call is supposed to do all of this job. | 2918 | * in. This call is supposed to do all of this job. |
2919 | * | 2919 | * |
2920 | * Looks in the dcache for | 2920 | * Looks in the dcache for |
2921 | * /proc/@pid | 2921 | * /proc/@pid |
2922 | * /proc/@tgid/task/@pid | 2922 | * /proc/@tgid/task/@pid |
2923 | * if either directory is present flushes it and all of it'ts children | 2923 | * if either directory is present flushes it and all of it'ts children |
2924 | * from the dcache. | 2924 | * from the dcache. |
2925 | * | 2925 | * |
2926 | * It is safe and reasonable to cache /proc entries for a task until | 2926 | * It is safe and reasonable to cache /proc entries for a task until |
2927 | * that task exits. After that they just clog up the dcache with | 2927 | * that task exits. After that they just clog up the dcache with |
2928 | * useless entries, possibly causing useful dcache entries to be | 2928 | * useless entries, possibly causing useful dcache entries to be |
2929 | * flushed instead. This routine is proved to flush those useless | 2929 | * flushed instead. This routine is proved to flush those useless |
2930 | * dcache entries at process exit time. | 2930 | * dcache entries at process exit time. |
2931 | * | 2931 | * |
2932 | * NOTE: This routine is just an optimization so it does not guarantee | 2932 | * NOTE: This routine is just an optimization so it does not guarantee |
2933 | * that no dcache entries will exist at process exit time it | 2933 | * that no dcache entries will exist at process exit time it |
2934 | * just makes it very unlikely that any will persist. | 2934 | * just makes it very unlikely that any will persist. |
2935 | */ | 2935 | */ |
2936 | 2936 | ||
2937 | void proc_flush_task(struct task_struct *task) | 2937 | void proc_flush_task(struct task_struct *task) |
2938 | { | 2938 | { |
2939 | int i; | 2939 | int i; |
2940 | struct pid *pid, *tgid; | 2940 | struct pid *pid, *tgid; |
2941 | struct upid *upid; | 2941 | struct upid *upid; |
2942 | 2942 | ||
2943 | pid = task_pid(task); | 2943 | pid = task_pid(task); |
2944 | tgid = task_tgid(task); | 2944 | tgid = task_tgid(task); |
2945 | 2945 | ||
2946 | for (i = 0; i <= pid->level; i++) { | 2946 | for (i = 0; i <= pid->level; i++) { |
2947 | upid = &pid->numbers[i]; | 2947 | upid = &pid->numbers[i]; |
2948 | proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, | 2948 | proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, |
2949 | tgid->numbers[i].nr); | 2949 | tgid->numbers[i].nr); |
2950 | } | 2950 | } |
2951 | 2951 | ||
2952 | upid = &pid->numbers[pid->level]; | 2952 | upid = &pid->numbers[pid->level]; |
2953 | if (upid->nr == 1) | 2953 | if (upid->nr == 1) |
2954 | pid_ns_release_proc(upid->ns); | 2954 | pid_ns_release_proc(upid->ns); |
2955 | } | 2955 | } |
2956 | 2956 | ||
2957 | static struct dentry *proc_pid_instantiate(struct inode *dir, | 2957 | static struct dentry *proc_pid_instantiate(struct inode *dir, |
2958 | struct dentry * dentry, | 2958 | struct dentry * dentry, |
2959 | struct task_struct *task, const void *ptr) | 2959 | struct task_struct *task, const void *ptr) |
2960 | { | 2960 | { |
2961 | struct dentry *error = ERR_PTR(-ENOENT); | 2961 | struct dentry *error = ERR_PTR(-ENOENT); |
2962 | struct inode *inode; | 2962 | struct inode *inode; |
2963 | 2963 | ||
2964 | inode = proc_pid_make_inode(dir->i_sb, task); | 2964 | inode = proc_pid_make_inode(dir->i_sb, task); |
2965 | if (!inode) | 2965 | if (!inode) |
2966 | goto out; | 2966 | goto out; |
2967 | 2967 | ||
2968 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; | 2968 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; |
2969 | inode->i_op = &proc_tgid_base_inode_operations; | 2969 | inode->i_op = &proc_tgid_base_inode_operations; |
2970 | inode->i_fop = &proc_tgid_base_operations; | 2970 | inode->i_fop = &proc_tgid_base_operations; |
2971 | inode->i_flags|=S_IMMUTABLE; | 2971 | inode->i_flags|=S_IMMUTABLE; |
2972 | 2972 | ||
2973 | inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, | 2973 | inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, |
2974 | ARRAY_SIZE(tgid_base_stuff)); | 2974 | ARRAY_SIZE(tgid_base_stuff)); |
2975 | 2975 | ||
2976 | d_set_d_op(dentry, &pid_dentry_operations); | 2976 | d_set_d_op(dentry, &pid_dentry_operations); |
2977 | 2977 | ||
2978 | d_add(dentry, inode); | 2978 | d_add(dentry, inode); |
2979 | /* Close the race of the process dying before we return the dentry */ | 2979 | /* Close the race of the process dying before we return the dentry */ |
2980 | if (pid_revalidate(dentry, NULL)) | 2980 | if (pid_revalidate(dentry, NULL)) |
2981 | error = NULL; | 2981 | error = NULL; |
2982 | out: | 2982 | out: |
2983 | return error; | 2983 | return error; |
2984 | } | 2984 | } |
2985 | 2985 | ||
2986 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | 2986 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
2987 | { | 2987 | { |
2988 | struct dentry *result; | 2988 | struct dentry *result; |
2989 | struct task_struct *task; | 2989 | struct task_struct *task; |
2990 | unsigned tgid; | 2990 | unsigned tgid; |
2991 | struct pid_namespace *ns; | 2991 | struct pid_namespace *ns; |
2992 | 2992 | ||
2993 | result = proc_base_lookup(dir, dentry); | 2993 | result = proc_base_lookup(dir, dentry); |
2994 | if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) | 2994 | if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) |
2995 | goto out; | 2995 | goto out; |
2996 | 2996 | ||
2997 | tgid = name_to_int(dentry); | 2997 | tgid = name_to_int(dentry); |
2998 | if (tgid == ~0U) | 2998 | if (tgid == ~0U) |
2999 | goto out; | 2999 | goto out; |
3000 | 3000 | ||
3001 | ns = dentry->d_sb->s_fs_info; | 3001 | ns = dentry->d_sb->s_fs_info; |
3002 | rcu_read_lock(); | 3002 | rcu_read_lock(); |
3003 | task = find_task_by_pid_ns(tgid, ns); | 3003 | task = find_task_by_pid_ns(tgid, ns); |
3004 | if (task) | 3004 | if (task) |
3005 | get_task_struct(task); | 3005 | get_task_struct(task); |
3006 | rcu_read_unlock(); | 3006 | rcu_read_unlock(); |
3007 | if (!task) | 3007 | if (!task) |
3008 | goto out; | 3008 | goto out; |
3009 | 3009 | ||
3010 | result = proc_pid_instantiate(dir, dentry, task, NULL); | 3010 | result = proc_pid_instantiate(dir, dentry, task, NULL); |
3011 | put_task_struct(task); | 3011 | put_task_struct(task); |
3012 | out: | 3012 | out: |
3013 | return result; | 3013 | return result; |
3014 | } | 3014 | } |
3015 | 3015 | ||
/*
 * Cursor used by next_tgid() to find the first task with tgid >= tgid.
 */
struct tgid_iter {
	unsigned int tgid;		/* search position / tgid of the hit */
	struct task_struct *task;	/* task found, or NULL when none */
};
3024 | static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter) | 3024 | static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter) |
3025 | { | 3025 | { |
3026 | struct pid *pid; | 3026 | struct pid *pid; |
3027 | 3027 | ||
3028 | if (iter.task) | 3028 | if (iter.task) |
3029 | put_task_struct(iter.task); | 3029 | put_task_struct(iter.task); |
3030 | rcu_read_lock(); | 3030 | rcu_read_lock(); |
3031 | retry: | 3031 | retry: |
3032 | iter.task = NULL; | 3032 | iter.task = NULL; |
3033 | pid = find_ge_pid(iter.tgid, ns); | 3033 | pid = find_ge_pid(iter.tgid, ns); |
3034 | if (pid) { | 3034 | if (pid) { |
3035 | iter.tgid = pid_nr_ns(pid, ns); | 3035 | iter.tgid = pid_nr_ns(pid, ns); |
3036 | iter.task = pid_task(pid, PIDTYPE_PID); | 3036 | iter.task = pid_task(pid, PIDTYPE_PID); |
3037 | /* What we to know is if the pid we have find is the | 3037 | /* What we to know is if the pid we have find is the |
3038 | * pid of a thread_group_leader. Testing for task | 3038 | * pid of a thread_group_leader. Testing for task |
3039 | * being a thread_group_leader is the obvious thing | 3039 | * being a thread_group_leader is the obvious thing |
3040 | * todo but there is a window when it fails, due to | 3040 | * todo but there is a window when it fails, due to |
3041 | * the pid transfer logic in de_thread. | 3041 | * the pid transfer logic in de_thread. |
3042 | * | 3042 | * |
3043 | * So we perform the straight forward test of seeing | 3043 | * So we perform the straight forward test of seeing |
3044 | * if the pid we have found is the pid of a thread | 3044 | * if the pid we have found is the pid of a thread |
3045 | * group leader, and don't worry if the task we have | 3045 | * group leader, and don't worry if the task we have |
3046 | * found doesn't happen to be a thread group leader. | 3046 | * found doesn't happen to be a thread group leader. |
3047 | * As we don't care in the case of readdir. | 3047 | * As we don't care in the case of readdir. |
3048 | */ | 3048 | */ |
3049 | if (!iter.task || !has_group_leader_pid(iter.task)) { | 3049 | if (!iter.task || !has_group_leader_pid(iter.task)) { |
3050 | iter.tgid += 1; | 3050 | iter.tgid += 1; |
3051 | goto retry; | 3051 | goto retry; |
3052 | } | 3052 | } |
3053 | get_task_struct(iter.task); | 3053 | get_task_struct(iter.task); |
3054 | } | 3054 | } |
3055 | rcu_read_unlock(); | 3055 | rcu_read_unlock(); |
3056 | return iter; | 3056 | return iter; |
3057 | } | 3057 | } |
3058 | 3058 | ||
3059 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) | 3059 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) |
3060 | 3060 | ||
3061 | static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 3061 | static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, |
3062 | struct tgid_iter iter) | 3062 | struct tgid_iter iter) |
3063 | { | 3063 | { |
3064 | char name[PROC_NUMBUF]; | 3064 | char name[PROC_NUMBUF]; |
3065 | int len = snprintf(name, sizeof(name), "%d", iter.tgid); | 3065 | int len = snprintf(name, sizeof(name), "%d", iter.tgid); |
3066 | return proc_fill_cache(filp, dirent, filldir, name, len, | 3066 | return proc_fill_cache(filp, dirent, filldir, name, len, |
3067 | proc_pid_instantiate, iter.task, NULL); | 3067 | proc_pid_instantiate, iter.task, NULL); |
3068 | } | 3068 | } |
3069 | 3069 | ||
3070 | /* for the /proc/ directory itself, after non-process stuff has been done */ | 3070 | /* for the /proc/ directory itself, after non-process stuff has been done */ |
3071 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | 3071 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) |
3072 | { | 3072 | { |
3073 | unsigned int nr; | 3073 | unsigned int nr; |
3074 | struct task_struct *reaper; | 3074 | struct task_struct *reaper; |
3075 | struct tgid_iter iter; | 3075 | struct tgid_iter iter; |
3076 | struct pid_namespace *ns; | 3076 | struct pid_namespace *ns; |
3077 | 3077 | ||
3078 | if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) | 3078 | if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) |
3079 | goto out_no_task; | 3079 | goto out_no_task; |
3080 | nr = filp->f_pos - FIRST_PROCESS_ENTRY; | 3080 | nr = filp->f_pos - FIRST_PROCESS_ENTRY; |
3081 | 3081 | ||
3082 | reaper = get_proc_task(filp->f_path.dentry->d_inode); | 3082 | reaper = get_proc_task(filp->f_path.dentry->d_inode); |
3083 | if (!reaper) | 3083 | if (!reaper) |
3084 | goto out_no_task; | 3084 | goto out_no_task; |
3085 | 3085 | ||
3086 | for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { | 3086 | for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { |
3087 | const struct pid_entry *p = &proc_base_stuff[nr]; | 3087 | const struct pid_entry *p = &proc_base_stuff[nr]; |
3088 | if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) | 3088 | if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) |
3089 | goto out; | 3089 | goto out; |
3090 | } | 3090 | } |
3091 | 3091 | ||
3092 | ns = filp->f_dentry->d_sb->s_fs_info; | 3092 | ns = filp->f_dentry->d_sb->s_fs_info; |
3093 | iter.task = NULL; | 3093 | iter.task = NULL; |
3094 | iter.tgid = filp->f_pos - TGID_OFFSET; | 3094 | iter.tgid = filp->f_pos - TGID_OFFSET; |
3095 | for (iter = next_tgid(ns, iter); | 3095 | for (iter = next_tgid(ns, iter); |
3096 | iter.task; | 3096 | iter.task; |
3097 | iter.tgid += 1, iter = next_tgid(ns, iter)) { | 3097 | iter.tgid += 1, iter = next_tgid(ns, iter)) { |
3098 | filp->f_pos = iter.tgid + TGID_OFFSET; | 3098 | filp->f_pos = iter.tgid + TGID_OFFSET; |
3099 | if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { | 3099 | if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { |
3100 | put_task_struct(iter.task); | 3100 | put_task_struct(iter.task); |
3101 | goto out; | 3101 | goto out; |
3102 | } | 3102 | } |
3103 | } | 3103 | } |
3104 | filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; | 3104 | filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; |
3105 | out: | 3105 | out: |
3106 | put_task_struct(reaper); | 3106 | put_task_struct(reaper); |
3107 | out_no_task: | 3107 | out_no_task: |
3108 | return 0; | 3108 | return 0; |
3109 | } | 3109 | } |
3110 | 3110 | ||
3111 | /* | 3111 | /* |
3112 | * Tasks | 3112 | * Tasks |
3113 | */ | 3113 | */ |
3114 | static const struct pid_entry tid_base_stuff[] = { | 3114 | static const struct pid_entry tid_base_stuff[] = { |
3115 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), | 3115 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), |
3116 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), | 3116 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), |
3117 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), | 3117 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), |
3118 | REG("environ", S_IRUSR, proc_environ_operations), | 3118 | REG("environ", S_IRUSR, proc_environ_operations), |
3119 | INF("auxv", S_IRUSR, proc_pid_auxv), | 3119 | INF("auxv", S_IRUSR, proc_pid_auxv), |
3120 | ONE("status", S_IRUGO, proc_pid_status), | 3120 | ONE("status", S_IRUGO, proc_pid_status), |
3121 | ONE("personality", S_IRUGO, proc_pid_personality), | 3121 | ONE("personality", S_IRUGO, proc_pid_personality), |
3122 | INF("limits", S_IRUGO, proc_pid_limits), | 3122 | INF("limits", S_IRUGO, proc_pid_limits), |
3123 | #ifdef CONFIG_SCHED_DEBUG | 3123 | #ifdef CONFIG_SCHED_DEBUG |
3124 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), | 3124 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), |
3125 | #endif | 3125 | #endif |
3126 | REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), | 3126 | REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), |
3127 | #ifdef CONFIG_HAVE_ARCH_TRACEHOOK | 3127 | #ifdef CONFIG_HAVE_ARCH_TRACEHOOK |
3128 | INF("syscall", S_IRUGO, proc_pid_syscall), | 3128 | INF("syscall", S_IRUGO, proc_pid_syscall), |
3129 | #endif | 3129 | #endif |
3130 | INF("cmdline", S_IRUGO, proc_pid_cmdline), | 3130 | INF("cmdline", S_IRUGO, proc_pid_cmdline), |
3131 | ONE("stat", S_IRUGO, proc_tid_stat), | 3131 | ONE("stat", S_IRUGO, proc_tid_stat), |
3132 | ONE("statm", S_IRUGO, proc_pid_statm), | 3132 | ONE("statm", S_IRUGO, proc_pid_statm), |
3133 | REG("maps", S_IRUGO, proc_maps_operations), | 3133 | REG("maps", S_IRUGO, proc_maps_operations), |
3134 | #ifdef CONFIG_NUMA | 3134 | #ifdef CONFIG_NUMA |
3135 | REG("numa_maps", S_IRUGO, proc_numa_maps_operations), | 3135 | REG("numa_maps", S_IRUGO, proc_numa_maps_operations), |
3136 | #endif | 3136 | #endif |
3137 | REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), | 3137 | REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), |
3138 | LNK("cwd", proc_cwd_link), | 3138 | LNK("cwd", proc_cwd_link), |
3139 | LNK("root", proc_root_link), | 3139 | LNK("root", proc_root_link), |
3140 | LNK("exe", proc_exe_link), | 3140 | LNK("exe", proc_exe_link), |
3141 | REG("mounts", S_IRUGO, proc_mounts_operations), | 3141 | REG("mounts", S_IRUGO, proc_mounts_operations), |
3142 | REG("mountinfo", S_IRUGO, proc_mountinfo_operations), | 3142 | REG("mountinfo", S_IRUGO, proc_mountinfo_operations), |
3143 | #ifdef CONFIG_PROC_PAGE_MONITOR | 3143 | #ifdef CONFIG_PROC_PAGE_MONITOR |
3144 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), | 3144 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), |
3145 | REG("smaps", S_IRUGO, proc_smaps_operations), | 3145 | REG("smaps", S_IRUGO, proc_smaps_operations), |
3146 | REG("pagemap", S_IRUGO, proc_pagemap_operations), | 3146 | REG("pagemap", S_IRUGO, proc_pagemap_operations), |
3147 | #endif | 3147 | #endif |
3148 | #ifdef CONFIG_SECURITY | 3148 | #ifdef CONFIG_SECURITY |
3149 | DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), | 3149 | DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), |
3150 | #endif | 3150 | #endif |
3151 | #ifdef CONFIG_KALLSYMS | 3151 | #ifdef CONFIG_KALLSYMS |
3152 | INF("wchan", S_IRUGO, proc_pid_wchan), | 3152 | INF("wchan", S_IRUGO, proc_pid_wchan), |
3153 | #endif | 3153 | #endif |
3154 | #ifdef CONFIG_STACKTRACE | 3154 | #ifdef CONFIG_STACKTRACE |
3155 | ONE("stack", S_IRUGO, proc_pid_stack), | 3155 | ONE("stack", S_IRUGO, proc_pid_stack), |
3156 | #endif | 3156 | #endif |
3157 | #ifdef CONFIG_SCHEDSTATS | 3157 | #ifdef CONFIG_SCHEDSTATS |
3158 | INF("schedstat", S_IRUGO, proc_pid_schedstat), | 3158 | INF("schedstat", S_IRUGO, proc_pid_schedstat), |
3159 | #endif | 3159 | #endif |
3160 | #ifdef CONFIG_LATENCYTOP | 3160 | #ifdef CONFIG_LATENCYTOP |
3161 | REG("latency", S_IRUGO, proc_lstats_operations), | 3161 | REG("latency", S_IRUGO, proc_lstats_operations), |
3162 | #endif | 3162 | #endif |
3163 | #ifdef CONFIG_PROC_PID_CPUSET | 3163 | #ifdef CONFIG_PROC_PID_CPUSET |
3164 | REG("cpuset", S_IRUGO, proc_cpuset_operations), | 3164 | REG("cpuset", S_IRUGO, proc_cpuset_operations), |
3165 | #endif | 3165 | #endif |
3166 | #ifdef CONFIG_CGROUPS | 3166 | #ifdef CONFIG_CGROUPS |
3167 | REG("cgroup", S_IRUGO, proc_cgroup_operations), | 3167 | REG("cgroup", S_IRUGO, proc_cgroup_operations), |
3168 | #endif | 3168 | #endif |
3169 | INF("oom_score", S_IRUGO, proc_oom_score), | 3169 | INF("oom_score", S_IRUGO, proc_oom_score), |
3170 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), | 3170 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), |
3171 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), | 3171 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), |
3172 | #ifdef CONFIG_AUDITSYSCALL | 3172 | #ifdef CONFIG_AUDITSYSCALL |
3173 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), | 3173 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), |
3174 | REG("sessionid", S_IRUGO, proc_sessionid_operations), | 3174 | REG("sessionid", S_IRUGO, proc_sessionid_operations), |
3175 | #endif | 3175 | #endif |
3176 | #ifdef CONFIG_FAULT_INJECTION | 3176 | #ifdef CONFIG_FAULT_INJECTION |
3177 | REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), | 3177 | REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), |
3178 | #endif | 3178 | #endif |
3179 | #ifdef CONFIG_TASK_IO_ACCOUNTING | 3179 | #ifdef CONFIG_TASK_IO_ACCOUNTING |
3180 | INF("io", S_IRUGO, proc_tid_io_accounting), | 3180 | INF("io", S_IRUGO, proc_tid_io_accounting), |
3181 | #endif | 3181 | #endif |
3182 | }; | 3182 | }; |
3183 | 3183 | ||
3184 | static int proc_tid_base_readdir(struct file * filp, | 3184 | static int proc_tid_base_readdir(struct file * filp, |
3185 | void * dirent, filldir_t filldir) | 3185 | void * dirent, filldir_t filldir) |
3186 | { | 3186 | { |
3187 | return proc_pident_readdir(filp,dirent,filldir, | 3187 | return proc_pident_readdir(filp,dirent,filldir, |
3188 | tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); | 3188 | tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); |
3189 | } | 3189 | } |
3190 | 3190 | ||
3191 | static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ | 3191 | static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ |
3192 | return proc_pident_lookup(dir, dentry, | 3192 | return proc_pident_lookup(dir, dentry, |
3193 | tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); | 3193 | tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); |
3194 | } | 3194 | } |
3195 | 3195 | ||
3196 | static const struct file_operations proc_tid_base_operations = { | 3196 | static const struct file_operations proc_tid_base_operations = { |
3197 | .read = generic_read_dir, | 3197 | .read = generic_read_dir, |
3198 | .readdir = proc_tid_base_readdir, | 3198 | .readdir = proc_tid_base_readdir, |
3199 | .llseek = default_llseek, | 3199 | .llseek = default_llseek, |
3200 | }; | 3200 | }; |
3201 | 3201 | ||
3202 | static const struct inode_operations proc_tid_base_inode_operations = { | 3202 | static const struct inode_operations proc_tid_base_inode_operations = { |
3203 | .lookup = proc_tid_base_lookup, | 3203 | .lookup = proc_tid_base_lookup, |
3204 | .getattr = pid_getattr, | 3204 | .getattr = pid_getattr, |
3205 | .setattr = proc_setattr, | 3205 | .setattr = proc_setattr, |
3206 | }; | 3206 | }; |
3207 | 3207 | ||
3208 | static struct dentry *proc_task_instantiate(struct inode *dir, | 3208 | static struct dentry *proc_task_instantiate(struct inode *dir, |
3209 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 3209 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
3210 | { | 3210 | { |
3211 | struct dentry *error = ERR_PTR(-ENOENT); | 3211 | struct dentry *error = ERR_PTR(-ENOENT); |
3212 | struct inode *inode; | 3212 | struct inode *inode; |
3213 | inode = proc_pid_make_inode(dir->i_sb, task); | 3213 | inode = proc_pid_make_inode(dir->i_sb, task); |
3214 | 3214 | ||
3215 | if (!inode) | 3215 | if (!inode) |
3216 | goto out; | 3216 | goto out; |
3217 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; | 3217 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; |
3218 | inode->i_op = &proc_tid_base_inode_operations; | 3218 | inode->i_op = &proc_tid_base_inode_operations; |
3219 | inode->i_fop = &proc_tid_base_operations; | 3219 | inode->i_fop = &proc_tid_base_operations; |
3220 | inode->i_flags|=S_IMMUTABLE; | 3220 | inode->i_flags|=S_IMMUTABLE; |
3221 | 3221 | ||
3222 | inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, | 3222 | inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, |
3223 | ARRAY_SIZE(tid_base_stuff)); | 3223 | ARRAY_SIZE(tid_base_stuff)); |
3224 | 3224 | ||
3225 | d_set_d_op(dentry, &pid_dentry_operations); | 3225 | d_set_d_op(dentry, &pid_dentry_operations); |
3226 | 3226 | ||
3227 | d_add(dentry, inode); | 3227 | d_add(dentry, inode); |
3228 | /* Close the race of the process dying before we return the dentry */ | 3228 | /* Close the race of the process dying before we return the dentry */ |
3229 | if (pid_revalidate(dentry, NULL)) | 3229 | if (pid_revalidate(dentry, NULL)) |
3230 | error = NULL; | 3230 | error = NULL; |
3231 | out: | 3231 | out: |
3232 | return error; | 3232 | return error; |
3233 | } | 3233 | } |
3234 | 3234 | ||
3235 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | 3235 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
3236 | { | 3236 | { |
3237 | struct dentry *result = ERR_PTR(-ENOENT); | 3237 | struct dentry *result = ERR_PTR(-ENOENT); |
3238 | struct task_struct *task; | 3238 | struct task_struct *task; |
3239 | struct task_struct *leader = get_proc_task(dir); | 3239 | struct task_struct *leader = get_proc_task(dir); |
3240 | unsigned tid; | 3240 | unsigned tid; |
3241 | struct pid_namespace *ns; | 3241 | struct pid_namespace *ns; |
3242 | 3242 | ||
3243 | if (!leader) | 3243 | if (!leader) |
3244 | goto out_no_task; | 3244 | goto out_no_task; |
3245 | 3245 | ||
3246 | tid = name_to_int(dentry); | 3246 | tid = name_to_int(dentry); |
3247 | if (tid == ~0U) | 3247 | if (tid == ~0U) |
3248 | goto out; | 3248 | goto out; |
3249 | 3249 | ||
3250 | ns = dentry->d_sb->s_fs_info; | 3250 | ns = dentry->d_sb->s_fs_info; |
3251 | rcu_read_lock(); | 3251 | rcu_read_lock(); |
3252 | task = find_task_by_pid_ns(tid, ns); | 3252 | task = find_task_by_pid_ns(tid, ns); |
3253 | if (task) | 3253 | if (task) |
3254 | get_task_struct(task); | 3254 | get_task_struct(task); |
3255 | rcu_read_unlock(); | 3255 | rcu_read_unlock(); |
3256 | if (!task) | 3256 | if (!task) |
3257 | goto out; | 3257 | goto out; |
3258 | if (!same_thread_group(leader, task)) | 3258 | if (!same_thread_group(leader, task)) |
3259 | goto out_drop_task; | 3259 | goto out_drop_task; |
3260 | 3260 | ||
3261 | result = proc_task_instantiate(dir, dentry, task, NULL); | 3261 | result = proc_task_instantiate(dir, dentry, task, NULL); |
3262 | out_drop_task: | 3262 | out_drop_task: |
3263 | put_task_struct(task); | 3263 | put_task_struct(task); |
3264 | out: | 3264 | out: |
3265 | put_task_struct(leader); | 3265 | put_task_struct(leader); |
3266 | out_no_task: | 3266 | out_no_task: |
3267 | return result; | 3267 | return result; |
3268 | } | 3268 | } |
3269 | 3269 | ||
3270 | /* | 3270 | /* |
3271 | * Find the first tid of a thread group to return to user space. | 3271 | * Find the first tid of a thread group to return to user space. |
3272 | * | 3272 | * |
3273 | * Usually this is just the thread group leader, but if the users | 3273 | * Usually this is just the thread group leader, but if the users |
3274 | * buffer was too small or there was a seek into the middle of the | 3274 | * buffer was too small or there was a seek into the middle of the |
3275 | * directory we have more work todo. | 3275 | * directory we have more work todo. |
3276 | * | 3276 | * |
3277 | * In the case of a short read we start with find_task_by_pid. | 3277 | * In the case of a short read we start with find_task_by_pid. |
3278 | * | 3278 | * |
3279 | * In the case of a seek we start with the leader and walk nr | 3279 | * In the case of a seek we start with the leader and walk nr |
3280 | * threads past it. | 3280 | * threads past it. |
3281 | */ | 3281 | */ |
3282 | static struct task_struct *first_tid(struct task_struct *leader, | 3282 | static struct task_struct *first_tid(struct task_struct *leader, |
3283 | int tid, int nr, struct pid_namespace *ns) | 3283 | int tid, int nr, struct pid_namespace *ns) |
3284 | { | 3284 | { |
3285 | struct task_struct *pos; | 3285 | struct task_struct *pos; |
3286 | 3286 | ||
3287 | rcu_read_lock(); | 3287 | rcu_read_lock(); |
3288 | /* Attempt to start with the pid of a thread */ | 3288 | /* Attempt to start with the pid of a thread */ |
3289 | if (tid && (nr > 0)) { | 3289 | if (tid && (nr > 0)) { |
3290 | pos = find_task_by_pid_ns(tid, ns); | 3290 | pos = find_task_by_pid_ns(tid, ns); |
3291 | if (pos && (pos->group_leader == leader)) | 3291 | if (pos && (pos->group_leader == leader)) |
3292 | goto found; | 3292 | goto found; |
3293 | } | 3293 | } |
3294 | 3294 | ||
3295 | /* If nr exceeds the number of threads there is nothing todo */ | 3295 | /* If nr exceeds the number of threads there is nothing todo */ |
3296 | pos = NULL; | 3296 | pos = NULL; |
3297 | if (nr && nr >= get_nr_threads(leader)) | 3297 | if (nr && nr >= get_nr_threads(leader)) |
3298 | goto out; | 3298 | goto out; |
3299 | 3299 | ||
3300 | /* If we haven't found our starting place yet start | 3300 | /* If we haven't found our starting place yet start |
3301 | * with the leader and walk nr threads forward. | 3301 | * with the leader and walk nr threads forward. |
3302 | */ | 3302 | */ |
3303 | for (pos = leader; nr > 0; --nr) { | 3303 | for (pos = leader; nr > 0; --nr) { |
3304 | pos = next_thread(pos); | 3304 | pos = next_thread(pos); |
3305 | if (pos == leader) { | 3305 | if (pos == leader) { |
3306 | pos = NULL; | 3306 | pos = NULL; |
3307 | goto out; | 3307 | goto out; |
3308 | } | 3308 | } |
3309 | } | 3309 | } |
3310 | found: | 3310 | found: |
3311 | get_task_struct(pos); | 3311 | get_task_struct(pos); |
3312 | out: | 3312 | out: |
3313 | rcu_read_unlock(); | 3313 | rcu_read_unlock(); |
3314 | return pos; | 3314 | return pos; |
3315 | } | 3315 | } |
3316 | 3316 | ||
3317 | /* | 3317 | /* |
3318 | * Find the next thread in the thread list. | 3318 | * Find the next thread in the thread list. |
3319 | * Return NULL if there is an error or no next thread. | 3319 | * Return NULL if there is an error or no next thread. |
3320 | * | 3320 | * |
3321 | * The reference to the input task_struct is released. | 3321 | * The reference to the input task_struct is released. |
3322 | */ | 3322 | */ |
3323 | static struct task_struct *next_tid(struct task_struct *start) | 3323 | static struct task_struct *next_tid(struct task_struct *start) |
3324 | { | 3324 | { |
3325 | struct task_struct *pos = NULL; | 3325 | struct task_struct *pos = NULL; |
3326 | rcu_read_lock(); | 3326 | rcu_read_lock(); |
3327 | if (pid_alive(start)) { | 3327 | if (pid_alive(start)) { |
3328 | pos = next_thread(start); | 3328 | pos = next_thread(start); |
3329 | if (thread_group_leader(pos)) | 3329 | if (thread_group_leader(pos)) |
3330 | pos = NULL; | 3330 | pos = NULL; |
3331 | else | 3331 | else |
3332 | get_task_struct(pos); | 3332 | get_task_struct(pos); |
3333 | } | 3333 | } |
3334 | rcu_read_unlock(); | 3334 | rcu_read_unlock(); |
3335 | put_task_struct(start); | 3335 | put_task_struct(start); |
3336 | return pos; | 3336 | return pos; |
3337 | } | 3337 | } |
3338 | 3338 | ||
3339 | static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 3339 | static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir, |
3340 | struct task_struct *task, int tid) | 3340 | struct task_struct *task, int tid) |
3341 | { | 3341 | { |
3342 | char name[PROC_NUMBUF]; | 3342 | char name[PROC_NUMBUF]; |
3343 | int len = snprintf(name, sizeof(name), "%d", tid); | 3343 | int len = snprintf(name, sizeof(name), "%d", tid); |
3344 | return proc_fill_cache(filp, dirent, filldir, name, len, | 3344 | return proc_fill_cache(filp, dirent, filldir, name, len, |
3345 | proc_task_instantiate, task, NULL); | 3345 | proc_task_instantiate, task, NULL); |
3346 | } | 3346 | } |
3347 | 3347 | ||
3348 | /* for the /proc/TGID/task/ directories */ | 3348 | /* for the /proc/TGID/task/ directories */ |
3349 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) | 3349 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) |
3350 | { | 3350 | { |
3351 | struct dentry *dentry = filp->f_path.dentry; | 3351 | struct dentry *dentry = filp->f_path.dentry; |
3352 | struct inode *inode = dentry->d_inode; | 3352 | struct inode *inode = dentry->d_inode; |
3353 | struct task_struct *leader = NULL; | 3353 | struct task_struct *leader = NULL; |
3354 | struct task_struct *task; | 3354 | struct task_struct *task; |
3355 | int retval = -ENOENT; | 3355 | int retval = -ENOENT; |
3356 | ino_t ino; | 3356 | ino_t ino; |
3357 | int tid; | 3357 | int tid; |
3358 | struct pid_namespace *ns; | 3358 | struct pid_namespace *ns; |
3359 | 3359 | ||
3360 | task = get_proc_task(inode); | 3360 | task = get_proc_task(inode); |
3361 | if (!task) | 3361 | if (!task) |
3362 | goto out_no_task; | 3362 | goto out_no_task; |
3363 | rcu_read_lock(); | 3363 | rcu_read_lock(); |
3364 | if (pid_alive(task)) { | 3364 | if (pid_alive(task)) { |
3365 | leader = task->group_leader; | 3365 | leader = task->group_leader; |
3366 | get_task_struct(leader); | 3366 | get_task_struct(leader); |
3367 | } | 3367 | } |
3368 | rcu_read_unlock(); | 3368 | rcu_read_unlock(); |
3369 | put_task_struct(task); | 3369 | put_task_struct(task); |
3370 | if (!leader) | 3370 | if (!leader) |
3371 | goto out_no_task; | 3371 | goto out_no_task; |
3372 | retval = 0; | 3372 | retval = 0; |
3373 | 3373 | ||
3374 | switch ((unsigned long)filp->f_pos) { | 3374 | switch ((unsigned long)filp->f_pos) { |
3375 | case 0: | 3375 | case 0: |
3376 | ino = inode->i_ino; | 3376 | ino = inode->i_ino; |
3377 | if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0) | 3377 | if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0) |
3378 | goto out; | 3378 | goto out; |
3379 | filp->f_pos++; | 3379 | filp->f_pos++; |
3380 | /* fall through */ | 3380 | /* fall through */ |
3381 | case 1: | 3381 | case 1: |
3382 | ino = parent_ino(dentry); | 3382 | ino = parent_ino(dentry); |
3383 | if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0) | 3383 | if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0) |
3384 | goto out; | 3384 | goto out; |
3385 | filp->f_pos++; | 3385 | filp->f_pos++; |
3386 | /* fall through */ | 3386 | /* fall through */ |
3387 | } | 3387 | } |
3388 | 3388 | ||
3389 | /* f_version caches the tgid value that the last readdir call couldn't | 3389 | /* f_version caches the tgid value that the last readdir call couldn't |
3390 | * return. lseek aka telldir automagically resets f_version to 0. | 3390 | * return. lseek aka telldir automagically resets f_version to 0. |
3391 | */ | 3391 | */ |
3392 | ns = filp->f_dentry->d_sb->s_fs_info; | 3392 | ns = filp->f_dentry->d_sb->s_fs_info; |
3393 | tid = (int)filp->f_version; | 3393 | tid = (int)filp->f_version; |
3394 | filp->f_version = 0; | 3394 | filp->f_version = 0; |
3395 | for (task = first_tid(leader, tid, filp->f_pos - 2, ns); | 3395 | for (task = first_tid(leader, tid, filp->f_pos - 2, ns); |
3396 | task; | 3396 | task; |
3397 | task = next_tid(task), filp->f_pos++) { | 3397 | task = next_tid(task), filp->f_pos++) { |
3398 | tid = task_pid_nr_ns(task, ns); | 3398 | tid = task_pid_nr_ns(task, ns); |
3399 | if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { | 3399 | if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { |
3400 | /* returning this tgid failed, save it as the first | 3400 | /* returning this tgid failed, save it as the first |
3401 | * pid for the next readir call */ | 3401 | * pid for the next readir call */ |
3402 | filp->f_version = (u64)tid; | 3402 | filp->f_version = (u64)tid; |
3403 | put_task_struct(task); | 3403 | put_task_struct(task); |
3404 | break; | 3404 | break; |
3405 | } | 3405 | } |
3406 | } | 3406 | } |
3407 | out: | 3407 | out: |
3408 | put_task_struct(leader); | 3408 | put_task_struct(leader); |
3409 | out_no_task: | 3409 | out_no_task: |
3410 | return retval; | 3410 | return retval; |
3411 | } | 3411 | } |
3412 | 3412 | ||
3413 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 3413 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
3414 | { | 3414 | { |
3415 | struct inode *inode = dentry->d_inode; | 3415 | struct inode *inode = dentry->d_inode; |
3416 | struct task_struct *p = get_proc_task(inode); | 3416 | struct task_struct *p = get_proc_task(inode); |
3417 | generic_fillattr(inode, stat); | 3417 | generic_fillattr(inode, stat); |
3418 | 3418 | ||
3419 | if (p) { | 3419 | if (p) { |
3420 | stat->nlink += get_nr_threads(p); | 3420 | stat->nlink += get_nr_threads(p); |
3421 | put_task_struct(p); | 3421 | put_task_struct(p); |
3422 | } | 3422 | } |
3423 | 3423 | ||
3424 | return 0; | 3424 | return 0; |
3425 | } | 3425 | } |
3426 | 3426 | ||
3427 | static const struct inode_operations proc_task_inode_operations = { | 3427 | static const struct inode_operations proc_task_inode_operations = { |
3428 | .lookup = proc_task_lookup, | 3428 | .lookup = proc_task_lookup, |
3429 | .getattr = proc_task_getattr, | 3429 | .getattr = proc_task_getattr, |
3430 | .setattr = proc_setattr, | 3430 | .setattr = proc_setattr, |
3431 | }; | 3431 | }; |
3432 | 3432 | ||
3433 | static const struct file_operations proc_task_operations = { | 3433 | static const struct file_operations proc_task_operations = { |
3434 | .read = generic_read_dir, | 3434 | .read = generic_read_dir, |
3435 | .readdir = proc_task_readdir, | 3435 | .readdir = proc_task_readdir, |
3436 | .llseek = default_llseek, | 3436 | .llseek = default_llseek, |
3437 | }; | 3437 | }; |
3438 | 3438 |
fs/proc/task_mmu.c
1 | #include <linux/mm.h> | 1 | #include <linux/mm.h> |
2 | #include <linux/hugetlb.h> | 2 | #include <linux/hugetlb.h> |
3 | #include <linux/huge_mm.h> | 3 | #include <linux/huge_mm.h> |
4 | #include <linux/mount.h> | 4 | #include <linux/mount.h> |
5 | #include <linux/seq_file.h> | 5 | #include <linux/seq_file.h> |
6 | #include <linux/highmem.h> | 6 | #include <linux/highmem.h> |
7 | #include <linux/ptrace.h> | 7 | #include <linux/ptrace.h> |
8 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
9 | #include <linux/pagemap.h> | 9 | #include <linux/pagemap.h> |
10 | #include <linux/mempolicy.h> | 10 | #include <linux/mempolicy.h> |
11 | #include <linux/rmap.h> | 11 | #include <linux/rmap.h> |
12 | #include <linux/swap.h> | 12 | #include <linux/swap.h> |
13 | #include <linux/swapops.h> | 13 | #include <linux/swapops.h> |
14 | 14 | ||
15 | #include <asm/elf.h> | 15 | #include <asm/elf.h> |
16 | #include <asm/uaccess.h> | 16 | #include <asm/uaccess.h> |
17 | #include <asm/tlbflush.h> | 17 | #include <asm/tlbflush.h> |
18 | #include "internal.h" | 18 | #include "internal.h" |
19 | 19 | ||
20 | void task_mem(struct seq_file *m, struct mm_struct *mm) | 20 | void task_mem(struct seq_file *m, struct mm_struct *mm) |
21 | { | 21 | { |
22 | unsigned long data, text, lib, swap; | 22 | unsigned long data, text, lib, swap; |
23 | unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; | 23 | unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; |
24 | 24 | ||
25 | /* | 25 | /* |
26 | * Note: to minimize their overhead, mm maintains hiwater_vm and | 26 | * Note: to minimize their overhead, mm maintains hiwater_vm and |
27 | * hiwater_rss only when about to *lower* total_vm or rss. Any | 27 | * hiwater_rss only when about to *lower* total_vm or rss. Any |
28 | * collector of these hiwater stats must therefore get total_vm | 28 | * collector of these hiwater stats must therefore get total_vm |
29 | * and rss too, which will usually be the higher. Barriers? not | 29 | * and rss too, which will usually be the higher. Barriers? not |
30 | * worth the effort, such snapshots can always be inconsistent. | 30 | * worth the effort, such snapshots can always be inconsistent. |
31 | */ | 31 | */ |
32 | hiwater_vm = total_vm = mm->total_vm; | 32 | hiwater_vm = total_vm = mm->total_vm; |
33 | if (hiwater_vm < mm->hiwater_vm) | 33 | if (hiwater_vm < mm->hiwater_vm) |
34 | hiwater_vm = mm->hiwater_vm; | 34 | hiwater_vm = mm->hiwater_vm; |
35 | hiwater_rss = total_rss = get_mm_rss(mm); | 35 | hiwater_rss = total_rss = get_mm_rss(mm); |
36 | if (hiwater_rss < mm->hiwater_rss) | 36 | if (hiwater_rss < mm->hiwater_rss) |
37 | hiwater_rss = mm->hiwater_rss; | 37 | hiwater_rss = mm->hiwater_rss; |
38 | 38 | ||
39 | data = mm->total_vm - mm->shared_vm - mm->stack_vm; | 39 | data = mm->total_vm - mm->shared_vm - mm->stack_vm; |
40 | text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; | 40 | text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; |
41 | lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; | 41 | lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; |
42 | swap = get_mm_counter(mm, MM_SWAPENTS); | 42 | swap = get_mm_counter(mm, MM_SWAPENTS); |
43 | seq_printf(m, | 43 | seq_printf(m, |
44 | "VmPeak:\t%8lu kB\n" | 44 | "VmPeak:\t%8lu kB\n" |
45 | "VmSize:\t%8lu kB\n" | 45 | "VmSize:\t%8lu kB\n" |
46 | "VmLck:\t%8lu kB\n" | 46 | "VmLck:\t%8lu kB\n" |
47 | "VmHWM:\t%8lu kB\n" | 47 | "VmHWM:\t%8lu kB\n" |
48 | "VmRSS:\t%8lu kB\n" | 48 | "VmRSS:\t%8lu kB\n" |
49 | "VmData:\t%8lu kB\n" | 49 | "VmData:\t%8lu kB\n" |
50 | "VmStk:\t%8lu kB\n" | 50 | "VmStk:\t%8lu kB\n" |
51 | "VmExe:\t%8lu kB\n" | 51 | "VmExe:\t%8lu kB\n" |
52 | "VmLib:\t%8lu kB\n" | 52 | "VmLib:\t%8lu kB\n" |
53 | "VmPTE:\t%8lu kB\n" | 53 | "VmPTE:\t%8lu kB\n" |
54 | "VmSwap:\t%8lu kB\n", | 54 | "VmSwap:\t%8lu kB\n", |
55 | hiwater_vm << (PAGE_SHIFT-10), | 55 | hiwater_vm << (PAGE_SHIFT-10), |
56 | (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), | 56 | (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), |
57 | mm->locked_vm << (PAGE_SHIFT-10), | 57 | mm->locked_vm << (PAGE_SHIFT-10), |
58 | hiwater_rss << (PAGE_SHIFT-10), | 58 | hiwater_rss << (PAGE_SHIFT-10), |
59 | total_rss << (PAGE_SHIFT-10), | 59 | total_rss << (PAGE_SHIFT-10), |
60 | data << (PAGE_SHIFT-10), | 60 | data << (PAGE_SHIFT-10), |
61 | mm->stack_vm << (PAGE_SHIFT-10), text, lib, | 61 | mm->stack_vm << (PAGE_SHIFT-10), text, lib, |
62 | (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10, | 62 | (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10, |
63 | swap << (PAGE_SHIFT-10)); | 63 | swap << (PAGE_SHIFT-10)); |
64 | } | 64 | } |
65 | 65 | ||
66 | unsigned long task_vsize(struct mm_struct *mm) | 66 | unsigned long task_vsize(struct mm_struct *mm) |
67 | { | 67 | { |
68 | return PAGE_SIZE * mm->total_vm; | 68 | return PAGE_SIZE * mm->total_vm; |
69 | } | 69 | } |
70 | 70 | ||
71 | unsigned long task_statm(struct mm_struct *mm, | 71 | unsigned long task_statm(struct mm_struct *mm, |
72 | unsigned long *shared, unsigned long *text, | 72 | unsigned long *shared, unsigned long *text, |
73 | unsigned long *data, unsigned long *resident) | 73 | unsigned long *data, unsigned long *resident) |
74 | { | 74 | { |
75 | *shared = get_mm_counter(mm, MM_FILEPAGES); | 75 | *shared = get_mm_counter(mm, MM_FILEPAGES); |
76 | *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) | 76 | *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) |
77 | >> PAGE_SHIFT; | 77 | >> PAGE_SHIFT; |
78 | *data = mm->total_vm - mm->shared_vm; | 78 | *data = mm->total_vm - mm->shared_vm; |
79 | *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); | 79 | *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); |
80 | return mm->total_vm; | 80 | return mm->total_vm; |
81 | } | 81 | } |
82 | 82 | ||
/*
 * Emit padding into @m: one space right-aligned in a field whose width
 * is (25 + 6 * sizeof(void *)) minus @len, clamped to at least 1.
 */
static void pad_len_spaces(struct seq_file *m, int len)
{
	int width = 25 + sizeof(void*) * 6 - len;

	if (width < 1)
		width = 1;
	seq_printf(m, "%*c", width, ' ');
}
90 | 90 | ||
91 | static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) | 91 | static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) |
92 | { | 92 | { |
93 | if (vma && vma != priv->tail_vma) { | 93 | if (vma && vma != priv->tail_vma) { |
94 | struct mm_struct *mm = vma->vm_mm; | 94 | struct mm_struct *mm = vma->vm_mm; |
95 | up_read(&mm->mmap_sem); | 95 | up_read(&mm->mmap_sem); |
96 | mmput(mm); | 96 | mmput(mm); |
97 | } | 97 | } |
98 | } | 98 | } |
99 | 99 | ||
100 | static void *m_start(struct seq_file *m, loff_t *pos) | 100 | static void *m_start(struct seq_file *m, loff_t *pos) |
101 | { | 101 | { |
102 | struct proc_maps_private *priv = m->private; | 102 | struct proc_maps_private *priv = m->private; |
103 | unsigned long last_addr = m->version; | 103 | unsigned long last_addr = m->version; |
104 | struct mm_struct *mm; | 104 | struct mm_struct *mm; |
105 | struct vm_area_struct *vma, *tail_vma = NULL; | 105 | struct vm_area_struct *vma, *tail_vma = NULL; |
106 | loff_t l = *pos; | 106 | loff_t l = *pos; |
107 | 107 | ||
108 | /* Clear the per syscall fields in priv */ | 108 | /* Clear the per syscall fields in priv */ |
109 | priv->task = NULL; | 109 | priv->task = NULL; |
110 | priv->tail_vma = NULL; | 110 | priv->tail_vma = NULL; |
111 | 111 | ||
112 | /* | 112 | /* |
113 | * We remember last_addr rather than next_addr to hit with | 113 | * We remember last_addr rather than next_addr to hit with |
114 | * mmap_cache most of the time. We have zero last_addr at | 114 | * mmap_cache most of the time. We have zero last_addr at |
115 | * the beginning and also after lseek. We will have -1 last_addr | 115 | * the beginning and also after lseek. We will have -1 last_addr |
116 | * after the end of the vmas. | 116 | * after the end of the vmas. |
117 | */ | 117 | */ |
118 | 118 | ||
119 | if (last_addr == -1UL) | 119 | if (last_addr == -1UL) |
120 | return NULL; | 120 | return NULL; |
121 | 121 | ||
122 | priv->task = get_pid_task(priv->pid, PIDTYPE_PID); | 122 | priv->task = get_pid_task(priv->pid, PIDTYPE_PID); |
123 | if (!priv->task) | 123 | if (!priv->task) |
124 | return ERR_PTR(-ESRCH); | 124 | return ERR_PTR(-ESRCH); |
125 | 125 | ||
126 | mm = mm_for_maps(priv->task); | 126 | mm = mm_for_maps(priv->task); |
127 | if (!mm || IS_ERR(mm)) | 127 | if (!mm || IS_ERR(mm)) |
128 | return mm; | 128 | return mm; |
129 | down_read(&mm->mmap_sem); | 129 | down_read(&mm->mmap_sem); |
130 | 130 | ||
131 | tail_vma = get_gate_vma(priv->task->mm); | 131 | tail_vma = get_gate_vma(priv->task->mm); |
132 | priv->tail_vma = tail_vma; | 132 | priv->tail_vma = tail_vma; |
133 | 133 | ||
134 | /* Start with last addr hint */ | 134 | /* Start with last addr hint */ |
135 | vma = find_vma(mm, last_addr); | 135 | vma = find_vma(mm, last_addr); |
136 | if (last_addr && vma) { | 136 | if (last_addr && vma) { |
137 | vma = vma->vm_next; | 137 | vma = vma->vm_next; |
138 | goto out; | 138 | goto out; |
139 | } | 139 | } |
140 | 140 | ||
141 | /* | 141 | /* |
142 | * Check the vma index is within the range and do | 142 | * Check the vma index is within the range and do |
143 | * sequential scan until m_index. | 143 | * sequential scan until m_index. |
144 | */ | 144 | */ |
145 | vma = NULL; | 145 | vma = NULL; |
146 | if ((unsigned long)l < mm->map_count) { | 146 | if ((unsigned long)l < mm->map_count) { |
147 | vma = mm->mmap; | 147 | vma = mm->mmap; |
148 | while (l-- && vma) | 148 | while (l-- && vma) |
149 | vma = vma->vm_next; | 149 | vma = vma->vm_next; |
150 | goto out; | 150 | goto out; |
151 | } | 151 | } |
152 | 152 | ||
153 | if (l != mm->map_count) | 153 | if (l != mm->map_count) |
154 | tail_vma = NULL; /* After gate vma */ | 154 | tail_vma = NULL; /* After gate vma */ |
155 | 155 | ||
156 | out: | 156 | out: |
157 | if (vma) | 157 | if (vma) |
158 | return vma; | 158 | return vma; |
159 | 159 | ||
160 | /* End of vmas has been reached */ | 160 | /* End of vmas has been reached */ |
161 | m->version = (tail_vma != NULL)? 0: -1UL; | 161 | m->version = (tail_vma != NULL)? 0: -1UL; |
162 | up_read(&mm->mmap_sem); | 162 | up_read(&mm->mmap_sem); |
163 | mmput(mm); | 163 | mmput(mm); |
164 | return tail_vma; | 164 | return tail_vma; |
165 | } | 165 | } |
166 | 166 | ||
167 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) | 167 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) |
168 | { | 168 | { |
169 | struct proc_maps_private *priv = m->private; | 169 | struct proc_maps_private *priv = m->private; |
170 | struct vm_area_struct *vma = v; | 170 | struct vm_area_struct *vma = v; |
171 | struct vm_area_struct *tail_vma = priv->tail_vma; | 171 | struct vm_area_struct *tail_vma = priv->tail_vma; |
172 | 172 | ||
173 | (*pos)++; | 173 | (*pos)++; |
174 | if (vma && (vma != tail_vma) && vma->vm_next) | 174 | if (vma && (vma != tail_vma) && vma->vm_next) |
175 | return vma->vm_next; | 175 | return vma->vm_next; |
176 | vma_stop(priv, vma); | 176 | vma_stop(priv, vma); |
177 | return (vma != tail_vma)? tail_vma: NULL; | 177 | return (vma != tail_vma)? tail_vma: NULL; |
178 | } | 178 | } |
179 | 179 | ||
180 | static void m_stop(struct seq_file *m, void *v) | 180 | static void m_stop(struct seq_file *m, void *v) |
181 | { | 181 | { |
182 | struct proc_maps_private *priv = m->private; | 182 | struct proc_maps_private *priv = m->private; |
183 | struct vm_area_struct *vma = v; | 183 | struct vm_area_struct *vma = v; |
184 | 184 | ||
185 | if (!IS_ERR(vma)) | 185 | if (!IS_ERR(vma)) |
186 | vma_stop(priv, vma); | 186 | vma_stop(priv, vma); |
187 | if (priv->task) | 187 | if (priv->task) |
188 | put_task_struct(priv->task); | 188 | put_task_struct(priv->task); |
189 | } | 189 | } |
190 | 190 | ||
191 | static int do_maps_open(struct inode *inode, struct file *file, | 191 | static int do_maps_open(struct inode *inode, struct file *file, |
192 | const struct seq_operations *ops) | 192 | const struct seq_operations *ops) |
193 | { | 193 | { |
194 | struct proc_maps_private *priv; | 194 | struct proc_maps_private *priv; |
195 | int ret = -ENOMEM; | 195 | int ret = -ENOMEM; |
196 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | 196 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); |
197 | if (priv) { | 197 | if (priv) { |
198 | priv->pid = proc_pid(inode); | 198 | priv->pid = proc_pid(inode); |
199 | ret = seq_open(file, ops); | 199 | ret = seq_open(file, ops); |
200 | if (!ret) { | 200 | if (!ret) { |
201 | struct seq_file *m = file->private_data; | 201 | struct seq_file *m = file->private_data; |
202 | m->private = priv; | 202 | m->private = priv; |
203 | } else { | 203 | } else { |
204 | kfree(priv); | 204 | kfree(priv); |
205 | } | 205 | } |
206 | } | 206 | } |
207 | return ret; | 207 | return ret; |
208 | } | 208 | } |
209 | 209 | ||
210 | static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | 210 | static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) |
211 | { | 211 | { |
212 | struct mm_struct *mm = vma->vm_mm; | 212 | struct mm_struct *mm = vma->vm_mm; |
213 | struct file *file = vma->vm_file; | 213 | struct file *file = vma->vm_file; |
214 | vm_flags_t flags = vma->vm_flags; | 214 | vm_flags_t flags = vma->vm_flags; |
215 | unsigned long ino = 0; | 215 | unsigned long ino = 0; |
216 | unsigned long long pgoff = 0; | 216 | unsigned long long pgoff = 0; |
217 | unsigned long start, end; | 217 | unsigned long start, end; |
218 | dev_t dev = 0; | 218 | dev_t dev = 0; |
219 | int len; | 219 | int len; |
220 | 220 | ||
221 | if (file) { | 221 | if (file) { |
222 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 222 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
223 | dev = inode->i_sb->s_dev; | 223 | dev = inode->i_sb->s_dev; |
224 | ino = inode->i_ino; | 224 | ino = inode->i_ino; |
225 | pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; | 225 | pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; |
226 | } | 226 | } |
227 | 227 | ||
228 | /* We don't show the stack guard page in /proc/maps */ | 228 | /* We don't show the stack guard page in /proc/maps */ |
229 | start = vma->vm_start; | 229 | start = vma->vm_start; |
230 | if (stack_guard_page_start(vma, start)) | 230 | if (stack_guard_page_start(vma, start)) |
231 | start += PAGE_SIZE; | 231 | start += PAGE_SIZE; |
232 | end = vma->vm_end; | 232 | end = vma->vm_end; |
233 | if (stack_guard_page_end(vma, end)) | 233 | if (stack_guard_page_end(vma, end)) |
234 | end -= PAGE_SIZE; | 234 | end -= PAGE_SIZE; |
235 | 235 | ||
236 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", | 236 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", |
237 | start, | 237 | start, |
238 | end, | 238 | end, |
239 | flags & VM_READ ? 'r' : '-', | 239 | flags & VM_READ ? 'r' : '-', |
240 | flags & VM_WRITE ? 'w' : '-', | 240 | flags & VM_WRITE ? 'w' : '-', |
241 | flags & VM_EXEC ? 'x' : '-', | 241 | flags & VM_EXEC ? 'x' : '-', |
242 | flags & VM_MAYSHARE ? 's' : 'p', | 242 | flags & VM_MAYSHARE ? 's' : 'p', |
243 | pgoff, | 243 | pgoff, |
244 | MAJOR(dev), MINOR(dev), ino, &len); | 244 | MAJOR(dev), MINOR(dev), ino, &len); |
245 | 245 | ||
246 | /* | 246 | /* |
247 | * Print the dentry name for named mappings, and a | 247 | * Print the dentry name for named mappings, and a |
248 | * special [heap] marker for the heap: | 248 | * special [heap] marker for the heap: |
249 | */ | 249 | */ |
250 | if (file) { | 250 | if (file) { |
251 | pad_len_spaces(m, len); | 251 | pad_len_spaces(m, len); |
252 | seq_path(m, &file->f_path, "\n"); | 252 | seq_path(m, &file->f_path, "\n"); |
253 | } else { | 253 | } else { |
254 | const char *name = arch_vma_name(vma); | 254 | const char *name = arch_vma_name(vma); |
255 | if (!name) { | 255 | if (!name) { |
256 | if (mm) { | 256 | if (mm) { |
257 | if (vma->vm_start <= mm->brk && | 257 | if (vma->vm_start <= mm->brk && |
258 | vma->vm_end >= mm->start_brk) { | 258 | vma->vm_end >= mm->start_brk) { |
259 | name = "[heap]"; | 259 | name = "[heap]"; |
260 | } else if (vma->vm_start <= mm->start_stack && | 260 | } else if (vma->vm_start <= mm->start_stack && |
261 | vma->vm_end >= mm->start_stack) { | 261 | vma->vm_end >= mm->start_stack) { |
262 | name = "[stack]"; | 262 | name = "[stack]"; |
263 | } | 263 | } |
264 | } else { | 264 | } else { |
265 | name = "[vdso]"; | 265 | name = "[vdso]"; |
266 | } | 266 | } |
267 | } | 267 | } |
268 | if (name) { | 268 | if (name) { |
269 | pad_len_spaces(m, len); | 269 | pad_len_spaces(m, len); |
270 | seq_puts(m, name); | 270 | seq_puts(m, name); |
271 | } | 271 | } |
272 | } | 272 | } |
273 | seq_putc(m, '\n'); | 273 | seq_putc(m, '\n'); |
274 | } | 274 | } |
275 | 275 | ||
276 | static int show_map(struct seq_file *m, void *v) | 276 | static int show_map(struct seq_file *m, void *v) |
277 | { | 277 | { |
278 | struct vm_area_struct *vma = v; | 278 | struct vm_area_struct *vma = v; |
279 | struct proc_maps_private *priv = m->private; | 279 | struct proc_maps_private *priv = m->private; |
280 | struct task_struct *task = priv->task; | 280 | struct task_struct *task = priv->task; |
281 | 281 | ||
282 | show_map_vma(m, vma); | 282 | show_map_vma(m, vma); |
283 | 283 | ||
284 | if (m->count < m->size) /* vma is copied successfully */ | 284 | if (m->count < m->size) /* vma is copied successfully */ |
285 | m->version = (vma != get_gate_vma(task->mm)) | 285 | m->version = (vma != get_gate_vma(task->mm)) |
286 | ? vma->vm_start : 0; | 286 | ? vma->vm_start : 0; |
287 | return 0; | 287 | return 0; |
288 | } | 288 | } |
289 | 289 | ||
290 | static const struct seq_operations proc_pid_maps_op = { | 290 | static const struct seq_operations proc_pid_maps_op = { |
291 | .start = m_start, | 291 | .start = m_start, |
292 | .next = m_next, | 292 | .next = m_next, |
293 | .stop = m_stop, | 293 | .stop = m_stop, |
294 | .show = show_map | 294 | .show = show_map |
295 | }; | 295 | }; |
296 | 296 | ||
297 | static int maps_open(struct inode *inode, struct file *file) | 297 | static int maps_open(struct inode *inode, struct file *file) |
298 | { | 298 | { |
299 | return do_maps_open(inode, file, &proc_pid_maps_op); | 299 | return do_maps_open(inode, file, &proc_pid_maps_op); |
300 | } | 300 | } |
301 | 301 | ||
302 | const struct file_operations proc_maps_operations = { | 302 | const struct file_operations proc_maps_operations = { |
303 | .open = maps_open, | 303 | .open = maps_open, |
304 | .read = seq_read, | 304 | .read = seq_read, |
305 | .llseek = seq_lseek, | 305 | .llseek = seq_lseek, |
306 | .release = seq_release_private, | 306 | .release = seq_release_private, |
307 | }; | 307 | }; |
308 | 308 | ||
309 | /* | 309 | /* |
310 | * Proportional Set Size(PSS): my share of RSS. | 310 | * Proportional Set Size(PSS): my share of RSS. |
311 | * | 311 | * |
312 | * PSS of a process is the count of pages it has in memory, where each | 312 | * PSS of a process is the count of pages it has in memory, where each |
313 | * page is divided by the number of processes sharing it. So if a | 313 | * page is divided by the number of processes sharing it. So if a |
314 | * process has 1000 pages all to itself, and 1000 shared with one other | 314 | * process has 1000 pages all to itself, and 1000 shared with one other |
315 | * process, its PSS will be 1500. | 315 | * process, its PSS will be 1500. |
316 | * | 316 | * |
317 | * To keep (accumulated) division errors low, we adopt a 64bit | 317 | * To keep (accumulated) division errors low, we adopt a 64bit |
318 | * fixed-point pss counter to minimize division errors. So (pss >> | 318 | * fixed-point pss counter to minimize division errors. So (pss >> |
319 | * PSS_SHIFT) would be the real byte count. | 319 | * PSS_SHIFT) would be the real byte count. |
320 | * | 320 | * |
321 | * A shift of 12 before division means (assuming 4K page size): | 321 | * A shift of 12 before division means (assuming 4K page size): |
322 | * - 1M 3-user-pages add up to 8KB errors; | 322 | * - 1M 3-user-pages add up to 8KB errors; |
323 | * - supports mapcount up to 2^24, or 16M; | 323 | * - supports mapcount up to 2^24, or 16M; |
324 | * - supports PSS up to 2^52 bytes, or 4PB. | 324 | * - supports PSS up to 2^52 bytes, or 4PB. |
325 | */ | 325 | */ |
326 | #define PSS_SHIFT 12 | 326 | #define PSS_SHIFT 12 |
327 | 327 | ||
328 | #ifdef CONFIG_PROC_PAGE_MONITOR | 328 | #ifdef CONFIG_PROC_PAGE_MONITOR |
329 | struct mem_size_stats { | 329 | struct mem_size_stats { |
330 | struct vm_area_struct *vma; | 330 | struct vm_area_struct *vma; |
331 | unsigned long resident; | 331 | unsigned long resident; |
332 | unsigned long shared_clean; | 332 | unsigned long shared_clean; |
333 | unsigned long shared_dirty; | 333 | unsigned long shared_dirty; |
334 | unsigned long private_clean; | 334 | unsigned long private_clean; |
335 | unsigned long private_dirty; | 335 | unsigned long private_dirty; |
336 | unsigned long referenced; | 336 | unsigned long referenced; |
337 | unsigned long anonymous; | 337 | unsigned long anonymous; |
338 | unsigned long anonymous_thp; | 338 | unsigned long anonymous_thp; |
339 | unsigned long swap; | 339 | unsigned long swap; |
340 | u64 pss; | 340 | u64 pss; |
341 | }; | 341 | }; |
342 | 342 | ||
343 | 343 | ||
344 | static void smaps_pte_entry(pte_t ptent, unsigned long addr, | 344 | static void smaps_pte_entry(pte_t ptent, unsigned long addr, |
345 | unsigned long ptent_size, struct mm_walk *walk) | 345 | unsigned long ptent_size, struct mm_walk *walk) |
346 | { | 346 | { |
347 | struct mem_size_stats *mss = walk->private; | 347 | struct mem_size_stats *mss = walk->private; |
348 | struct vm_area_struct *vma = mss->vma; | 348 | struct vm_area_struct *vma = mss->vma; |
349 | struct page *page; | 349 | struct page *page; |
350 | int mapcount; | 350 | int mapcount; |
351 | 351 | ||
352 | if (is_swap_pte(ptent)) { | 352 | if (is_swap_pte(ptent)) { |
353 | mss->swap += ptent_size; | 353 | mss->swap += ptent_size; |
354 | return; | 354 | return; |
355 | } | 355 | } |
356 | 356 | ||
357 | if (!pte_present(ptent)) | 357 | if (!pte_present(ptent)) |
358 | return; | 358 | return; |
359 | 359 | ||
360 | page = vm_normal_page(vma, addr, ptent); | 360 | page = vm_normal_page(vma, addr, ptent); |
361 | if (!page) | 361 | if (!page) |
362 | return; | 362 | return; |
363 | 363 | ||
364 | if (PageAnon(page)) | 364 | if (PageAnon(page)) |
365 | mss->anonymous += ptent_size; | 365 | mss->anonymous += ptent_size; |
366 | 366 | ||
367 | mss->resident += ptent_size; | 367 | mss->resident += ptent_size; |
368 | /* Accumulate the size in pages that have been accessed. */ | 368 | /* Accumulate the size in pages that have been accessed. */ |
369 | if (pte_young(ptent) || PageReferenced(page)) | 369 | if (pte_young(ptent) || PageReferenced(page)) |
370 | mss->referenced += ptent_size; | 370 | mss->referenced += ptent_size; |
371 | mapcount = page_mapcount(page); | 371 | mapcount = page_mapcount(page); |
372 | if (mapcount >= 2) { | 372 | if (mapcount >= 2) { |
373 | if (pte_dirty(ptent) || PageDirty(page)) | 373 | if (pte_dirty(ptent) || PageDirty(page)) |
374 | mss->shared_dirty += ptent_size; | 374 | mss->shared_dirty += ptent_size; |
375 | else | 375 | else |
376 | mss->shared_clean += ptent_size; | 376 | mss->shared_clean += ptent_size; |
377 | mss->pss += (ptent_size << PSS_SHIFT) / mapcount; | 377 | mss->pss += (ptent_size << PSS_SHIFT) / mapcount; |
378 | } else { | 378 | } else { |
379 | if (pte_dirty(ptent) || PageDirty(page)) | 379 | if (pte_dirty(ptent) || PageDirty(page)) |
380 | mss->private_dirty += ptent_size; | 380 | mss->private_dirty += ptent_size; |
381 | else | 381 | else |
382 | mss->private_clean += ptent_size; | 382 | mss->private_clean += ptent_size; |
383 | mss->pss += (ptent_size << PSS_SHIFT); | 383 | mss->pss += (ptent_size << PSS_SHIFT); |
384 | } | 384 | } |
385 | } | 385 | } |
386 | 386 | ||
387 | static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | 387 | static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
388 | struct mm_walk *walk) | 388 | struct mm_walk *walk) |
389 | { | 389 | { |
390 | struct mem_size_stats *mss = walk->private; | 390 | struct mem_size_stats *mss = walk->private; |
391 | struct vm_area_struct *vma = mss->vma; | 391 | struct vm_area_struct *vma = mss->vma; |
392 | pte_t *pte; | 392 | pte_t *pte; |
393 | spinlock_t *ptl; | 393 | spinlock_t *ptl; |
394 | 394 | ||
395 | spin_lock(&walk->mm->page_table_lock); | 395 | spin_lock(&walk->mm->page_table_lock); |
396 | if (pmd_trans_huge(*pmd)) { | 396 | if (pmd_trans_huge(*pmd)) { |
397 | if (pmd_trans_splitting(*pmd)) { | 397 | if (pmd_trans_splitting(*pmd)) { |
398 | spin_unlock(&walk->mm->page_table_lock); | 398 | spin_unlock(&walk->mm->page_table_lock); |
399 | wait_split_huge_page(vma->anon_vma, pmd); | 399 | wait_split_huge_page(vma->anon_vma, pmd); |
400 | } else { | 400 | } else { |
401 | smaps_pte_entry(*(pte_t *)pmd, addr, | 401 | smaps_pte_entry(*(pte_t *)pmd, addr, |
402 | HPAGE_PMD_SIZE, walk); | 402 | HPAGE_PMD_SIZE, walk); |
403 | spin_unlock(&walk->mm->page_table_lock); | 403 | spin_unlock(&walk->mm->page_table_lock); |
404 | mss->anonymous_thp += HPAGE_PMD_SIZE; | 404 | mss->anonymous_thp += HPAGE_PMD_SIZE; |
405 | return 0; | 405 | return 0; |
406 | } | 406 | } |
407 | } else { | 407 | } else { |
408 | spin_unlock(&walk->mm->page_table_lock); | 408 | spin_unlock(&walk->mm->page_table_lock); |
409 | } | 409 | } |
410 | /* | 410 | /* |
411 | * The mmap_sem held all the way back in m_start() is what | 411 | * The mmap_sem held all the way back in m_start() is what |
412 | * keeps khugepaged out of here and from collapsing things | 412 | * keeps khugepaged out of here and from collapsing things |
413 | * in here. | 413 | * in here. |
414 | */ | 414 | */ |
415 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 415 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
416 | for (; addr != end; pte++, addr += PAGE_SIZE) | 416 | for (; addr != end; pte++, addr += PAGE_SIZE) |
417 | smaps_pte_entry(*pte, addr, PAGE_SIZE, walk); | 417 | smaps_pte_entry(*pte, addr, PAGE_SIZE, walk); |
418 | pte_unmap_unlock(pte - 1, ptl); | 418 | pte_unmap_unlock(pte - 1, ptl); |
419 | cond_resched(); | 419 | cond_resched(); |
420 | return 0; | 420 | return 0; |
421 | } | 421 | } |
422 | 422 | ||
423 | static int show_smap(struct seq_file *m, void *v) | 423 | static int show_smap(struct seq_file *m, void *v) |
424 | { | 424 | { |
425 | struct proc_maps_private *priv = m->private; | 425 | struct proc_maps_private *priv = m->private; |
426 | struct task_struct *task = priv->task; | 426 | struct task_struct *task = priv->task; |
427 | struct vm_area_struct *vma = v; | 427 | struct vm_area_struct *vma = v; |
428 | struct mem_size_stats mss; | 428 | struct mem_size_stats mss; |
429 | struct mm_walk smaps_walk = { | 429 | struct mm_walk smaps_walk = { |
430 | .pmd_entry = smaps_pte_range, | 430 | .pmd_entry = smaps_pte_range, |
431 | .mm = vma->vm_mm, | 431 | .mm = vma->vm_mm, |
432 | .private = &mss, | 432 | .private = &mss, |
433 | }; | 433 | }; |
434 | 434 | ||
435 | memset(&mss, 0, sizeof mss); | 435 | memset(&mss, 0, sizeof mss); |
436 | mss.vma = vma; | 436 | mss.vma = vma; |
437 | /* mmap_sem is held in m_start */ | 437 | /* mmap_sem is held in m_start */ |
438 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) | 438 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) |
439 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); | 439 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); |
440 | 440 | ||
441 | show_map_vma(m, vma); | 441 | show_map_vma(m, vma); |
442 | 442 | ||
443 | seq_printf(m, | 443 | seq_printf(m, |
444 | "Size: %8lu kB\n" | 444 | "Size: %8lu kB\n" |
445 | "Rss: %8lu kB\n" | 445 | "Rss: %8lu kB\n" |
446 | "Pss: %8lu kB\n" | 446 | "Pss: %8lu kB\n" |
447 | "Shared_Clean: %8lu kB\n" | 447 | "Shared_Clean: %8lu kB\n" |
448 | "Shared_Dirty: %8lu kB\n" | 448 | "Shared_Dirty: %8lu kB\n" |
449 | "Private_Clean: %8lu kB\n" | 449 | "Private_Clean: %8lu kB\n" |
450 | "Private_Dirty: %8lu kB\n" | 450 | "Private_Dirty: %8lu kB\n" |
451 | "Referenced: %8lu kB\n" | 451 | "Referenced: %8lu kB\n" |
452 | "Anonymous: %8lu kB\n" | 452 | "Anonymous: %8lu kB\n" |
453 | "AnonHugePages: %8lu kB\n" | 453 | "AnonHugePages: %8lu kB\n" |
454 | "Swap: %8lu kB\n" | 454 | "Swap: %8lu kB\n" |
455 | "KernelPageSize: %8lu kB\n" | 455 | "KernelPageSize: %8lu kB\n" |
456 | "MMUPageSize: %8lu kB\n" | 456 | "MMUPageSize: %8lu kB\n" |
457 | "Locked: %8lu kB\n", | 457 | "Locked: %8lu kB\n", |
458 | (vma->vm_end - vma->vm_start) >> 10, | 458 | (vma->vm_end - vma->vm_start) >> 10, |
459 | mss.resident >> 10, | 459 | mss.resident >> 10, |
460 | (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), | 460 | (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), |
461 | mss.shared_clean >> 10, | 461 | mss.shared_clean >> 10, |
462 | mss.shared_dirty >> 10, | 462 | mss.shared_dirty >> 10, |
463 | mss.private_clean >> 10, | 463 | mss.private_clean >> 10, |
464 | mss.private_dirty >> 10, | 464 | mss.private_dirty >> 10, |
465 | mss.referenced >> 10, | 465 | mss.referenced >> 10, |
466 | mss.anonymous >> 10, | 466 | mss.anonymous >> 10, |
467 | mss.anonymous_thp >> 10, | 467 | mss.anonymous_thp >> 10, |
468 | mss.swap >> 10, | 468 | mss.swap >> 10, |
469 | vma_kernel_pagesize(vma) >> 10, | 469 | vma_kernel_pagesize(vma) >> 10, |
470 | vma_mmu_pagesize(vma) >> 10, | 470 | vma_mmu_pagesize(vma) >> 10, |
471 | (vma->vm_flags & VM_LOCKED) ? | 471 | (vma->vm_flags & VM_LOCKED) ? |
472 | (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); | 472 | (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); |
473 | 473 | ||
474 | if (m->count < m->size) /* vma is copied successfully */ | 474 | if (m->count < m->size) /* vma is copied successfully */ |
475 | m->version = (vma != get_gate_vma(task->mm)) | 475 | m->version = (vma != get_gate_vma(task->mm)) |
476 | ? vma->vm_start : 0; | 476 | ? vma->vm_start : 0; |
477 | return 0; | 477 | return 0; |
478 | } | 478 | } |
479 | 479 | ||
480 | static const struct seq_operations proc_pid_smaps_op = { | 480 | static const struct seq_operations proc_pid_smaps_op = { |
481 | .start = m_start, | 481 | .start = m_start, |
482 | .next = m_next, | 482 | .next = m_next, |
483 | .stop = m_stop, | 483 | .stop = m_stop, |
484 | .show = show_smap | 484 | .show = show_smap |
485 | }; | 485 | }; |
486 | 486 | ||
487 | static int smaps_open(struct inode *inode, struct file *file) | 487 | static int smaps_open(struct inode *inode, struct file *file) |
488 | { | 488 | { |
489 | return do_maps_open(inode, file, &proc_pid_smaps_op); | 489 | return do_maps_open(inode, file, &proc_pid_smaps_op); |
490 | } | 490 | } |
491 | 491 | ||
492 | const struct file_operations proc_smaps_operations = { | 492 | const struct file_operations proc_smaps_operations = { |
493 | .open = smaps_open, | 493 | .open = smaps_open, |
494 | .read = seq_read, | 494 | .read = seq_read, |
495 | .llseek = seq_lseek, | 495 | .llseek = seq_lseek, |
496 | .release = seq_release_private, | 496 | .release = seq_release_private, |
497 | }; | 497 | }; |
498 | 498 | ||
499 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | 499 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, |
500 | unsigned long end, struct mm_walk *walk) | 500 | unsigned long end, struct mm_walk *walk) |
501 | { | 501 | { |
502 | struct vm_area_struct *vma = walk->private; | 502 | struct vm_area_struct *vma = walk->private; |
503 | pte_t *pte, ptent; | 503 | pte_t *pte, ptent; |
504 | spinlock_t *ptl; | 504 | spinlock_t *ptl; |
505 | struct page *page; | 505 | struct page *page; |
506 | 506 | ||
507 | split_huge_page_pmd(walk->mm, pmd); | 507 | split_huge_page_pmd(walk->mm, pmd); |
508 | 508 | ||
509 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 509 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
510 | for (; addr != end; pte++, addr += PAGE_SIZE) { | 510 | for (; addr != end; pte++, addr += PAGE_SIZE) { |
511 | ptent = *pte; | 511 | ptent = *pte; |
512 | if (!pte_present(ptent)) | 512 | if (!pte_present(ptent)) |
513 | continue; | 513 | continue; |
514 | 514 | ||
515 | page = vm_normal_page(vma, addr, ptent); | 515 | page = vm_normal_page(vma, addr, ptent); |
516 | if (!page) | 516 | if (!page) |
517 | continue; | 517 | continue; |
518 | 518 | ||
519 | /* Clear accessed and referenced bits. */ | 519 | /* Clear accessed and referenced bits. */ |
520 | ptep_test_and_clear_young(vma, addr, pte); | 520 | ptep_test_and_clear_young(vma, addr, pte); |
521 | ClearPageReferenced(page); | 521 | ClearPageReferenced(page); |
522 | } | 522 | } |
523 | pte_unmap_unlock(pte - 1, ptl); | 523 | pte_unmap_unlock(pte - 1, ptl); |
524 | cond_resched(); | 524 | cond_resched(); |
525 | return 0; | 525 | return 0; |
526 | } | 526 | } |
527 | 527 | ||
528 | #define CLEAR_REFS_ALL 1 | 528 | #define CLEAR_REFS_ALL 1 |
529 | #define CLEAR_REFS_ANON 2 | 529 | #define CLEAR_REFS_ANON 2 |
530 | #define CLEAR_REFS_MAPPED 3 | 530 | #define CLEAR_REFS_MAPPED 3 |
531 | 531 | ||
532 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, | 532 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, |
533 | size_t count, loff_t *ppos) | 533 | size_t count, loff_t *ppos) |
534 | { | 534 | { |
535 | struct task_struct *task; | 535 | struct task_struct *task; |
536 | char buffer[PROC_NUMBUF]; | 536 | char buffer[PROC_NUMBUF]; |
537 | struct mm_struct *mm; | 537 | struct mm_struct *mm; |
538 | struct vm_area_struct *vma; | 538 | struct vm_area_struct *vma; |
539 | long type; | 539 | int type; |
540 | int rv; | ||
540 | 541 | ||
541 | memset(buffer, 0, sizeof(buffer)); | 542 | memset(buffer, 0, sizeof(buffer)); |
542 | if (count > sizeof(buffer) - 1) | 543 | if (count > sizeof(buffer) - 1) |
543 | count = sizeof(buffer) - 1; | 544 | count = sizeof(buffer) - 1; |
544 | if (copy_from_user(buffer, buf, count)) | 545 | if (copy_from_user(buffer, buf, count)) |
545 | return -EFAULT; | 546 | return -EFAULT; |
546 | if (strict_strtol(strstrip(buffer), 10, &type)) | 547 | rv = kstrtoint(strstrip(buffer), 10, &type); |
547 | return -EINVAL; | 548 | if (rv < 0) |
549 | return rv; | ||
548 | if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) | 550 | if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) |
549 | return -EINVAL; | 551 | return -EINVAL; |
550 | task = get_proc_task(file->f_path.dentry->d_inode); | 552 | task = get_proc_task(file->f_path.dentry->d_inode); |
551 | if (!task) | 553 | if (!task) |
552 | return -ESRCH; | 554 | return -ESRCH; |
553 | mm = get_task_mm(task); | 555 | mm = get_task_mm(task); |
554 | if (mm) { | 556 | if (mm) { |
555 | struct mm_walk clear_refs_walk = { | 557 | struct mm_walk clear_refs_walk = { |
556 | .pmd_entry = clear_refs_pte_range, | 558 | .pmd_entry = clear_refs_pte_range, |
557 | .mm = mm, | 559 | .mm = mm, |
558 | }; | 560 | }; |
559 | down_read(&mm->mmap_sem); | 561 | down_read(&mm->mmap_sem); |
560 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 562 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
561 | clear_refs_walk.private = vma; | 563 | clear_refs_walk.private = vma; |
562 | if (is_vm_hugetlb_page(vma)) | 564 | if (is_vm_hugetlb_page(vma)) |
563 | continue; | 565 | continue; |
564 | /* | 566 | /* |
565 | * Writing 1 to /proc/pid/clear_refs affects all pages. | 567 | * Writing 1 to /proc/pid/clear_refs affects all pages. |
566 | * | 568 | * |
567 | * Writing 2 to /proc/pid/clear_refs only affects | 569 | * Writing 2 to /proc/pid/clear_refs only affects |
568 | * Anonymous pages. | 570 | * Anonymous pages. |
569 | * | 571 | * |
570 | * Writing 3 to /proc/pid/clear_refs only affects file | 572 | * Writing 3 to /proc/pid/clear_refs only affects file |
571 | * mapped pages. | 573 | * mapped pages. |
572 | */ | 574 | */ |
573 | if (type == CLEAR_REFS_ANON && vma->vm_file) | 575 | if (type == CLEAR_REFS_ANON && vma->vm_file) |
574 | continue; | 576 | continue; |
575 | if (type == CLEAR_REFS_MAPPED && !vma->vm_file) | 577 | if (type == CLEAR_REFS_MAPPED && !vma->vm_file) |
576 | continue; | 578 | continue; |
577 | walk_page_range(vma->vm_start, vma->vm_end, | 579 | walk_page_range(vma->vm_start, vma->vm_end, |
578 | &clear_refs_walk); | 580 | &clear_refs_walk); |
579 | } | 581 | } |
580 | flush_tlb_mm(mm); | 582 | flush_tlb_mm(mm); |
581 | up_read(&mm->mmap_sem); | 583 | up_read(&mm->mmap_sem); |
582 | mmput(mm); | 584 | mmput(mm); |
583 | } | 585 | } |
584 | put_task_struct(task); | 586 | put_task_struct(task); |
585 | 587 | ||
586 | return count; | 588 | return count; |
587 | } | 589 | } |
588 | 590 | ||
589 | const struct file_operations proc_clear_refs_operations = { | 591 | const struct file_operations proc_clear_refs_operations = { |
590 | .write = clear_refs_write, | 592 | .write = clear_refs_write, |
591 | .llseek = noop_llseek, | 593 | .llseek = noop_llseek, |
592 | }; | 594 | }; |
593 | 595 | ||
594 | struct pagemapread { | 596 | struct pagemapread { |
595 | int pos, len; | 597 | int pos, len; |
596 | u64 *buffer; | 598 | u64 *buffer; |
597 | }; | 599 | }; |
598 | 600 | ||
599 | #define PM_ENTRY_BYTES sizeof(u64) | 601 | #define PM_ENTRY_BYTES sizeof(u64) |
600 | #define PM_STATUS_BITS 3 | 602 | #define PM_STATUS_BITS 3 |
601 | #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) | 603 | #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) |
602 | #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) | 604 | #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) |
603 | #define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK) | 605 | #define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK) |
604 | #define PM_PSHIFT_BITS 6 | 606 | #define PM_PSHIFT_BITS 6 |
605 | #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) | 607 | #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) |
606 | #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) | 608 | #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) |
607 | #define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) | 609 | #define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) |
608 | #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) | 610 | #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) |
609 | #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) | 611 | #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) |
610 | 612 | ||
611 | #define PM_PRESENT PM_STATUS(4LL) | 613 | #define PM_PRESENT PM_STATUS(4LL) |
612 | #define PM_SWAP PM_STATUS(2LL) | 614 | #define PM_SWAP PM_STATUS(2LL) |
613 | #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) | 615 | #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) |
614 | #define PM_END_OF_BUFFER 1 | 616 | #define PM_END_OF_BUFFER 1 |
615 | 617 | ||
616 | static int add_to_pagemap(unsigned long addr, u64 pfn, | 618 | static int add_to_pagemap(unsigned long addr, u64 pfn, |
617 | struct pagemapread *pm) | 619 | struct pagemapread *pm) |
618 | { | 620 | { |
619 | pm->buffer[pm->pos++] = pfn; | 621 | pm->buffer[pm->pos++] = pfn; |
620 | if (pm->pos >= pm->len) | 622 | if (pm->pos >= pm->len) |
621 | return PM_END_OF_BUFFER; | 623 | return PM_END_OF_BUFFER; |
622 | return 0; | 624 | return 0; |
623 | } | 625 | } |
624 | 626 | ||
625 | static int pagemap_pte_hole(unsigned long start, unsigned long end, | 627 | static int pagemap_pte_hole(unsigned long start, unsigned long end, |
626 | struct mm_walk *walk) | 628 | struct mm_walk *walk) |
627 | { | 629 | { |
628 | struct pagemapread *pm = walk->private; | 630 | struct pagemapread *pm = walk->private; |
629 | unsigned long addr; | 631 | unsigned long addr; |
630 | int err = 0; | 632 | int err = 0; |
631 | for (addr = start; addr < end; addr += PAGE_SIZE) { | 633 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
632 | err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); | 634 | err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); |
633 | if (err) | 635 | if (err) |
634 | break; | 636 | break; |
635 | } | 637 | } |
636 | return err; | 638 | return err; |
637 | } | 639 | } |
638 | 640 | ||
639 | static u64 swap_pte_to_pagemap_entry(pte_t pte) | 641 | static u64 swap_pte_to_pagemap_entry(pte_t pte) |
640 | { | 642 | { |
641 | swp_entry_t e = pte_to_swp_entry(pte); | 643 | swp_entry_t e = pte_to_swp_entry(pte); |
642 | return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); | 644 | return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); |
643 | } | 645 | } |
644 | 646 | ||
645 | static u64 pte_to_pagemap_entry(pte_t pte) | 647 | static u64 pte_to_pagemap_entry(pte_t pte) |
646 | { | 648 | { |
647 | u64 pme = 0; | 649 | u64 pme = 0; |
648 | if (is_swap_pte(pte)) | 650 | if (is_swap_pte(pte)) |
649 | pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) | 651 | pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) |
650 | | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; | 652 | | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; |
651 | else if (pte_present(pte)) | 653 | else if (pte_present(pte)) |
652 | pme = PM_PFRAME(pte_pfn(pte)) | 654 | pme = PM_PFRAME(pte_pfn(pte)) |
653 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; | 655 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; |
654 | return pme; | 656 | return pme; |
655 | } | 657 | } |
656 | 658 | ||
657 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | 659 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
658 | struct mm_walk *walk) | 660 | struct mm_walk *walk) |
659 | { | 661 | { |
660 | struct vm_area_struct *vma; | 662 | struct vm_area_struct *vma; |
661 | struct pagemapread *pm = walk->private; | 663 | struct pagemapread *pm = walk->private; |
662 | pte_t *pte; | 664 | pte_t *pte; |
663 | int err = 0; | 665 | int err = 0; |
664 | 666 | ||
665 | split_huge_page_pmd(walk->mm, pmd); | 667 | split_huge_page_pmd(walk->mm, pmd); |
666 | 668 | ||
667 | /* find the first VMA at or above 'addr' */ | 669 | /* find the first VMA at or above 'addr' */ |
668 | vma = find_vma(walk->mm, addr); | 670 | vma = find_vma(walk->mm, addr); |
669 | for (; addr != end; addr += PAGE_SIZE) { | 671 | for (; addr != end; addr += PAGE_SIZE) { |
670 | u64 pfn = PM_NOT_PRESENT; | 672 | u64 pfn = PM_NOT_PRESENT; |
671 | 673 | ||
672 | /* check to see if we've left 'vma' behind | 674 | /* check to see if we've left 'vma' behind |
673 | * and need a new, higher one */ | 675 | * and need a new, higher one */ |
674 | if (vma && (addr >= vma->vm_end)) | 676 | if (vma && (addr >= vma->vm_end)) |
675 | vma = find_vma(walk->mm, addr); | 677 | vma = find_vma(walk->mm, addr); |
676 | 678 | ||
677 | /* check that 'vma' actually covers this address, | 679 | /* check that 'vma' actually covers this address, |
678 | * and that it isn't a huge page vma */ | 680 | * and that it isn't a huge page vma */ |
679 | if (vma && (vma->vm_start <= addr) && | 681 | if (vma && (vma->vm_start <= addr) && |
680 | !is_vm_hugetlb_page(vma)) { | 682 | !is_vm_hugetlb_page(vma)) { |
681 | pte = pte_offset_map(pmd, addr); | 683 | pte = pte_offset_map(pmd, addr); |
682 | pfn = pte_to_pagemap_entry(*pte); | 684 | pfn = pte_to_pagemap_entry(*pte); |
683 | /* unmap before userspace copy */ | 685 | /* unmap before userspace copy */ |
684 | pte_unmap(pte); | 686 | pte_unmap(pte); |
685 | } | 687 | } |
686 | err = add_to_pagemap(addr, pfn, pm); | 688 | err = add_to_pagemap(addr, pfn, pm); |
687 | if (err) | 689 | if (err) |
688 | return err; | 690 | return err; |
689 | } | 691 | } |
690 | 692 | ||
691 | cond_resched(); | 693 | cond_resched(); |
692 | 694 | ||
693 | return err; | 695 | return err; |
694 | } | 696 | } |
695 | 697 | ||
696 | #ifdef CONFIG_HUGETLB_PAGE | 698 | #ifdef CONFIG_HUGETLB_PAGE |
697 | static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) | 699 | static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) |
698 | { | 700 | { |
699 | u64 pme = 0; | 701 | u64 pme = 0; |
700 | if (pte_present(pte)) | 702 | if (pte_present(pte)) |
701 | pme = PM_PFRAME(pte_pfn(pte) + offset) | 703 | pme = PM_PFRAME(pte_pfn(pte) + offset) |
702 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; | 704 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; |
703 | return pme; | 705 | return pme; |
704 | } | 706 | } |
705 | 707 | ||
706 | /* This function walks within one hugetlb entry in the single call */ | 708 | /* This function walks within one hugetlb entry in the single call */ |
707 | static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | 709 | static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, |
708 | unsigned long addr, unsigned long end, | 710 | unsigned long addr, unsigned long end, |
709 | struct mm_walk *walk) | 711 | struct mm_walk *walk) |
710 | { | 712 | { |
711 | struct pagemapread *pm = walk->private; | 713 | struct pagemapread *pm = walk->private; |
712 | int err = 0; | 714 | int err = 0; |
713 | u64 pfn; | 715 | u64 pfn; |
714 | 716 | ||
715 | for (; addr != end; addr += PAGE_SIZE) { | 717 | for (; addr != end; addr += PAGE_SIZE) { |
716 | int offset = (addr & ~hmask) >> PAGE_SHIFT; | 718 | int offset = (addr & ~hmask) >> PAGE_SHIFT; |
717 | pfn = huge_pte_to_pagemap_entry(*pte, offset); | 719 | pfn = huge_pte_to_pagemap_entry(*pte, offset); |
718 | err = add_to_pagemap(addr, pfn, pm); | 720 | err = add_to_pagemap(addr, pfn, pm); |
719 | if (err) | 721 | if (err) |
720 | return err; | 722 | return err; |
721 | } | 723 | } |
722 | 724 | ||
723 | cond_resched(); | 725 | cond_resched(); |
724 | 726 | ||
725 | return err; | 727 | return err; |
726 | } | 728 | } |
727 | #endif /* HUGETLB_PAGE */ | 729 | #endif /* HUGETLB_PAGE */ |
728 | 730 | ||
729 | /* | 731 | /* |
730 | * /proc/pid/pagemap - an array mapping virtual pages to pfns | 732 | * /proc/pid/pagemap - an array mapping virtual pages to pfns |
731 | * | 733 | * |
732 | * For each page in the address space, this file contains one 64-bit entry | 734 | * For each page in the address space, this file contains one 64-bit entry |
733 | * consisting of the following: | 735 | * consisting of the following: |
734 | * | 736 | * |
735 | * Bits 0-54 page frame number (PFN) if present | 737 | * Bits 0-54 page frame number (PFN) if present |
736 | * Bits 0-4 swap type if swapped | 738 | * Bits 0-4 swap type if swapped |
737 | * Bits 5-54 swap offset if swapped | 739 | * Bits 5-54 swap offset if swapped |
738 | * Bits 55-60 page shift (page size = 1<<page shift) | 740 | * Bits 55-60 page shift (page size = 1<<page shift) |
739 | * Bit 61 reserved for future use | 741 | * Bit 61 reserved for future use |
740 | * Bit 62 page swapped | 742 | * Bit 62 page swapped |
741 | * Bit 63 page present | 743 | * Bit 63 page present |
742 | * | 744 | * |
743 | * If the page is not present but in swap, then the PFN contains an | 745 | * If the page is not present but in swap, then the PFN contains an |
744 | * encoding of the swap file number and the page's offset into the | 746 | * encoding of the swap file number and the page's offset into the |
745 | * swap. Unmapped pages return a null PFN. This allows determining | 747 | * swap. Unmapped pages return a null PFN. This allows determining |
746 | * precisely which pages are mapped (or in swap) and comparing mapped | 748 | * precisely which pages are mapped (or in swap) and comparing mapped |
747 | * pages between processes. | 749 | * pages between processes. |
748 | * | 750 | * |
749 | * Efficient users of this interface will use /proc/pid/maps to | 751 | * Efficient users of this interface will use /proc/pid/maps to |
750 | * determine which areas of memory are actually mapped and llseek to | 752 | * determine which areas of memory are actually mapped and llseek to |
751 | * skip over unmapped regions. | 753 | * skip over unmapped regions. |
752 | */ | 754 | */ |
753 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) | 755 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) |
754 | #define PAGEMAP_WALK_MASK (PMD_MASK) | 756 | #define PAGEMAP_WALK_MASK (PMD_MASK) |
755 | static ssize_t pagemap_read(struct file *file, char __user *buf, | 757 | static ssize_t pagemap_read(struct file *file, char __user *buf, |
756 | size_t count, loff_t *ppos) | 758 | size_t count, loff_t *ppos) |
757 | { | 759 | { |
758 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | 760 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
759 | struct mm_struct *mm; | 761 | struct mm_struct *mm; |
760 | struct pagemapread pm; | 762 | struct pagemapread pm; |
761 | int ret = -ESRCH; | 763 | int ret = -ESRCH; |
762 | struct mm_walk pagemap_walk = {}; | 764 | struct mm_walk pagemap_walk = {}; |
763 | unsigned long src; | 765 | unsigned long src; |
764 | unsigned long svpfn; | 766 | unsigned long svpfn; |
765 | unsigned long start_vaddr; | 767 | unsigned long start_vaddr; |
766 | unsigned long end_vaddr; | 768 | unsigned long end_vaddr; |
767 | int copied = 0; | 769 | int copied = 0; |
768 | 770 | ||
769 | if (!task) | 771 | if (!task) |
770 | goto out; | 772 | goto out; |
771 | 773 | ||
772 | mm = mm_for_maps(task); | 774 | mm = mm_for_maps(task); |
773 | ret = PTR_ERR(mm); | 775 | ret = PTR_ERR(mm); |
774 | if (!mm || IS_ERR(mm)) | 776 | if (!mm || IS_ERR(mm)) |
775 | goto out_task; | 777 | goto out_task; |
776 | 778 | ||
777 | ret = -EINVAL; | 779 | ret = -EINVAL; |
778 | /* file position must be aligned */ | 780 | /* file position must be aligned */ |
779 | if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) | 781 | if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) |
780 | goto out_task; | 782 | goto out_task; |
781 | 783 | ||
782 | ret = 0; | 784 | ret = 0; |
783 | 785 | ||
784 | if (!count) | 786 | if (!count) |
785 | goto out_task; | 787 | goto out_task; |
786 | 788 | ||
787 | pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); | 789 | pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); |
788 | pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); | 790 | pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); |
789 | ret = -ENOMEM; | 791 | ret = -ENOMEM; |
790 | if (!pm.buffer) | 792 | if (!pm.buffer) |
791 | goto out_mm; | 793 | goto out_mm; |
792 | 794 | ||
793 | pagemap_walk.pmd_entry = pagemap_pte_range; | 795 | pagemap_walk.pmd_entry = pagemap_pte_range; |
794 | pagemap_walk.pte_hole = pagemap_pte_hole; | 796 | pagemap_walk.pte_hole = pagemap_pte_hole; |
795 | #ifdef CONFIG_HUGETLB_PAGE | 797 | #ifdef CONFIG_HUGETLB_PAGE |
796 | pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; | 798 | pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; |
797 | #endif | 799 | #endif |
798 | pagemap_walk.mm = mm; | 800 | pagemap_walk.mm = mm; |
799 | pagemap_walk.private = ± | 801 | pagemap_walk.private = ± |
800 | 802 | ||
801 | src = *ppos; | 803 | src = *ppos; |
802 | svpfn = src / PM_ENTRY_BYTES; | 804 | svpfn = src / PM_ENTRY_BYTES; |
803 | start_vaddr = svpfn << PAGE_SHIFT; | 805 | start_vaddr = svpfn << PAGE_SHIFT; |
804 | end_vaddr = TASK_SIZE_OF(task); | 806 | end_vaddr = TASK_SIZE_OF(task); |
805 | 807 | ||
806 | /* watch out for wraparound */ | 808 | /* watch out for wraparound */ |
807 | if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) | 809 | if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) |
808 | start_vaddr = end_vaddr; | 810 | start_vaddr = end_vaddr; |
809 | 811 | ||
810 | /* | 812 | /* |
811 | * The odds are that this will stop walking way | 813 | * The odds are that this will stop walking way |
812 | * before end_vaddr, because the length of the | 814 | * before end_vaddr, because the length of the |
813 | * user buffer is tracked in "pm", and the walk | 815 | * user buffer is tracked in "pm", and the walk |
814 | * will stop when we hit the end of the buffer. | 816 | * will stop when we hit the end of the buffer. |
815 | */ | 817 | */ |
816 | ret = 0; | 818 | ret = 0; |
817 | while (count && (start_vaddr < end_vaddr)) { | 819 | while (count && (start_vaddr < end_vaddr)) { |
818 | int len; | 820 | int len; |
819 | unsigned long end; | 821 | unsigned long end; |
820 | 822 | ||
821 | pm.pos = 0; | 823 | pm.pos = 0; |
822 | end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK; | 824 | end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK; |
823 | /* overflow ? */ | 825 | /* overflow ? */ |
824 | if (end < start_vaddr || end > end_vaddr) | 826 | if (end < start_vaddr || end > end_vaddr) |
825 | end = end_vaddr; | 827 | end = end_vaddr; |
826 | down_read(&mm->mmap_sem); | 828 | down_read(&mm->mmap_sem); |
827 | ret = walk_page_range(start_vaddr, end, &pagemap_walk); | 829 | ret = walk_page_range(start_vaddr, end, &pagemap_walk); |
828 | up_read(&mm->mmap_sem); | 830 | up_read(&mm->mmap_sem); |
829 | start_vaddr = end; | 831 | start_vaddr = end; |
830 | 832 | ||
831 | len = min(count, PM_ENTRY_BYTES * pm.pos); | 833 | len = min(count, PM_ENTRY_BYTES * pm.pos); |
832 | if (copy_to_user(buf, pm.buffer, len)) { | 834 | if (copy_to_user(buf, pm.buffer, len)) { |
833 | ret = -EFAULT; | 835 | ret = -EFAULT; |
834 | goto out_free; | 836 | goto out_free; |
835 | } | 837 | } |
836 | copied += len; | 838 | copied += len; |
837 | buf += len; | 839 | buf += len; |
838 | count -= len; | 840 | count -= len; |
839 | } | 841 | } |
840 | *ppos += copied; | 842 | *ppos += copied; |
841 | if (!ret || ret == PM_END_OF_BUFFER) | 843 | if (!ret || ret == PM_END_OF_BUFFER) |
842 | ret = copied; | 844 | ret = copied; |
843 | 845 | ||
844 | out_free: | 846 | out_free: |
845 | kfree(pm.buffer); | 847 | kfree(pm.buffer); |
846 | out_mm: | 848 | out_mm: |
847 | mmput(mm); | 849 | mmput(mm); |
848 | out_task: | 850 | out_task: |
849 | put_task_struct(task); | 851 | put_task_struct(task); |
850 | out: | 852 | out: |
851 | return ret; | 853 | return ret; |
852 | } | 854 | } |
853 | 855 | ||
854 | const struct file_operations proc_pagemap_operations = { | 856 | const struct file_operations proc_pagemap_operations = { |
855 | .llseek = mem_lseek, /* borrow this */ | 857 | .llseek = mem_lseek, /* borrow this */ |
856 | .read = pagemap_read, | 858 | .read = pagemap_read, |
857 | }; | 859 | }; |
858 | #endif /* CONFIG_PROC_PAGE_MONITOR */ | 860 | #endif /* CONFIG_PROC_PAGE_MONITOR */ |
859 | 861 | ||
860 | #ifdef CONFIG_NUMA | 862 | #ifdef CONFIG_NUMA |
861 | 863 | ||
862 | struct numa_maps { | 864 | struct numa_maps { |
863 | struct vm_area_struct *vma; | 865 | struct vm_area_struct *vma; |
864 | unsigned long pages; | 866 | unsigned long pages; |
865 | unsigned long anon; | 867 | unsigned long anon; |
866 | unsigned long active; | 868 | unsigned long active; |
867 | unsigned long writeback; | 869 | unsigned long writeback; |
868 | unsigned long mapcount_max; | 870 | unsigned long mapcount_max; |
869 | unsigned long dirty; | 871 | unsigned long dirty; |
870 | unsigned long swapcache; | 872 | unsigned long swapcache; |
871 | unsigned long node[MAX_NUMNODES]; | 873 | unsigned long node[MAX_NUMNODES]; |
872 | }; | 874 | }; |
873 | 875 | ||
874 | struct numa_maps_private { | 876 | struct numa_maps_private { |
875 | struct proc_maps_private proc_maps; | 877 | struct proc_maps_private proc_maps; |
876 | struct numa_maps md; | 878 | struct numa_maps md; |
877 | }; | 879 | }; |
878 | 880 | ||
879 | static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty) | 881 | static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty) |
880 | { | 882 | { |
881 | int count = page_mapcount(page); | 883 | int count = page_mapcount(page); |
882 | 884 | ||
883 | md->pages++; | 885 | md->pages++; |
884 | if (pte_dirty || PageDirty(page)) | 886 | if (pte_dirty || PageDirty(page)) |
885 | md->dirty++; | 887 | md->dirty++; |
886 | 888 | ||
887 | if (PageSwapCache(page)) | 889 | if (PageSwapCache(page)) |
888 | md->swapcache++; | 890 | md->swapcache++; |
889 | 891 | ||
890 | if (PageActive(page) || PageUnevictable(page)) | 892 | if (PageActive(page) || PageUnevictable(page)) |
891 | md->active++; | 893 | md->active++; |
892 | 894 | ||
893 | if (PageWriteback(page)) | 895 | if (PageWriteback(page)) |
894 | md->writeback++; | 896 | md->writeback++; |
895 | 897 | ||
896 | if (PageAnon(page)) | 898 | if (PageAnon(page)) |
897 | md->anon++; | 899 | md->anon++; |
898 | 900 | ||
899 | if (count > md->mapcount_max) | 901 | if (count > md->mapcount_max) |
900 | md->mapcount_max = count; | 902 | md->mapcount_max = count; |
901 | 903 | ||
902 | md->node[page_to_nid(page)]++; | 904 | md->node[page_to_nid(page)]++; |
903 | } | 905 | } |
904 | 906 | ||
905 | static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | 907 | static int gather_pte_stats(pmd_t *pmd, unsigned long addr, |
906 | unsigned long end, struct mm_walk *walk) | 908 | unsigned long end, struct mm_walk *walk) |
907 | { | 909 | { |
908 | struct numa_maps *md; | 910 | struct numa_maps *md; |
909 | spinlock_t *ptl; | 911 | spinlock_t *ptl; |
910 | pte_t *orig_pte; | 912 | pte_t *orig_pte; |
911 | pte_t *pte; | 913 | pte_t *pte; |
912 | 914 | ||
913 | md = walk->private; | 915 | md = walk->private; |
914 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); | 916 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); |
915 | do { | 917 | do { |
916 | struct page *page; | 918 | struct page *page; |
917 | int nid; | 919 | int nid; |
918 | 920 | ||
919 | if (!pte_present(*pte)) | 921 | if (!pte_present(*pte)) |
920 | continue; | 922 | continue; |
921 | 923 | ||
922 | page = vm_normal_page(md->vma, addr, *pte); | 924 | page = vm_normal_page(md->vma, addr, *pte); |
923 | if (!page) | 925 | if (!page) |
924 | continue; | 926 | continue; |
925 | 927 | ||
926 | if (PageReserved(page)) | 928 | if (PageReserved(page)) |
927 | continue; | 929 | continue; |
928 | 930 | ||
929 | nid = page_to_nid(page); | 931 | nid = page_to_nid(page); |
930 | if (!node_isset(nid, node_states[N_HIGH_MEMORY])) | 932 | if (!node_isset(nid, node_states[N_HIGH_MEMORY])) |
931 | continue; | 933 | continue; |
932 | 934 | ||
933 | gather_stats(page, md, pte_dirty(*pte)); | 935 | gather_stats(page, md, pte_dirty(*pte)); |
934 | 936 | ||
935 | } while (pte++, addr += PAGE_SIZE, addr != end); | 937 | } while (pte++, addr += PAGE_SIZE, addr != end); |
936 | pte_unmap_unlock(orig_pte, ptl); | 938 | pte_unmap_unlock(orig_pte, ptl); |
937 | return 0; | 939 | return 0; |
938 | } | 940 | } |
939 | #ifdef CONFIG_HUGETLB_PAGE | 941 | #ifdef CONFIG_HUGETLB_PAGE |
940 | static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, | 942 | static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, |
941 | unsigned long addr, unsigned long end, struct mm_walk *walk) | 943 | unsigned long addr, unsigned long end, struct mm_walk *walk) |
942 | { | 944 | { |
943 | struct numa_maps *md; | 945 | struct numa_maps *md; |
944 | struct page *page; | 946 | struct page *page; |
945 | 947 | ||
946 | if (pte_none(*pte)) | 948 | if (pte_none(*pte)) |
947 | return 0; | 949 | return 0; |
948 | 950 | ||
949 | page = pte_page(*pte); | 951 | page = pte_page(*pte); |
950 | if (!page) | 952 | if (!page) |
951 | return 0; | 953 | return 0; |
952 | 954 | ||
953 | md = walk->private; | 955 | md = walk->private; |
954 | gather_stats(page, md, pte_dirty(*pte)); | 956 | gather_stats(page, md, pte_dirty(*pte)); |
955 | return 0; | 957 | return 0; |
956 | } | 958 | } |
957 | 959 | ||
958 | #else | 960 | #else |
959 | static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, | 961 | static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, |
960 | unsigned long addr, unsigned long end, struct mm_walk *walk) | 962 | unsigned long addr, unsigned long end, struct mm_walk *walk) |
961 | { | 963 | { |
962 | return 0; | 964 | return 0; |
963 | } | 965 | } |
964 | #endif | 966 | #endif |
965 | 967 | ||
966 | /* | 968 | /* |
967 | * Display pages allocated per node and memory policy via /proc. | 969 | * Display pages allocated per node and memory policy via /proc. |
968 | */ | 970 | */ |
969 | static int show_numa_map(struct seq_file *m, void *v) | 971 | static int show_numa_map(struct seq_file *m, void *v) |
970 | { | 972 | { |
971 | struct numa_maps_private *numa_priv = m->private; | 973 | struct numa_maps_private *numa_priv = m->private; |
972 | struct proc_maps_private *proc_priv = &numa_priv->proc_maps; | 974 | struct proc_maps_private *proc_priv = &numa_priv->proc_maps; |
973 | struct vm_area_struct *vma = v; | 975 | struct vm_area_struct *vma = v; |
974 | struct numa_maps *md = &numa_priv->md; | 976 | struct numa_maps *md = &numa_priv->md; |
975 | struct file *file = vma->vm_file; | 977 | struct file *file = vma->vm_file; |
976 | struct mm_struct *mm = vma->vm_mm; | 978 | struct mm_struct *mm = vma->vm_mm; |
977 | struct mm_walk walk = {}; | 979 | struct mm_walk walk = {}; |
978 | struct mempolicy *pol; | 980 | struct mempolicy *pol; |
979 | int n; | 981 | int n; |
980 | char buffer[50]; | 982 | char buffer[50]; |
981 | 983 | ||
982 | if (!mm) | 984 | if (!mm) |
983 | return 0; | 985 | return 0; |
984 | 986 | ||
985 | /* Ensure we start with an empty set of numa_maps statistics. */ | 987 | /* Ensure we start with an empty set of numa_maps statistics. */ |
986 | memset(md, 0, sizeof(*md)); | 988 | memset(md, 0, sizeof(*md)); |
987 | 989 | ||
988 | md->vma = vma; | 990 | md->vma = vma; |
989 | 991 | ||
990 | walk.hugetlb_entry = gather_hugetbl_stats; | 992 | walk.hugetlb_entry = gather_hugetbl_stats; |
991 | walk.pmd_entry = gather_pte_stats; | 993 | walk.pmd_entry = gather_pte_stats; |
992 | walk.private = md; | 994 | walk.private = md; |
993 | walk.mm = mm; | 995 | walk.mm = mm; |
994 | 996 | ||
995 | pol = get_vma_policy(proc_priv->task, vma, vma->vm_start); | 997 | pol = get_vma_policy(proc_priv->task, vma, vma->vm_start); |
996 | mpol_to_str(buffer, sizeof(buffer), pol, 0); | 998 | mpol_to_str(buffer, sizeof(buffer), pol, 0); |
997 | mpol_cond_put(pol); | 999 | mpol_cond_put(pol); |
998 | 1000 | ||
999 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); | 1001 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); |
1000 | 1002 | ||
1001 | if (file) { | 1003 | if (file) { |
1002 | seq_printf(m, " file="); | 1004 | seq_printf(m, " file="); |
1003 | seq_path(m, &file->f_path, "\n\t= "); | 1005 | seq_path(m, &file->f_path, "\n\t= "); |
1004 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { | 1006 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { |
1005 | seq_printf(m, " heap"); | 1007 | seq_printf(m, " heap"); |
1006 | } else if (vma->vm_start <= mm->start_stack && | 1008 | } else if (vma->vm_start <= mm->start_stack && |
1007 | vma->vm_end >= mm->start_stack) { | 1009 | vma->vm_end >= mm->start_stack) { |
1008 | seq_printf(m, " stack"); | 1010 | seq_printf(m, " stack"); |
1009 | } | 1011 | } |
1010 | 1012 | ||
1011 | walk_page_range(vma->vm_start, vma->vm_end, &walk); | 1013 | walk_page_range(vma->vm_start, vma->vm_end, &walk); |
1012 | 1014 | ||
1013 | if (!md->pages) | 1015 | if (!md->pages) |
1014 | goto out; | 1016 | goto out; |
1015 | 1017 | ||
1016 | if (md->anon) | 1018 | if (md->anon) |
1017 | seq_printf(m, " anon=%lu", md->anon); | 1019 | seq_printf(m, " anon=%lu", md->anon); |
1018 | 1020 | ||
1019 | if (md->dirty) | 1021 | if (md->dirty) |
1020 | seq_printf(m, " dirty=%lu", md->dirty); | 1022 | seq_printf(m, " dirty=%lu", md->dirty); |
1021 | 1023 | ||
1022 | if (md->pages != md->anon && md->pages != md->dirty) | 1024 | if (md->pages != md->anon && md->pages != md->dirty) |
1023 | seq_printf(m, " mapped=%lu", md->pages); | 1025 | seq_printf(m, " mapped=%lu", md->pages); |
1024 | 1026 | ||
1025 | if (md->mapcount_max > 1) | 1027 | if (md->mapcount_max > 1) |
1026 | seq_printf(m, " mapmax=%lu", md->mapcount_max); | 1028 | seq_printf(m, " mapmax=%lu", md->mapcount_max); |
1027 | 1029 | ||
1028 | if (md->swapcache) | 1030 | if (md->swapcache) |
1029 | seq_printf(m, " swapcache=%lu", md->swapcache); | 1031 | seq_printf(m, " swapcache=%lu", md->swapcache); |
1030 | 1032 | ||
1031 | if (md->active < md->pages && !is_vm_hugetlb_page(vma)) | 1033 | if (md->active < md->pages && !is_vm_hugetlb_page(vma)) |
1032 | seq_printf(m, " active=%lu", md->active); | 1034 | seq_printf(m, " active=%lu", md->active); |
1033 | 1035 | ||
1034 | if (md->writeback) | 1036 | if (md->writeback) |
1035 | seq_printf(m, " writeback=%lu", md->writeback); | 1037 | seq_printf(m, " writeback=%lu", md->writeback); |
1036 | 1038 | ||
1037 | for_each_node_state(n, N_HIGH_MEMORY) | 1039 | for_each_node_state(n, N_HIGH_MEMORY) |
1038 | if (md->node[n]) | 1040 | if (md->node[n]) |
1039 | seq_printf(m, " N%d=%lu", n, md->node[n]); | 1041 | seq_printf(m, " N%d=%lu", n, md->node[n]); |
1040 | out: | 1042 | out: |
1041 | seq_putc(m, '\n'); | 1043 | seq_putc(m, '\n'); |
1042 | 1044 | ||
1043 | if (m->count < m->size) | 1045 | if (m->count < m->size) |
1044 | m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0; | 1046 | m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0; |
1045 | return 0; | 1047 | return 0; |
1046 | } | 1048 | } |
1047 | 1049 | ||
1048 | static const struct seq_operations proc_pid_numa_maps_op = { | 1050 | static const struct seq_operations proc_pid_numa_maps_op = { |
1049 | .start = m_start, | 1051 | .start = m_start, |
1050 | .next = m_next, | 1052 | .next = m_next, |
1051 | .stop = m_stop, | 1053 | .stop = m_stop, |
1052 | .show = show_numa_map, | 1054 | .show = show_numa_map, |
1053 | }; | 1055 | }; |
1054 | 1056 | ||
1055 | static int numa_maps_open(struct inode *inode, struct file *file) | 1057 | static int numa_maps_open(struct inode *inode, struct file *file) |
1056 | { | 1058 | { |
1057 | struct numa_maps_private *priv; | 1059 | struct numa_maps_private *priv; |
1058 | int ret = -ENOMEM; | 1060 | int ret = -ENOMEM; |
1059 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | 1061 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); |
1060 | if (priv) { | 1062 | if (priv) { |
1061 | priv->proc_maps.pid = proc_pid(inode); | 1063 | priv->proc_maps.pid = proc_pid(inode); |
1062 | ret = seq_open(file, &proc_pid_numa_maps_op); | 1064 | ret = seq_open(file, &proc_pid_numa_maps_op); |
1063 | if (!ret) { | 1065 | if (!ret) { |
1064 | struct seq_file *m = file->private_data; | 1066 | struct seq_file *m = file->private_data; |
1065 | m->private = priv; | 1067 | m->private = priv; |
1066 | } else { | 1068 | } else { |
1067 | kfree(priv); | 1069 | kfree(priv); |
1068 | } | 1070 | } |
1069 | } | 1071 | } |
1070 | return ret; | 1072 | return ret; |
1071 | } | 1073 | } |
1072 | 1074 | ||
1073 | const struct file_operations proc_numa_maps_operations = { | 1075 | const struct file_operations proc_numa_maps_operations = { |
1074 | .open = numa_maps_open, | 1076 | .open = numa_maps_open, |
1075 | .read = seq_read, | 1077 | .read = seq_read, |
1076 | .llseek = seq_lseek, | 1078 | .llseek = seq_lseek, |
1077 | .release = seq_release_private, | 1079 | .release = seq_release_private, |
1078 | }; | 1080 | }; |
1079 | #endif /* CONFIG_NUMA */ | 1081 | #endif /* CONFIG_NUMA */ |
1080 | 1082 |