Blame view
fs/kernfs/file.c
25.8 KB
55716d264
|
1 |
// SPDX-License-Identifier: GPL-2.0-only |
b8441ed27
|
2 3 4 5 6 7 |
/* * fs/kernfs/file.c - kernfs file implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> |
b8441ed27
|
8 |
*/ |
414985ae2
|
9 10 11 12 13 14 |
#include <linux/fs.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/pagemap.h> |
589ee6284
|
15 |
#include <linux/sched/mm.h> |
d911d9874
|
16 |
#include <linux/fsnotify.h> |
414985ae2
|
17 18 19 20 |
#include "kernfs-internal.h" /* |
c525aaddc
|
21 |
* There's one kernfs_open_file for each open file and one kernfs_open_node |
324a56e16
|
22 |
* for each kernfs_node with one or more open files. |
414985ae2
|
23 |
* |
c525aaddc
|
24 25 |
* kernfs_node->attr.open points to kernfs_open_node. attr.open is * protected by kernfs_open_node_lock. |
414985ae2
|
26 27 |
* * filp->private_data points to seq_file whose ->private points to |
c525aaddc
|
28 29 |
* kernfs_open_file. kernfs_open_files are chained at * kernfs_open_node->files, which is protected by kernfs_open_file_mutex. |
414985ae2
|
30 |
*/ |
c525aaddc
|
31 32 |
static DEFINE_SPINLOCK(kernfs_open_node_lock); static DEFINE_MUTEX(kernfs_open_file_mutex); |
414985ae2
|
33 |
|
c525aaddc
|
34 |
struct kernfs_open_node { |
414985ae2
|
35 36 37 |
atomic_t refcnt; atomic_t event; wait_queue_head_t poll; |
c525aaddc
|
38 |
struct list_head files; /* goes through kernfs_open_file.list */ |
414985ae2
|
39 |
}; |
ecca47ce8
|
40 41 42 43 44 45 46 47 48 49 50 51 |
/* * kernfs_notify() may be called from any context and bounces notifications * through a work item. To minimize space overhead in kernfs_node, the * pending queue is implemented as a singly linked list of kernfs_nodes. * The list is terminated with the self pointer so that whether a * kernfs_node is on the list or not can be determined by testing the next * pointer for NULL. */ #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) static DEFINE_SPINLOCK(kernfs_notify_lock); static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; |
c525aaddc
|
52 |
static struct kernfs_open_file *kernfs_of(struct file *file) |
414985ae2
|
53 54 55 56 57 |
{ return ((struct seq_file *)file->private_data)->private; } /* |
324a56e16
|
58 |
* Determine the kernfs_ops for the given kernfs_node. This function must |
414985ae2
|
59 60 |
* be called while holding an active reference. */ |
324a56e16
|
61 |
static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn) |
414985ae2
|
62 |
{ |
df23fc39b
|
63 |
if (kn->flags & KERNFS_LOCKDEP) |
324a56e16
|
64 |
lockdep_assert_held(kn); |
adc5e8b58
|
65 |
return kn->attr.ops; |
414985ae2
|
66 |
} |
bb305947b
|
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
/* * As kernfs_seq_stop() is also called after kernfs_seq_start() or * kernfs_seq_next() failure, it needs to distinguish whether it's stopping * a seq_file iteration which is fully initialized with an active reference * or an aborted kernfs_seq_start() due to get_active failure. The * position pointer is the only context for each seq_file iteration and * thus the stop condition should be encoded in it. As the return value is * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable * choice to indicate get_active failure. * * Unfortunately, this is complicated due to the optional custom seq_file * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop() * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or * custom seq_file operations and thus can't decide whether put_active * should be performed or not only on ERR_PTR(-ENODEV). * * This is worked around by factoring out the custom seq_stop() and * put_active part into kernfs_seq_stop_active(), skipping it from * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures * that kernfs_seq_stop_active() is skipped only after get_active failure. */ static void kernfs_seq_stop_active(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; const struct kernfs_ops *ops = kernfs_ops(of->kn); if (ops->seq_stop) ops->seq_stop(sf, v); kernfs_put_active(of->kn); } |
414985ae2
|
98 99 |
static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) { |
c525aaddc
|
100 |
struct kernfs_open_file *of = sf->private; |
414985ae2
|
101 102 103 |
const struct kernfs_ops *ops; /* |
2b75869bb
|
104 |
* @of->mutex nests outside active ref and is primarily to ensure that |
414985ae2
|
105 106 107 |
* the ops aren't called concurrently for the same open file. */ mutex_lock(&of->mutex); |
c637b8acb
|
108 |
if (!kernfs_get_active(of->kn)) |
414985ae2
|
109 |
return ERR_PTR(-ENODEV); |
324a56e16
|
110 |
ops = kernfs_ops(of->kn); |
414985ae2
|
111 |
if (ops->seq_start) { |
bb305947b
|
112 113 114 115 116 |
void *next = ops->seq_start(sf, ppos); /* see the comment above kernfs_seq_stop_active() */ if (next == ERR_PTR(-ENODEV)) kernfs_seq_stop_active(sf, next); return next; |
414985ae2
|
117 118 119 120 121 122 123 124 125 126 127 |
} else { /* * The same behavior and code as single_open(). Returns * !NULL if pos is at the beginning; otherwise, NULL. */ return NULL + !*ppos; } } static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos) { |
c525aaddc
|
128 |
struct kernfs_open_file *of = sf->private; |
324a56e16
|
129 |
const struct kernfs_ops *ops = kernfs_ops(of->kn); |
414985ae2
|
130 131 |
if (ops->seq_next) { |
bb305947b
|
132 133 134 135 136 |
void *next = ops->seq_next(sf, v, ppos); /* see the comment above kernfs_seq_stop_active() */ if (next == ERR_PTR(-ENODEV)) kernfs_seq_stop_active(sf, next); return next; |
414985ae2
|
137 138 139 140 141 142 143 144 145 146 147 148 |
} else { /* * The same behavior and code as single_open(), always * terminate after the initial read. */ ++*ppos; return NULL; } } static void kernfs_seq_stop(struct seq_file *sf, void *v) { |
c525aaddc
|
149 |
struct kernfs_open_file *of = sf->private; |
414985ae2
|
150 |
|
bb305947b
|
151 152 |
if (v != ERR_PTR(-ENODEV)) kernfs_seq_stop_active(sf, v); |
414985ae2
|
153 154 155 156 157 |
mutex_unlock(&of->mutex); } static int kernfs_seq_show(struct seq_file *sf, void *v) { |
c525aaddc
|
158 |
struct kernfs_open_file *of = sf->private; |
414985ae2
|
159 |
|
adc5e8b58
|
160 |
of->event = atomic_read(&of->kn->attr.open->event); |
414985ae2
|
161 |
|
adc5e8b58
|
162 |
return of->kn->attr.ops->seq_show(sf, v); |
414985ae2
|
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
} static const struct seq_operations kernfs_seq_ops = { .start = kernfs_seq_start, .next = kernfs_seq_next, .stop = kernfs_seq_stop, .show = kernfs_seq_show, }; /* * As reading a bin file can have side-effects, the exact offset and bytes * specified in read(2) call should be passed to the read callback making * it difficult to use seq_file. Implement simplistic custom buffering for * bin files. */ |
c525aaddc
|
178 |
static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, |
414985ae2
|
179 180 181 182 183 184 |
char __user *user_buf, size_t count, loff_t *ppos) { ssize_t len = min_t(size_t, count, PAGE_SIZE); const struct kernfs_ops *ops; char *buf; |
4ef67a8c9
|
185 |
buf = of->prealloc_buf; |
e4234a1fc
|
186 187 188 |
if (buf) mutex_lock(&of->prealloc_mutex); else |
4ef67a8c9
|
189 |
buf = kmalloc(len, GFP_KERNEL); |
414985ae2
|
190 191 192 193 |
if (!buf) return -ENOMEM; /* |
4ef67a8c9
|
194 |
* @of->mutex nests outside active ref and is used both to ensure that |
e4234a1fc
|
195 |
* the ops aren't called concurrently for the same open file. |
414985ae2
|
196 197 |
*/ mutex_lock(&of->mutex); |
c637b8acb
|
198 |
if (!kernfs_get_active(of->kn)) { |
414985ae2
|
199 200 201 202 |
len = -ENODEV; mutex_unlock(&of->mutex); goto out_free; } |
7cff4b183
|
203 |
of->event = atomic_read(&of->kn->attr.open->event); |
324a56e16
|
204 |
ops = kernfs_ops(of->kn); |
414985ae2
|
205 206 207 208 |
if (ops->read) len = ops->read(of, buf, len, *ppos); else len = -EINVAL; |
e4234a1fc
|
209 210 |
kernfs_put_active(of->kn); mutex_unlock(&of->mutex); |
414985ae2
|
211 |
if (len < 0) |
e4234a1fc
|
212 |
goto out_free; |
414985ae2
|
213 214 215 |
if (copy_to_user(user_buf, buf, len)) { len = -EFAULT; |
e4234a1fc
|
216 |
goto out_free; |
414985ae2
|
217 218 219 220 221 |
} *ppos += len; out_free: |
e4234a1fc
|
222 223 224 |
if (buf == of->prealloc_buf) mutex_unlock(&of->prealloc_mutex); else |
4ef67a8c9
|
225 |
kfree(buf); |
414985ae2
|
226 227 228 229 |
return len; } /** |
c637b8acb
|
230 |
* kernfs_fop_read - kernfs vfs read callback |
414985ae2
|
231 232 233 234 235 |
* @file: file pointer * @user_buf: data to write * @count: number of bytes * @ppos: starting offset */ |
c637b8acb
|
236 237 |
static ssize_t kernfs_fop_read(struct file *file, char __user *user_buf,
			       size_t count, loff_t *ppos)
{
	struct kernfs_open_file *of = kernfs_of(file);

	/* nodes without ->seq_show are read through the direct path */
	if (!(of->kn->flags & KERNFS_HAS_SEQ_SHOW))
		return kernfs_file_direct_read(of, user_buf, count, ppos);

	return seq_read(file, user_buf, count, ppos);
}

/**
 * kernfs_fop_write - kernfs vfs write callback
 * @file: file pointer
 * @user_buf: data to write
 * @count: number of bytes
 * @ppos: starting offset
 *
 * Copy data in from userland and pass it to the matching kernfs write
 * operation.
 *
 * There is no easy way for us to know if userspace is only doing a partial
 * write, so we don't support them. We expect the entire buffer to come on
 * the first write.  Hint: if you're writing a value, first read the file,
 * modify only the value you're changing, then write entire buffer
 * back.
 */
c637b8acb
|
263 264 |
static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) |
414985ae2
|
265 |
{ |
c525aaddc
|
266 |
struct kernfs_open_file *of = kernfs_of(file); |
414985ae2
|
267 |
const struct kernfs_ops *ops; |
ba87977a4
|
268 |
ssize_t len; |
b7ce40cff
|
269 |
char *buf; |
4d3773c4b
|
270 |
|
b7ce40cff
|
271 |
if (of->atomic_write_len) { |
4d3773c4b
|
272 |
len = count; |
b7ce40cff
|
273 274 |
if (len > of->atomic_write_len) return -E2BIG; |
4d3773c4b
|
275 276 277 |
} else { len = min_t(size_t, count, PAGE_SIZE); } |
2b75869bb
|
278 |
buf = of->prealloc_buf; |
e4234a1fc
|
279 280 281 |
if (buf) mutex_lock(&of->prealloc_mutex); else |
2b75869bb
|
282 |
buf = kmalloc(len + 1, GFP_KERNEL); |
b7ce40cff
|
283 284 |
if (!buf) return -ENOMEM; |
414985ae2
|
285 |
|
e4234a1fc
|
286 287 288 289 290 |
if (copy_from_user(buf, user_buf, len)) { len = -EFAULT; goto out_free; } buf[len] = '\0'; /* guarantee string termination */ |
b7ce40cff
|
291 |
/* |
2b75869bb
|
292 |
* @of->mutex nests outside active ref and is used both to ensure that |
e4234a1fc
|
293 |
* the ops aren't called concurrently for the same open file. |
b7ce40cff
|
294 295 296 297 298 299 300 301 302 303 304 305 306 |
*/ mutex_lock(&of->mutex); if (!kernfs_get_active(of->kn)) { mutex_unlock(&of->mutex); len = -ENODEV; goto out_free; } ops = kernfs_ops(of->kn); if (ops->write) len = ops->write(of, buf, len, *ppos); else len = -EINVAL; |
e4234a1fc
|
307 308 |
kernfs_put_active(of->kn); mutex_unlock(&of->mutex); |
414985ae2
|
309 310 |
if (len > 0) *ppos += len; |
2b75869bb
|
311 |
|
b7ce40cff
|
312 |
out_free: |
e4234a1fc
|
313 314 315 |
if (buf == of->prealloc_buf) mutex_unlock(&of->prealloc_mutex); else |
2b75869bb
|
316 |
kfree(buf); |
414985ae2
|
317 318 319 320 321 322 |
return len; } static void kernfs_vma_open(struct vm_area_struct *vma) { struct file *file = vma->vm_file; |
c525aaddc
|
323 |
struct kernfs_open_file *of = kernfs_of(file); |
414985ae2
|
324 325 326 |
if (!of->vm_ops) return; |
c637b8acb
|
327 |
if (!kernfs_get_active(of->kn)) |
414985ae2
|
328 329 330 331 |
return; if (of->vm_ops->open) of->vm_ops->open(vma); |
c637b8acb
|
332 |
kernfs_put_active(of->kn); |
414985ae2
|
333 |
} |
9ee84466b
|
334 |
static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) |
414985ae2
|
335 |
{ |
11bac8000
|
336 |
struct file *file = vmf->vma->vm_file; |
c525aaddc
|
337 |
struct kernfs_open_file *of = kernfs_of(file); |
9ee84466b
|
338 |
vm_fault_t ret; |
414985ae2
|
339 340 341 |
if (!of->vm_ops) return VM_FAULT_SIGBUS; |
c637b8acb
|
342 |
if (!kernfs_get_active(of->kn)) |
414985ae2
|
343 344 345 346 |
return VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS; if (of->vm_ops->fault) |
11bac8000
|
347 |
ret = of->vm_ops->fault(vmf); |
414985ae2
|
348 |
|
c637b8acb
|
349 |
kernfs_put_active(of->kn); |
414985ae2
|
350 351 |
return ret; } |
9ee84466b
|
352 |
static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf) |
414985ae2
|
353 |
{ |
11bac8000
|
354 |
struct file *file = vmf->vma->vm_file; |
c525aaddc
|
355 |
struct kernfs_open_file *of = kernfs_of(file); |
9ee84466b
|
356 |
vm_fault_t ret; |
414985ae2
|
357 358 359 |
if (!of->vm_ops) return VM_FAULT_SIGBUS; |
c637b8acb
|
360 |
if (!kernfs_get_active(of->kn)) |
414985ae2
|
361 362 363 364 |
return VM_FAULT_SIGBUS; ret = 0; if (of->vm_ops->page_mkwrite) |
11bac8000
|
365 |
ret = of->vm_ops->page_mkwrite(vmf); |
414985ae2
|
366 367 |
else file_update_time(file); |
c637b8acb
|
368 |
kernfs_put_active(of->kn); |
414985ae2
|
369 370 371 372 373 374 375 |
return ret; } static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write) { struct file *file = vma->vm_file; |
c525aaddc
|
376 |
struct kernfs_open_file *of = kernfs_of(file); |
414985ae2
|
377 378 379 380 |
int ret; if (!of->vm_ops) return -EINVAL; |
c637b8acb
|
381 |
if (!kernfs_get_active(of->kn)) |
414985ae2
|
382 383 384 385 386 |
return -EINVAL; ret = -EINVAL; if (of->vm_ops->access) ret = of->vm_ops->access(vma, addr, buf, len, write); |
c637b8acb
|
387 |
kernfs_put_active(of->kn); |
414985ae2
|
388 389 390 391 392 393 394 395 |
return ret; } #ifdef CONFIG_NUMA static int kernfs_vma_set_policy(struct vm_area_struct *vma, struct mempolicy *new) { struct file *file = vma->vm_file; |
c525aaddc
|
396 |
struct kernfs_open_file *of = kernfs_of(file); |
414985ae2
|
397 398 399 400 |
int ret; if (!of->vm_ops) return 0; |
c637b8acb
|
401 |
if (!kernfs_get_active(of->kn)) |
414985ae2
|
402 403 404 405 406 |
return -EINVAL; ret = 0; if (of->vm_ops->set_policy) ret = of->vm_ops->set_policy(vma, new); |
c637b8acb
|
407 |
kernfs_put_active(of->kn); |
414985ae2
|
408 409 410 411 412 413 414 |
return ret; } static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma, unsigned long addr) { struct file *file = vma->vm_file; |
c525aaddc
|
415 |
struct kernfs_open_file *of = kernfs_of(file); |
414985ae2
|
416 417 418 419 |
struct mempolicy *pol; if (!of->vm_ops) return vma->vm_policy; |
c637b8acb
|
420 |
if (!kernfs_get_active(of->kn)) |
414985ae2
|
421 422 423 424 425 |
return vma->vm_policy; pol = vma->vm_policy; if (of->vm_ops->get_policy) pol = of->vm_ops->get_policy(vma, addr); |
c637b8acb
|
426 |
kernfs_put_active(of->kn); |
414985ae2
|
427 428 |
return pol; } |
414985ae2
|
429 430 431 432 433 434 435 436 437 438 |
#endif static const struct vm_operations_struct kernfs_vm_ops = { .open = kernfs_vma_open, .fault = kernfs_vma_fault, .page_mkwrite = kernfs_vma_page_mkwrite, .access = kernfs_vma_access, #ifdef CONFIG_NUMA .set_policy = kernfs_vma_set_policy, .get_policy = kernfs_vma_get_policy, |
414985ae2
|
439 440 |
#endif }; |
c637b8acb
|
441 |
static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) |
414985ae2
|
442 |
{ |
c525aaddc
|
443 |
struct kernfs_open_file *of = kernfs_of(file); |
414985ae2
|
444 445 |
const struct kernfs_ops *ops; int rc; |
9b2db6e18
|
446 447 448 449 450 451 452 |
/* * mmap path and of->mutex are prone to triggering spurious lockdep * warnings and we don't want to add spurious locking dependency * between the two. Check whether mmap is actually implemented * without grabbing @of->mutex by testing HAS_MMAP flag. See the * comment in kernfs_file_open() for more details. */ |
df23fc39b
|
453 |
if (!(of->kn->flags & KERNFS_HAS_MMAP)) |
9b2db6e18
|
454 |
return -ENODEV; |
414985ae2
|
455 456 457 |
mutex_lock(&of->mutex); rc = -ENODEV; |
c637b8acb
|
458 |
if (!kernfs_get_active(of->kn)) |
414985ae2
|
459 |
goto out_unlock; |
324a56e16
|
460 |
ops = kernfs_ops(of->kn); |
9b2db6e18
|
461 |
rc = ops->mmap(of, vma); |
b44b21402
|
462 463 |
if (rc) goto out_put; |
414985ae2
|
464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 |
/* * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() * to satisfy versions of X which crash if the mmap fails: that * substitutes a new vm_file, and we don't then want bin_vm_ops. */ if (vma->vm_file != file) goto out_put; rc = -EINVAL; if (of->mmapped && of->vm_ops != vma->vm_ops) goto out_put; /* * It is not possible to successfully wrap close. * So error if someone is trying to use close. */ rc = -EINVAL; if (vma->vm_ops && vma->vm_ops->close) goto out_put; rc = 0; |
a1d82aff5
|
486 |
of->mmapped = true; |
414985ae2
|
487 488 489 |
of->vm_ops = vma->vm_ops; vma->vm_ops = &kernfs_vm_ops; out_put: |
c637b8acb
|
490 |
kernfs_put_active(of->kn); |
414985ae2
|
491 492 493 494 495 496 497 |
out_unlock: mutex_unlock(&of->mutex); return rc; } /** |
c637b8acb
|
498 |
* kernfs_get_open_node - get or create kernfs_open_node |
324a56e16
|
499 |
* @kn: target kernfs_node |
c525aaddc
|
500 |
* @of: kernfs_open_file for this instance of open |
414985ae2
|
501 |
* |
adc5e8b58
|
502 503 |
* If @kn->attr.open exists, increment its reference count; otherwise, * create one. @of is chained to the files list. |
414985ae2
|
504 505 506 507 508 509 510 |
* * LOCKING: * Kernel thread context (may sleep). * * RETURNS: * 0 on success, -errno on failure. */ |
c637b8acb
|
511 512 |
static int kernfs_get_open_node(struct kernfs_node *kn, struct kernfs_open_file *of) |
414985ae2
|
513 |
{ |
c525aaddc
|
514 |
struct kernfs_open_node *on, *new_on = NULL; |
414985ae2
|
515 516 |
retry: |
c525aaddc
|
517 518 |
mutex_lock(&kernfs_open_file_mutex); spin_lock_irq(&kernfs_open_node_lock); |
414985ae2
|
519 |
|
c525aaddc
|
520 521 522 |
if (!kn->attr.open && new_on) { kn->attr.open = new_on; new_on = NULL; |
414985ae2
|
523 |
} |
c525aaddc
|
524 525 526 527 |
on = kn->attr.open; if (on) { atomic_inc(&on->refcnt); list_add_tail(&of->list, &on->files); |
414985ae2
|
528 |
} |
c525aaddc
|
529 530 |
spin_unlock_irq(&kernfs_open_node_lock); mutex_unlock(&kernfs_open_file_mutex); |
414985ae2
|
531 |
|
c525aaddc
|
532 533 |
if (on) { kfree(new_on); |
414985ae2
|
534 535 536 537 |
return 0; } /* not there, initialize a new one and retry */ |
c525aaddc
|
538 539 |
new_on = kmalloc(sizeof(*new_on), GFP_KERNEL); if (!new_on) |
414985ae2
|
540 |
return -ENOMEM; |
c525aaddc
|
541 542 543 544 |
atomic_set(&new_on->refcnt, 0); atomic_set(&new_on->event, 1); init_waitqueue_head(&new_on->poll); INIT_LIST_HEAD(&new_on->files); |
414985ae2
|
545 546 547 548 |
goto retry; } /** |
c637b8acb
|
549 |
* kernfs_put_open_node - put kernfs_open_node |
324a56e16
|
550 |
* @kn: target kernfs_nodet |
c525aaddc
|
551 |
* @of: associated kernfs_open_file |
414985ae2
|
552 |
* |
adc5e8b58
|
553 |
* Put @kn->attr.open and unlink @of from the files list. If |
414985ae2
|
554 555 556 557 558 |
* reference count reaches zero, disassociate and free it. * * LOCKING: * None. */ |
c637b8acb
|
559 560 |
static void kernfs_put_open_node(struct kernfs_node *kn, struct kernfs_open_file *of) |
414985ae2
|
561 |
{ |
c525aaddc
|
562 |
struct kernfs_open_node *on = kn->attr.open; |
414985ae2
|
563 |
unsigned long flags; |
c525aaddc
|
564 565 |
mutex_lock(&kernfs_open_file_mutex); spin_lock_irqsave(&kernfs_open_node_lock, flags); |
414985ae2
|
566 567 568 |
if (of) list_del(&of->list); |
c525aaddc
|
569 |
if (atomic_dec_and_test(&on->refcnt)) |
adc5e8b58
|
570 |
kn->attr.open = NULL; |
414985ae2
|
571 |
else |
c525aaddc
|
572 |
on = NULL; |
414985ae2
|
573 |
|
c525aaddc
|
574 575 |
spin_unlock_irqrestore(&kernfs_open_node_lock, flags); mutex_unlock(&kernfs_open_file_mutex); |
414985ae2
|
576 |
|
c525aaddc
|
577 |
kfree(on); |
414985ae2
|
578 |
} |
c637b8acb
|
579 |
static int kernfs_fop_open(struct inode *inode, struct file *file) |
414985ae2
|
580 |
{ |
319ba91d3
|
581 |
struct kernfs_node *kn = inode->i_private; |
555724a83
|
582 |
struct kernfs_root *root = kernfs_root(kn); |
414985ae2
|
583 |
const struct kernfs_ops *ops; |
c525aaddc
|
584 |
struct kernfs_open_file *of; |
414985ae2
|
585 586 |
bool has_read, has_write, has_mmap; int error = -EACCES; |
c637b8acb
|
587 |
if (!kernfs_get_active(kn)) |
414985ae2
|
588 |
return -ENODEV; |
324a56e16
|
589 |
ops = kernfs_ops(kn); |
414985ae2
|
590 591 592 593 |
has_read = ops->seq_show || ops->read || ops->mmap; has_write = ops->write || ops->mmap; has_mmap = ops->mmap; |
555724a83
|
594 595 596 597 598 |
/* see the flag definition for details */ if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) { if ((file->f_mode & FMODE_WRITE) && (!(inode->i_mode & S_IWUGO) || !has_write)) goto err_out; |
414985ae2
|
599 |
|
555724a83
|
600 601 602 603 |
if ((file->f_mode & FMODE_READ) && (!(inode->i_mode & S_IRUGO) || !has_read)) goto err_out; } |
414985ae2
|
604 |
|
c525aaddc
|
605 |
/* allocate a kernfs_open_file for the file */ |
414985ae2
|
606 |
error = -ENOMEM; |
c525aaddc
|
607 |
of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL); |
414985ae2
|
608 609 610 611 612 613 614 615 616 617 618 619 620 621 |
if (!of) goto err_out; /* * The following is done to give a different lockdep key to * @of->mutex for files which implement mmap. This is a rather * crude way to avoid false positive lockdep warning around * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under * which mm->mmap_sem nests, while holding @of->mutex. As each * open file has a separate mutex, it's okay as long as those don't * happen on the same file. At this point, we can't easily give * each file a separate locking class. Let's differentiate on * whether the file has mmap or not for now. |
9b2db6e18
|
622 623 624 |
* * Both paths of the branch look the same. They're supposed to * look that way and give @of->mutex different static lockdep keys. |
414985ae2
|
625 626 627 628 629 |
*/ if (has_mmap) mutex_init(&of->mutex); else mutex_init(&of->mutex); |
324a56e16
|
630 |
of->kn = kn; |
414985ae2
|
631 632 633 |
of->file = file; /* |
b7ce40cff
|
634 635 636 637 |
* Write path needs to atomic_write_len outside active reference. * Cache it in open_file. See kernfs_fop_write() for details. */ of->atomic_write_len = ops->atomic_write_len; |
4ef67a8c9
|
638 639 640 641 642 643 644 645 |
error = -EINVAL; /* * ->seq_show is incompatible with ->prealloc, * as seq_read does its own allocation. * ->read must be used instead. */ if (ops->prealloc && ops->seq_show) goto err_free; |
2b75869bb
|
646 647 648 649 650 651 |
if (ops->prealloc) { int len = of->atomic_write_len ?: PAGE_SIZE; of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL); error = -ENOMEM; if (!of->prealloc_buf) goto err_free; |
e4234a1fc
|
652 |
mutex_init(&of->prealloc_mutex); |
2b75869bb
|
653 |
} |
b7ce40cff
|
654 |
/* |
414985ae2
|
655 656 657 658 659 660 661 662 663 664 |
* Always instantiate seq_file even if read access doesn't use * seq_file or is not requested. This unifies private data access * and readable regular files are the vast majority anyway. */ if (ops->seq_show) error = seq_open(file, &kernfs_seq_ops); else error = seq_open(file, NULL); if (error) goto err_free; |
0e67db2f9
|
665 666 |
of->seq_file = file->private_data; of->seq_file->private = of; |
414985ae2
|
667 668 669 670 |
/* seq_file clears PWRITE unconditionally, restore it if WRITE */ if (file->f_mode & FMODE_WRITE) file->f_mode |= FMODE_PWRITE; |
c637b8acb
|
671 672 |
/* make sure we have open node struct */ error = kernfs_get_open_node(kn, of); |
414985ae2
|
673 |
if (error) |
0e67db2f9
|
674 675 676 677 678 679 680 681 |
goto err_seq_release; if (ops->open) { /* nobody has access to @of yet, skip @of->mutex */ error = ops->open(of); if (error) goto err_put_node; } |
414985ae2
|
682 683 |
/* open succeeded, put active references */ |
c637b8acb
|
684 |
kernfs_put_active(kn); |
414985ae2
|
685 |
return 0; |
0e67db2f9
|
686 687 688 |
err_put_node: kernfs_put_open_node(kn, of); err_seq_release: |
414985ae2
|
689 690 |
seq_release(inode, file); err_free: |
2b75869bb
|
691 |
kfree(of->prealloc_buf); |
414985ae2
|
692 693 |
kfree(of); err_out: |
c637b8acb
|
694 |
kernfs_put_active(kn); |
414985ae2
|
695 696 |
return error; } |
0e67db2f9
|
697 698 699 700 |
/* used from release/drain to ensure that ->release() is called exactly once */ static void kernfs_release_file(struct kernfs_node *kn, struct kernfs_open_file *of) { |
f83f3c515
|
701 702 703 704 705 706 707 708 |
/* * @of is guaranteed to have no other file operations in flight and * we just want to synchronize release and drain paths. * @kernfs_open_file_mutex is enough. @of->mutex can't be used * here because drain path may be called from places which can * cause circular dependency. */ lockdep_assert_held(&kernfs_open_file_mutex); |
0e67db2f9
|
709 |
|
0e67db2f9
|
710 711 712 713 714 715 716 717 718 |
if (!of->released) { /* * A file is never detached without being released and we * need to be able to release files which are deactivated * and being drained. Don't use kernfs_ops(). */ kn->attr.ops->release(of); of->released = true; } |
0e67db2f9
|
719 |
} |
c637b8acb
|
720 |
static int kernfs_fop_release(struct inode *inode, struct file *filp) |
414985ae2
|
721 |
{ |
319ba91d3
|
722 |
struct kernfs_node *kn = inode->i_private; |
c525aaddc
|
723 |
struct kernfs_open_file *of = kernfs_of(filp); |
414985ae2
|
724 |
|
f83f3c515
|
725 726 727 728 729 |
if (kn->flags & KERNFS_HAS_RELEASE) { mutex_lock(&kernfs_open_file_mutex); kernfs_release_file(kn, of); mutex_unlock(&kernfs_open_file_mutex); } |
c637b8acb
|
730 |
kernfs_put_open_node(kn, of); |
414985ae2
|
731 |
seq_release(inode, filp); |
2b75869bb
|
732 |
kfree(of->prealloc_buf); |
414985ae2
|
733 734 735 736 |
kfree(of); return 0; } |
0e67db2f9
|
737 |
void kernfs_drain_open_files(struct kernfs_node *kn) |
414985ae2
|
738 |
{ |
c525aaddc
|
739 740 |
struct kernfs_open_node *on; struct kernfs_open_file *of; |
414985ae2
|
741 |
|
0e67db2f9
|
742 |
if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE))) |
55f6e30d0
|
743 |
return; |
c525aaddc
|
744 745 746 747 748 749 |
spin_lock_irq(&kernfs_open_node_lock); on = kn->attr.open; if (on) atomic_inc(&on->refcnt); spin_unlock_irq(&kernfs_open_node_lock); if (!on) |
414985ae2
|
750 |
return; |
c525aaddc
|
751 |
mutex_lock(&kernfs_open_file_mutex); |
0e67db2f9
|
752 |
|
c525aaddc
|
753 |
list_for_each_entry(of, &on->files, list) { |
414985ae2
|
754 |
struct inode *inode = file_inode(of->file); |
0e67db2f9
|
755 756 757 |
if (kn->flags & KERNFS_HAS_MMAP) unmap_mapping_range(inode->i_mapping, 0, 0, 1); |
966fa72a7
|
758 759 |
if (kn->flags & KERNFS_HAS_RELEASE) kernfs_release_file(kn, of); |
414985ae2
|
760 |
} |
0e67db2f9
|
761 |
|
c525aaddc
|
762 |
mutex_unlock(&kernfs_open_file_mutex); |
414985ae2
|
763 |
|
c637b8acb
|
764 |
kernfs_put_open_node(kn, NULL); |
414985ae2
|
765 |
} |
c637b8acb
|
766 767 |
/* * Kernfs attribute files are pollable. The idea is that you read |
414985ae2
|
768 769 770 |
* the content and then you use 'poll' or 'select' to wait for * the content to change. When the content changes (assuming the * manager for the kobject supports notification), poll will |
a9a08845e
|
771 |
* return EPOLLERR|EPOLLPRI, and select will return the fd whether |
414985ae2
|
772 773 774 775 776 777 778 779 |
* it is waiting for read, write, or exceptions. * Once poll/select indicates that the value has changed, you * need to close and re-open the file, or seek to 0 and read again. * Reminder: this only works for attributes which actively support * it, and it is not possible to test an attribute from userspace * to see if it supports poll (Neither 'poll' nor 'select' return * an appropriate error code). When in doubt, set a suitable timeout value. */ |
147e1a97c
|
780 781 782 783 784 785 786 787 788 789 790 791 |
/*
 * Default poll implementation: register on the node's poll wait queue
 * and report EPOLLERR|EPOLLPRI when the node's event count differs from
 * the one recorded in @of.
 */
__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
{
	struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry);
	struct kernfs_open_node *on = kn->attr.open;
	__poll_t mask = DEFAULT_POLLMASK;

	poll_wait(of->file, &on->poll, wait);

	if (of->event != atomic_read(&on->event))
		mask |= EPOLLERR | EPOLLPRI;

	return mask;
}
076ccb76e
|
792 |
/*
 * poll callback: use the node's ->poll() if it has one, else
 * kernfs_generic_poll().  EPOLLERR|EPOLLPRI is reported when no active
 * reference can be taken.
 */
static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
{
	struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
	struct kernfs_open_file *of = kernfs_of(filp);
	__poll_t mask;

	if (!kernfs_get_active(kn))
		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;

	mask = kn->attr.ops->poll ? kn->attr.ops->poll(of, wait)
				  : kernfs_generic_poll(of, wait);

	kernfs_put_active(kn);
	return mask;
}
ecca47ce8
|
809 |
static void kernfs_notify_workfn(struct work_struct *work) |
414985ae2
|
810 |
{ |
ecca47ce8
|
811 |
struct kernfs_node *kn; |
d911d9874
|
812 |
struct kernfs_super_info *info; |
ecca47ce8
|
813 814 815 816 817 818 |
repeat: /* pop one off the notify_list */ spin_lock_irq(&kernfs_notify_lock); kn = kernfs_notify_list; if (kn == KERNFS_NOTIFY_EOL) { spin_unlock_irq(&kernfs_notify_lock); |
d911d9874
|
819 |
return; |
ecca47ce8
|
820 821 822 823 |
} kernfs_notify_list = kn->attr.notify_next; kn->attr.notify_next = NULL; spin_unlock_irq(&kernfs_notify_lock); |
d911d9874
|
824 |
|
d911d9874
|
825 826 |
/* kick fsnotify */ mutex_lock(&kernfs_mutex); |
ecca47ce8
|
827 |
list_for_each_entry(info, &kernfs_root(kn)->supers, node) { |
df6a58c5c
|
828 |
struct kernfs_node *parent; |
d911d9874
|
829 |
struct inode *inode; |
25b229dff
|
830 |
struct qstr name; |
d911d9874
|
831 |
|
df6a58c5c
|
832 833 834 835 836 837 |
/* * We want fsnotify_modify() on @kn but as the * modifications aren't originating from userland don't * have the matching @file available. Look up the inodes * and generate the events manually. */ |
c53cd490b
|
838 |
inode = ilookup(info->sb, kn->id.ino); |
d911d9874
|
839 840 |
if (!inode) continue; |
25b229dff
|
841 |
name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name)); |
df6a58c5c
|
842 843 844 |
parent = kernfs_get_parent(kn); if (parent) { struct inode *p_inode; |
c53cd490b
|
845 |
p_inode = ilookup(info->sb, parent->id.ino); |
df6a58c5c
|
846 847 |
if (p_inode) { fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD, |
25b229dff
|
848 |
inode, FSNOTIFY_EVENT_INODE, &name, 0); |
df6a58c5c
|
849 850 851 852 |
iput(p_inode); } kernfs_put(parent); |
d911d9874
|
853 |
} |
df6a58c5c
|
854 |
fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE, |
25b229dff
|
855 |
&name, 0); |
d911d9874
|
856 857 858 859 |
iput(inode); } mutex_unlock(&kernfs_mutex); |
ecca47ce8
|
860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 |
kernfs_put(kn); goto repeat; } /** * kernfs_notify - notify a kernfs file * @kn: file to notify * * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any * context. */ void kernfs_notify(struct kernfs_node *kn) { static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn); unsigned long flags; |
03c0a9208
|
875 |
struct kernfs_open_node *on; |
ecca47ce8
|
876 877 878 |
if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) return; |
03c0a9208
|
879 880 881 882 883 884 885 886 887 888 |
/* kick poll immediately */ spin_lock_irqsave(&kernfs_open_node_lock, flags); on = kn->attr.open; if (on) { atomic_inc(&on->event); wake_up_interruptible(&on->poll); } spin_unlock_irqrestore(&kernfs_open_node_lock, flags); /* schedule work to kick fsnotify */ |
ecca47ce8
|
889 890 891 892 893 894 895 896 |
spin_lock_irqsave(&kernfs_notify_lock, flags); if (!kn->attr.notify_next) { kernfs_get(kn); kn->attr.notify_next = kernfs_notify_list; kernfs_notify_list = kn; schedule_work(&kernfs_notify_work); } spin_unlock_irqrestore(&kernfs_notify_lock, flags); |
414985ae2
|
897 898 |
} EXPORT_SYMBOL_GPL(kernfs_notify); |
a797bfc30
|
899 |
const struct file_operations kernfs_file_fops = { |
c637b8acb
|
900 901 |
.read = kernfs_fop_read, .write = kernfs_fop_write, |
414985ae2
|
902 |
.llseek = generic_file_llseek, |
c637b8acb
|
903 904 905 906 |
.mmap = kernfs_fop_mmap, .open = kernfs_fop_open, .release = kernfs_fop_release, .poll = kernfs_fop_poll, |
2a9becdd4
|
907 |
.fsync = noop_fsync, |
414985ae2
|
908 909 910 |
}; /** |
2063d608f
|
911 |
* __kernfs_create_file - kernfs internal function to create a file |
414985ae2
|
912 913 914 |
* @parent: directory to create the file in * @name: name of the file * @mode: mode of the file |
488dee96b
|
915 916 |
* @uid: uid of the file * @gid: gid of the file |
414985ae2
|
917 918 919 920 921 922 923 924 |
* @size: size of the file * @ops: kernfs operations for the file * @priv: private data for the file * @ns: optional namespace tag of the file * @key: lockdep key for the file's active_ref, %NULL to disable lockdep * * Returns the created node on success, ERR_PTR() value on error. */ |
2063d608f
|
925 926 |
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, |
488dee96b
|
927 928 |
umode_t mode, kuid_t uid, kgid_t gid, loff_t size, |
2063d608f
|
929 930 |
const struct kernfs_ops *ops, void *priv, const void *ns, |
2063d608f
|
931 |
struct lock_class_key *key) |
414985ae2
|
932 |
{ |
324a56e16
|
933 |
struct kernfs_node *kn; |
2063d608f
|
934 |
unsigned flags; |
414985ae2
|
935 |
int rc; |
2063d608f
|
936 |
flags = KERNFS_FILE; |
2063d608f
|
937 |
|
488dee96b
|
938 939 |
kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, uid, gid, flags); |
324a56e16
|
940 |
if (!kn) |
414985ae2
|
941 |
return ERR_PTR(-ENOMEM); |
adc5e8b58
|
942 943 944 |
kn->attr.ops = ops; kn->attr.size = size; kn->ns = ns; |
324a56e16
|
945 |
kn->priv = priv; |
414985ae2
|
946 947 948 |
#ifdef CONFIG_DEBUG_LOCK_ALLOC if (key) { |
39bf04db6
|
949 |
lockdep_init_map(&kn->dep_map, "kn->count", key, 0); |
df23fc39b
|
950 |
kn->flags |= KERNFS_LOCKDEP; |
414985ae2
|
951 952 953 954 |
} #endif /* |
adc5e8b58
|
955 |
* kn->attr.ops is accesible only while holding active ref. We |
414985ae2
|
956 957 958 959 |
* need to know whether some ops are implemented outside active * ref. Cache their existence in flags. */ if (ops->seq_show) |
df23fc39b
|
960 |
kn->flags |= KERNFS_HAS_SEQ_SHOW; |
414985ae2
|
961 |
if (ops->mmap) |
df23fc39b
|
962 |
kn->flags |= KERNFS_HAS_MMAP; |
0e67db2f9
|
963 964 |
if (ops->release) kn->flags |= KERNFS_HAS_RELEASE; |
414985ae2
|
965 |
|
988cd7afb
|
966 |
rc = kernfs_add_one(kn); |
414985ae2
|
967 |
if (rc) { |
324a56e16
|
968 |
kernfs_put(kn); |
414985ae2
|
969 970 |
return ERR_PTR(rc); } |
324a56e16
|
971 |
return kn; |
414985ae2
|
972 |
} |