Commit b7ce40cff0b9f6597f8318fd761accd92727f61f
Committed by
Greg Kroah-Hartman
1 parent
92d585ef06
Exists in
master
and in
13 other branches
kernfs: cache atomic_write_len in kernfs_open_file
While implementing atomic_write_len, 4d3773c4bb41 ("kernfs: implement kernfs_ops->atomic_write_len") moved data copy from userland inside kernfs_get_active() and kernfs_open_file->mutex so that kernfs_ops->atomic_write_len can be accessed before copying buffer from userland; unfortunately, this could lead to locking order inversion involving mmap_sem if copy_from_user() takes a page fault.

  ======================================================
  [ INFO: possible circular locking dependency detected ]
  3.14.0-rc4-next-20140228-sasha-00011-g4077c67-dirty #26 Tainted: G W
  -------------------------------------------------------
  trinity-c236/10658 is trying to acquire lock:
   (&of->mutex#2){+.+.+.}, at: [<fs/kernfs/file.c:487>] kernfs_fop_mmap+0x54/0x120

  but task is already holding lock:
   (&mm->mmap_sem){++++++}, at: [<mm/util.c:397>] vm_mmap_pgoff+0x6e/0xe0

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #1 (&mm->mmap_sem){++++++}:
         [<kernel/locking/lockdep.c:1945 kernel/locking/lockdep.c:2131>] validate_chain+0x6c5/0x7b0
         [<kernel/locking/lockdep.c:3182>] __lock_acquire+0x4cd/0x5a0
         [<arch/x86/include/asm/current.h:14 kernel/locking/lockdep.c:3602>] lock_acquire+0x182/0x1d0
         [<mm/memory.c:4188>] might_fault+0x7e/0xb0
         [<arch/x86/include/asm/uaccess.h:713 fs/kernfs/file.c:291>] kernfs_fop_write+0xd8/0x190
         [<fs/read_write.c:473>] vfs_write+0xe3/0x1d0
         [<fs/read_write.c:523 fs/read_write.c:515>] SyS_write+0x5d/0xa0
         [<arch/x86/kernel/entry_64.S:749>] tracesys+0xdd/0xe2

  -> #0 (&of->mutex#2){+.+.+.}:
         [<kernel/locking/lockdep.c:1840>] check_prev_add+0x13f/0x560
         [<kernel/locking/lockdep.c:1945 kernel/locking/lockdep.c:2131>] validate_chain+0x6c5/0x7b0
         [<kernel/locking/lockdep.c:3182>] __lock_acquire+0x4cd/0x5a0
         [<arch/x86/include/asm/current.h:14 kernel/locking/lockdep.c:3602>] lock_acquire+0x182/0x1d0
         [<kernel/locking/mutex.c:470 kernel/locking/mutex.c:571>] mutex_lock_nested+0x6a/0x510
         [<fs/kernfs/file.c:487>] kernfs_fop_mmap+0x54/0x120
         [<mm/mmap.c:1573>] mmap_region+0x310/0x5c0
         [<mm/mmap.c:1365>] do_mmap_pgoff+0x385/0x430
         [<mm/util.c:399>] vm_mmap_pgoff+0x8f/0xe0
         [<mm/mmap.c:1416 mm/mmap.c:1374>] SyS_mmap_pgoff+0x1b0/0x210
         [<arch/x86/kernel/sys_x86_64.c:72>] SyS_mmap+0x1d/0x20
         [<arch/x86/kernel/entry_64.S:749>] tracesys+0xdd/0xe2

  other info that might help us debug this:

   Possible unsafe locking scenario:

         CPU0                    CPU1
         ----                    ----
    lock(&mm->mmap_sem);
                                 lock(&of->mutex#2);
                                 lock(&mm->mmap_sem);
    lock(&of->mutex#2);

   *** DEADLOCK ***

  1 lock held by trinity-c236/10658:
   #0: (&mm->mmap_sem){++++++}, at: [<mm/util.c:397>] vm_mmap_pgoff+0x6e/0xe0

  stack backtrace:
  CPU: 2 PID: 10658 Comm: trinity-c236 Tainted: G W 3.14.0-rc4-next-20140228-sasha-00011-g4077c67-dirty #26
   0000000000000000 ffff88011911fa48 ffffffff8438e945 0000000000000000
   0000000000000000 ffff88011911fa98 ffffffff811a0109 ffff88011911fab8
   ffff88011911fab8 ffff88011911fa98 ffff880119128cc0 ffff880119128cf8
  Call Trace:
   [<lib/dump_stack.c:52>] dump_stack+0x52/0x7f
   [<kernel/locking/lockdep.c:1213>] print_circular_bug+0x129/0x160
   [<kernel/locking/lockdep.c:1840>] check_prev_add+0x13f/0x560
   [<include/linux/spinlock.h:343 mm/slub.c:1933>] ? deactivate_slab+0x511/0x550
   [<kernel/locking/lockdep.c:1945 kernel/locking/lockdep.c:2131>] validate_chain+0x6c5/0x7b0
   [<kernel/locking/lockdep.c:3182>] __lock_acquire+0x4cd/0x5a0
   [<mm/mmap.c:1552>] ? mmap_region+0x24a/0x5c0
   [<arch/x86/include/asm/current.h:14 kernel/locking/lockdep.c:3602>] lock_acquire+0x182/0x1d0
   [<fs/kernfs/file.c:487>] ? kernfs_fop_mmap+0x54/0x120
   [<kernel/locking/mutex.c:470 kernel/locking/mutex.c:571>] mutex_lock_nested+0x6a/0x510
   [<fs/kernfs/file.c:487>] ? kernfs_fop_mmap+0x54/0x120
   [<kernel/sched/core.c:2477>] ? get_parent_ip+0x11/0x50
   [<fs/kernfs/file.c:487>] ? kernfs_fop_mmap+0x54/0x120
   [<fs/kernfs/file.c:487>] kernfs_fop_mmap+0x54/0x120
   [<mm/mmap.c:1573>] mmap_region+0x310/0x5c0
   [<mm/mmap.c:1365>] do_mmap_pgoff+0x385/0x430
   [<mm/util.c:397>] ? vm_mmap_pgoff+0x6e/0xe0
   [<mm/util.c:399>] vm_mmap_pgoff+0x8f/0xe0
   [<kernel/rcu/update.c:97>] ? __rcu_read_unlock+0x44/0xb0
   [<fs/file.c:641>] ? dup_fd+0x3c0/0x3c0
   [<mm/mmap.c:1416 mm/mmap.c:1374>] SyS_mmap_pgoff+0x1b0/0x210
   [<arch/x86/kernel/sys_x86_64.c:72>] SyS_mmap+0x1d/0x20
   [<arch/x86/kernel/entry_64.S:749>] tracesys+0xdd/0xe2

Fix it by caching atomic_write_len in kernfs_open_file during open so that it can be determined without accessing kernfs_ops in kernfs_fop_write(). This restores the structure of kernfs_fop_write() before 4d3773c4bb41 with updated @len determination logic.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
References: http://lkml.kernel.org/g/53113485.2090407@oracle.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Showing 2 changed files with 33 additions and 31 deletions Side-by-side Diff
fs/kernfs/file.c
... | ... | @@ -253,55 +253,50 @@ |
253 | 253 | { |
254 | 254 | struct kernfs_open_file *of = kernfs_of(file); |
255 | 255 | const struct kernfs_ops *ops; |
256 | - char *buf = NULL; | |
257 | - ssize_t len; | |
256 | + size_t len; | |
257 | + char *buf; | |
258 | 258 | |
259 | - /* | |
260 | - * @of->mutex nests outside active ref and is just to ensure that | |
261 | - * the ops aren't called concurrently for the same open file. | |
262 | - */ | |
263 | - mutex_lock(&of->mutex); | |
264 | - if (!kernfs_get_active(of->kn)) { | |
265 | - mutex_unlock(&of->mutex); | |
266 | - return -ENODEV; | |
267 | - } | |
268 | - | |
269 | - ops = kernfs_ops(of->kn); | |
270 | - if (!ops->write) { | |
271 | - len = -EINVAL; | |
272 | - goto out_unlock; | |
273 | - } | |
274 | - | |
275 | - if (ops->atomic_write_len) { | |
259 | + if (of->atomic_write_len) { | |
276 | 260 | len = count; |
277 | - if (len > ops->atomic_write_len) { | |
278 | - len = -E2BIG; | |
279 | - goto out_unlock; | |
280 | - } | |
261 | + if (len > of->atomic_write_len) | |
262 | + return -E2BIG; | |
281 | 263 | } else { |
282 | 264 | len = min_t(size_t, count, PAGE_SIZE); |
283 | 265 | } |
284 | 266 | |
285 | 267 | buf = kmalloc(len + 1, GFP_KERNEL); |
286 | - if (!buf) { | |
287 | - len = -ENOMEM; | |
288 | - goto out_unlock; | |
289 | - } | |
268 | + if (!buf) | |
269 | + return -ENOMEM; | |
290 | 270 | |
291 | 271 | if (copy_from_user(buf, user_buf, len)) { |
292 | 272 | len = -EFAULT; |
293 | - goto out_unlock; | |
273 | + goto out_free; | |
294 | 274 | } |
295 | 275 | buf[len] = '\0'; /* guarantee string termination */ |
296 | 276 | |
297 | - len = ops->write(of, buf, len, *ppos); | |
298 | -out_unlock: | |
277 | + /* | |
278 | + * @of->mutex nests outside active ref and is just to ensure that | |
279 | + * the ops aren't called concurrently for the same open file. | |
280 | + */ | |
281 | + mutex_lock(&of->mutex); | |
282 | + if (!kernfs_get_active(of->kn)) { | |
283 | + mutex_unlock(&of->mutex); | |
284 | + len = -ENODEV; | |
285 | + goto out_free; | |
286 | + } | |
287 | + | |
288 | + ops = kernfs_ops(of->kn); | |
289 | + if (ops->write) | |
290 | + len = ops->write(of, buf, len, *ppos); | |
291 | + else | |
292 | + len = -EINVAL; | |
293 | + | |
299 | 294 | kernfs_put_active(of->kn); |
300 | 295 | mutex_unlock(&of->mutex); |
301 | 296 | |
302 | 297 | if (len > 0) |
303 | 298 | *ppos += len; |
304 | - | |
299 | +out_free: | |
305 | 300 | kfree(buf); |
306 | 301 | return len; |
307 | 302 | } |
... | ... | @@ -664,6 +659,12 @@ |
664 | 659 | |
665 | 660 | of->kn = kn; |
666 | 661 | of->file = file; |
662 | + | |
663 | + /* | |
664 | + * Write path needs to access atomic_write_len outside active reference. | |
665 | + * Cache it in open_file. See kernfs_fop_write() for details. | |
666 | + */ | |
667 | + of->atomic_write_len = ops->atomic_write_len; | |
667 | 668 | |
668 | 669 | /* |
669 | 670 | * Always instantiate seq_file even if read access doesn't use |