Commit 76398425bb06b07cc3a3b1ce169c67dc9d6874ed
1 parent
db1dd4d376
Exists in
master
and in
20 other branches
Move FASYNC bit handling to f_op->fasync()
Removing the BKL from FASYNC handling ran into the challenge of keeping the setting of the FASYNC bit in filp->f_flags atomic with regard to calls to the underlying fasync() function. Andi Kleen suggested moving the handling of that bit into fasync(); this patch does exactly that. As a result, we have a couple of internal API changes: fasync() must now manage the FASYNC bit, and it will be called without the BKL held. As it happens, every fasync() implementation in the kernel with one exception calls fasync_helper(). So, if we make fasync_helper() set the FASYNC bit, we can avoid making any changes to the other fasync() functions - as long as those functions, themselves, have proper locking. Most fasync() implementations do nothing but call fasync_helper() - which has its own lock - so they are easily verified as correct. The BKL had already been pushed down into the rest. The networking code has its own version of fasync_helper(), so that code has been augmented with explicit FASYNC bit handling. Cc: Al Viro <viro@ZenIV.linux.org.uk> Cc: David Miller <davem@davemloft.net> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Showing 4 changed files with 29 additions and 27 deletions Side-by-side Diff
Documentation/filesystems/Locking
... | ... | @@ -437,8 +437,11 @@ |
437 | 437 | can and should be done using the internal locking with smaller critical areas). |
438 | 438 | Current worst offender is ext2_get_block()... |
439 | 439 | |
440 | -->fasync() is a mess. This area needs a big cleanup and that will probably | |
441 | -affect locking. | |
440 | +->fasync() is called without BKL protection, and is responsible for | |
441 | +maintaining the FASYNC bit in filp->f_flags. Most instances call | |
442 | +fasync_helper(), which does that maintenance, so it's not normally | |
443 | +something one needs to worry about. Return values > 0 will be mapped to | |
444 | +zero in the VFS layer. | |
442 | 445 | |
443 | 446 | ->readdir() and ->ioctl() on directories must be changed. Ideally we would |
444 | 447 | move ->readdir() to inode_operations and use a separate method for directory |
fs/fcntl.c
... | ... | @@ -141,7 +141,7 @@ |
141 | 141 | return ret; |
142 | 142 | } |
143 | 143 | |
144 | -#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME) | |
144 | +#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) | |
145 | 145 | |
146 | 146 | static int setfl(int fd, struct file * filp, unsigned long arg) |
147 | 147 | { |
148 | 148 | |
149 | 149 | |
150 | 150 | |
151 | 151 | |
... | ... | @@ -177,23 +177,19 @@ |
177 | 177 | return error; |
178 | 178 | |
179 | 179 | /* |
180 | - * We still need a lock here for now to keep multiple FASYNC calls | |
181 | - * from racing with each other. | |
180 | + * ->fasync() is responsible for setting the FASYNC bit. | |
182 | 181 | */ |
183 | - lock_kernel(); | |
184 | - if ((arg ^ filp->f_flags) & FASYNC) { | |
185 | - if (filp->f_op && filp->f_op->fasync) { | |
186 | - error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); | |
187 | - if (error < 0) | |
188 | - goto out; | |
189 | - } | |
182 | + if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op && | |
183 | + filp->f_op->fasync) { | |
184 | + error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); | |
185 | + if (error < 0) | |
186 | + goto out; | |
190 | 187 | } |
191 | - | |
192 | 188 | spin_lock(&filp->f_lock); |
193 | 189 | filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); |
194 | 190 | spin_unlock(&filp->f_lock); |
191 | + | |
195 | 192 | out: |
196 | - unlock_kernel(); | |
197 | 193 | return error; |
198 | 194 | } |
199 | 195 | |
... | ... | @@ -518,7 +514,7 @@ |
518 | 514 | static struct kmem_cache *fasync_cache __read_mostly; |
519 | 515 | |
520 | 516 | /* |
521 | - * fasync_helper() is used by some character device drivers (mainly mice) | |
517 | + * fasync_helper() is used by almost all character device drivers | |
522 | 518 | * to set up the fasync queue. It returns negative on error, 0 if it did |
523 | 519 | * no changes and positive if it added/deleted the entry. |
524 | 520 | */ |
... | ... | @@ -557,6 +553,13 @@ |
557 | 553 | result = 1; |
558 | 554 | } |
559 | 555 | out: |
556 | + /* Fix up FASYNC bit while still holding fasync_lock */ | |
557 | + spin_lock(&filp->f_lock); | |
558 | + if (on) | |
559 | + filp->f_flags |= FASYNC; | |
560 | + else | |
561 | + filp->f_flags &= ~FASYNC; | |
562 | + spin_unlock(&filp->f_lock); | |
560 | 563 | write_unlock_irq(&fasync_lock); |
561 | 564 | return result; |
562 | 565 | } |
fs/ioctl.c
... | ... | @@ -427,19 +427,11 @@ |
427 | 427 | /* Did FASYNC state change ? */ |
428 | 428 | if ((flag ^ filp->f_flags) & FASYNC) { |
429 | 429 | if (filp->f_op && filp->f_op->fasync) |
430 | + /* fasync() adjusts filp->f_flags */ | |
430 | 431 | error = filp->f_op->fasync(fd, filp, on); |
431 | 432 | else |
432 | 433 | error = -ENOTTY; |
433 | 434 | } |
434 | - if (error) | |
435 | - return error; | |
436 | - | |
437 | - spin_lock(&filp->f_lock); | |
438 | - if (on) | |
439 | - filp->f_flags |= FASYNC; | |
440 | - else | |
441 | - filp->f_flags &= ~FASYNC; | |
442 | - spin_unlock(&filp->f_lock); | |
443 | 435 | return error; |
444 | 436 | } |
445 | 437 | |
446 | 438 | |
... | ... | @@ -507,10 +499,7 @@ |
507 | 499 | break; |
508 | 500 | |
509 | 501 | case FIOASYNC: |
510 | - /* BKL needed to avoid races tweaking f_flags */ | |
511 | - lock_kernel(); | |
512 | 502 | error = ioctl_fioasync(fd, filp, argp); |
513 | - unlock_kernel(); | |
514 | 503 | break; |
515 | 504 | |
516 | 505 | case FIOQSIZE: |
net/socket.c
... | ... | @@ -1030,6 +1030,13 @@ |
1030 | 1030 | |
1031 | 1031 | lock_sock(sk); |
1032 | 1032 | |
1033 | + spin_lock(&filp->f_lock); | |
1034 | + if (on) | |
1035 | + filp->f_flags |= FASYNC; | |
1036 | + else | |
1037 | + filp->f_flags &= ~FASYNC; | |
1038 | + spin_unlock(&filp->f_lock); | |
1039 | + | |
1033 | 1040 | prev = &(sock->fasync_list); |
1034 | 1041 | |
1035 | 1042 | for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) |