Commit 76398425bb06b07cc3a3b1ce169c67dc9d6874ed

Authored by Jonathan Corbet
1 parent db1dd4d376

Move FASYNC bit handling to f_op->fasync()

Removing the BKL from FASYNC handling ran into the challenge of keeping the
setting of the FASYNC bit in filp->f_flags atomic with regard to calls to
the underlying fasync() function.  Andi Kleen suggested moving the handling
of that bit into fasync(); this patch does exactly that.  As a result, we
have a couple of internal API changes: fasync() must now manage the FASYNC
bit, and it will be called without the BKL held.

As it happens, every fasync() implementation in the kernel with one
exception calls fasync_helper().  So, if we make fasync_helper() set the
FASYNC bit, we can avoid making any changes to the other fasync()
functions - as long as those functions, themselves, have proper locking.
Most fasync() implementations do nothing but call fasync_helper() - which
has its own lock - so they are easily verified as correct.  The BKL had
already been pushed down into the rest.

The networking code has its own version of fasync_helper(), so that code
has been augmented with explicit FASYNC bit handling.

Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: David Miller <davem@davemloft.net>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>

Showing 4 changed files with 29 additions and 27 deletions Side-by-side Diff

Documentation/filesystems/Locking
... ... @@ -437,8 +437,11 @@
437 437 can and should be done using the internal locking with smaller critical areas).
438 438 Current worst offender is ext2_get_block()...
439 439  
440   -->fasync() is a mess. This area needs a big cleanup and that will probably
441   -affect locking.
  440 +->fasync() is called without BKL protection, and is responsible for
  441 +maintaining the FASYNC bit in filp->f_flags. Most instances call
  442 +fasync_helper(), which does that maintenance, so it's not normally
  443 +something one needs to worry about. Return values > 0 will be mapped to
  444 +zero in the VFS layer.
442 445  
443 446 ->readdir() and ->ioctl() on directories must be changed. Ideally we would
444 447 move ->readdir() to inode_operations and use a separate method for directory
... ... @@ -141,7 +141,7 @@
141 141 return ret;
142 142 }
143 143  
144   -#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)
  144 +#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
145 145  
146 146 static int setfl(int fd, struct file * filp, unsigned long arg)
147 147 {
148 148  
149 149  
150 150  
151 151  
... ... @@ -177,23 +177,19 @@
177 177 return error;
178 178  
179 179 /*
180   - * We still need a lock here for now to keep multiple FASYNC calls
181   - * from racing with each other.
  180 + * ->fasync() is responsible for setting the FASYNC bit.
182 181 */
183   - lock_kernel();
184   - if ((arg ^ filp->f_flags) & FASYNC) {
185   - if (filp->f_op && filp->f_op->fasync) {
186   - error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
187   - if (error < 0)
188   - goto out;
189   - }
  182 + if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op &&
  183 + filp->f_op->fasync) {
  184 + error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
  185 + if (error < 0)
  186 + goto out;
190 187 }
191   -
192 188 spin_lock(&filp->f_lock);
193 189 filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
194 190 spin_unlock(&filp->f_lock);
  191 +
195 192 out:
196   - unlock_kernel();
197 193 return error;
198 194 }
199 195  
... ... @@ -518,7 +514,7 @@
518 514 static struct kmem_cache *fasync_cache __read_mostly;
519 515  
520 516 /*
521   - * fasync_helper() is used by some character device drivers (mainly mice)
  517 + * fasync_helper() is used by almost all character device drivers
522 518 * to set up the fasync queue. It returns negative on error, 0 if it did
523 519 * no changes and positive if it added/deleted the entry.
524 520 */
... ... @@ -557,6 +553,13 @@
557 553 result = 1;
558 554 }
559 555 out:
  556 + /* Fix up FASYNC bit while still holding fasync_lock */
  557 + spin_lock(&filp->f_lock);
  558 + if (on)
  559 + filp->f_flags |= FASYNC;
  560 + else
  561 + filp->f_flags &= ~FASYNC;
  562 + spin_unlock(&filp->f_lock);
560 563 write_unlock_irq(&fasync_lock);
561 564 return result;
562 565 }
... ... @@ -427,19 +427,11 @@
427 427 /* Did FASYNC state change ? */
428 428 if ((flag ^ filp->f_flags) & FASYNC) {
429 429 if (filp->f_op && filp->f_op->fasync)
  430 + /* fasync() adjusts filp->f_flags */
430 431 error = filp->f_op->fasync(fd, filp, on);
431 432 else
432 433 error = -ENOTTY;
433 434 }
434   - if (error)
435   - return error;
436   -
437   - spin_lock(&filp->f_lock);
438   - if (on)
439   - filp->f_flags |= FASYNC;
440   - else
441   - filp->f_flags &= ~FASYNC;
442   - spin_unlock(&filp->f_lock);
443 435 return error;
444 436 }
445 437  
446 438  
... ... @@ -507,10 +499,7 @@
507 499 break;
508 500  
509 501 case FIOASYNC:
510   - /* BKL needed to avoid races tweaking f_flags */
511   - lock_kernel();
512 502 error = ioctl_fioasync(fd, filp, argp);
513   - unlock_kernel();
514 503 break;
515 504  
516 505 case FIOQSIZE:
... ... @@ -1030,6 +1030,13 @@
1030 1030  
1031 1031 lock_sock(sk);
1032 1032  
  1033 + spin_lock(&filp->f_lock);
  1034 + if (on)
  1035 + filp->f_flags |= FASYNC;
  1036 + else
  1037 + filp->f_flags &= ~FASYNC;
  1038 + spin_unlock(&filp->f_lock);
  1039 +
1033 1040 prev = &(sock->fasync_list);
1034 1041  
1035 1042 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)