Commit 989a2979205dd34269382b357e6d4b4b6956b889

Authored by Eric Dumazet
Committed by David S. Miller
1 parent e5700aff14

fasync: RCU and fine grained locking

kill_fasync() uses a central rwlock, a good candidate for conversion to
RCU to avoid cache line ping-pong on SMP.
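
The read-side effect of the conversion, as a minimal before/after sketch
(the actual hunks are in the diff below):

	/* Before: every kill_fasync() call bounces the rwlock cache line */
	read_lock(&fasync_lock);
	__kill_fasync(*fp, sig, band);
	read_unlock(&fasync_lock);

	/* After: readers take no shared lock at all */
	rcu_read_lock();
	kill_fasync_rcu(rcu_dereference(*fp), sig, band);
	rcu_read_unlock();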

fasync_remove_entry() and fasync_add_entry() can then disable IRQs for a
short section instead of across the whole list scan.

Use a spinlock per fasync_struct to synchronize kill_fasync_rcu() and
fasync_{remove|add}_entry(). This spinlock is IRQ safe, so sock_fasync()
doesn't need its own implementation and can use fasync_helper(),
reducing code size and complexity.
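
The write side then looks like this (a sketch condensed from the
fasync_add_entry() hunk below): the list walk runs with IRQs enabled
under fasync_lock, and only the per-entry update disables IRQs.

	spin_lock(&fasync_lock);            /* IRQs stay enabled for the scan */
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;
		spin_lock_irq(&fa->fa_lock);    /* short IRQ-off window... */
		fa->fa_fd = fd;
		spin_unlock_irq(&fa->fa_lock);  /* ...covering one field update */
		kmem_cache_free(fasync_cache, new);
		goto out;
	}
	spin_unlock(&fasync_lock);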

We can remove the direct use of __kill_fasync() in net/socket.c and
rename it to kill_fasync_rcu().
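
For reference, the pattern fasync_helper() and kill_fasync() support, as
a hypothetical driver would use them (my_fasync_queue and my_fasync()
are illustrative names; the two API calls are the real ones):

	static struct fasync_struct *my_fasync_queue;  /* hypothetical list head */

	/* wired into file_operations.fasync */
	static int my_fasync(int fd, struct file *filp, int on)
	{
		/* add or remove an entry; all locking handled internally */
		return fasync_helper(fd, filp, on, &my_fasync_queue);
	}

	/* on data arrival, possibly from IRQ context: */
	kill_fasync(&my_fasync_queue, SIGIO, POLL_IN);

With this patch, sock_fasync() follows exactly this shape, with
sock->fasync_list as the list head.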

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 3 changed files with 59 additions and 92 deletions

fs/fcntl.c
... ... @@ -614,9 +614,15 @@
614 614 return ret;
615 615 }
616 616  
617   -static DEFINE_RWLOCK(fasync_lock);
  617 +static DEFINE_SPINLOCK(fasync_lock);
618 618 static struct kmem_cache *fasync_cache __read_mostly;
619 619  
  620 +static void fasync_free_rcu(struct rcu_head *head)
  621 +{
  622 + kmem_cache_free(fasync_cache,
  623 + container_of(head, struct fasync_struct, fa_rcu));
  624 +}
  625 +
620 626 /*
621 627 * Remove a fasync entry. If successfully removed, return
622 628 * positive and clear the FASYNC flag. If no entry exists,
... ... @@ -625,8 +631,6 @@
625 631 * NOTE! It is very important that the FASYNC flag always
626 632 * match the state "is the filp on a fasync list".
627 633 *
628   - * We always take the 'filp->f_lock', in since fasync_lock
629   - * needs to be irq-safe.
630 634 */
631 635 static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
632 636 {
... ... @@ -634,17 +638,22 @@
634 638 int result = 0;
635 639  
636 640 spin_lock(&filp->f_lock);
637   - write_lock_irq(&fasync_lock);
  641 + spin_lock(&fasync_lock);
638 642 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
639 643 if (fa->fa_file != filp)
640 644 continue;
  645 +
  646 + spin_lock_irq(&fa->fa_lock);
  647 + fa->fa_file = NULL;
  648 + spin_unlock_irq(&fa->fa_lock);
  649 +
641 650 *fp = fa->fa_next;
642   - kmem_cache_free(fasync_cache, fa);
  651 + call_rcu(&fa->fa_rcu, fasync_free_rcu);
643 652 filp->f_flags &= ~FASYNC;
644 653 result = 1;
645 654 break;
646 655 }
647   - write_unlock_irq(&fasync_lock);
  656 + spin_unlock(&fasync_lock);
648 657 spin_unlock(&filp->f_lock);
649 658 return result;
650 659 }
... ... @@ -666,25 +675,30 @@
666 675 return -ENOMEM;
667 676  
668 677 spin_lock(&filp->f_lock);
669   - write_lock_irq(&fasync_lock);
  678 + spin_lock(&fasync_lock);
670 679 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
671 680 if (fa->fa_file != filp)
672 681 continue;
  682 +
  683 + spin_lock_irq(&fa->fa_lock);
673 684 fa->fa_fd = fd;
  685 + spin_unlock_irq(&fa->fa_lock);
  686 +
674 687 kmem_cache_free(fasync_cache, new);
675 688 goto out;
676 689 }
677 690  
  691 + spin_lock_init(&new->fa_lock);
678 692 new->magic = FASYNC_MAGIC;
679 693 new->fa_file = filp;
680 694 new->fa_fd = fd;
681 695 new->fa_next = *fapp;
682   - *fapp = new;
  696 + rcu_assign_pointer(*fapp, new);
683 697 result = 1;
684 698 filp->f_flags |= FASYNC;
685 699  
686 700 out:
687   - write_unlock_irq(&fasync_lock);
  701 + spin_unlock(&fasync_lock);
688 702 spin_unlock(&filp->f_lock);
689 703 return result;
690 704 }
... ... @@ -704,37 +718,41 @@
704 718  
705 719 EXPORT_SYMBOL(fasync_helper);
706 720  
707   -void __kill_fasync(struct fasync_struct *fa, int sig, int band)
  721 +/*
  722 + * rcu_read_lock() is held
  723 + */
  724 +static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
708 725 {
709 726 while (fa) {
710   - struct fown_struct * fown;
  727 + struct fown_struct *fown;
711 728 if (fa->magic != FASYNC_MAGIC) {
712 729 printk(KERN_ERR "kill_fasync: bad magic number in "
713 730 "fasync_struct!\n");
714 731 return;
715 732 }
716   - fown = &fa->fa_file->f_owner;
717   - /* Don't send SIGURG to processes which have not set a
718   - queued signum: SIGURG has its own default signalling
719   - mechanism. */
720   - if (!(sig == SIGURG && fown->signum == 0))
721   - send_sigio(fown, fa->fa_fd, band);
722   - fa = fa->fa_next;
  733 + spin_lock(&fa->fa_lock);
  734 + if (fa->fa_file) {
  735 + fown = &fa->fa_file->f_owner;
  736 + /* Don't send SIGURG to processes which have not set a
  737 + queued signum: SIGURG has its own default signalling
  738 + mechanism. */
  739 + if (!(sig == SIGURG && fown->signum == 0))
  740 + send_sigio(fown, fa->fa_fd, band);
  741 + }
  742 + spin_unlock(&fa->fa_lock);
  743 + fa = rcu_dereference(fa->fa_next);
723 744 }
724 745 }
725 746  
726   -EXPORT_SYMBOL(__kill_fasync);
727   -
728 747 void kill_fasync(struct fasync_struct **fp, int sig, int band)
729 748 {
730 749 /* First a quick test without locking: usually
731 750 * the list is empty.
732 751 */
733 752 if (*fp) {
734   - read_lock(&fasync_lock);
735   - /* reread *fp after obtaining the lock */
736   - __kill_fasync(*fp, sig, band);
737   - read_unlock(&fasync_lock);
  753 + rcu_read_lock();
  754 + kill_fasync_rcu(rcu_dereference(*fp), sig, band);
  755 + rcu_read_unlock();
738 756 }
739 757 }
740 758 EXPORT_SYMBOL(kill_fasync);
include/linux/fs.h
... ... @@ -1280,10 +1280,12 @@
1280 1280  
1281 1281  
1282 1282 struct fasync_struct {
1283   - int magic;
1284   - int fa_fd;
1285   - struct fasync_struct *fa_next; /* singly linked list */
1286   - struct file *fa_file;
  1283 + spinlock_t fa_lock;
  1284 + int magic;
  1285 + int fa_fd;
  1286 + struct fasync_struct *fa_next; /* singly linked list */
  1287 + struct file *fa_file;
  1288 + struct rcu_head fa_rcu;
1287 1289 };
1288 1290  
1289 1291 #define FASYNC_MAGIC 0x4601
... ... @@ -1292,8 +1294,6 @@
1292 1294 extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
1293 1295 /* can be called from interrupts */
1294 1296 extern void kill_fasync(struct fasync_struct **, int, int);
1295   -/* only for net: no internal synchronization */
1296   -extern void __kill_fasync(struct fasync_struct *, int, int);
1297 1297  
1298 1298 extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
1299 1299 extern int f_setown(struct file *filp, unsigned long arg, int force);
net/socket.c
... ... @@ -1067,78 +1067,27 @@
1067 1067 * 1. fasync_list is modified only under process context socket lock
1068 1068 * i.e. under semaphore.
1069 1069 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1070   - * or under socket lock.
1071   - * 3. fasync_list can be used from softirq context, so that
1072   - * modification under socket lock have to be enhanced with
1073   - * write_lock_bh(&sk->sk_callback_lock).
1074   - * --ANK (990710)
  1070 + * or under socket lock
1075 1071 */
1076 1072  
1077 1073 static int sock_fasync(int fd, struct file *filp, int on)
1078 1074 {
1079   - struct fasync_struct *fa, *fna = NULL, **prev;
1080   - struct socket *sock;
1081   - struct sock *sk;
  1075 + struct socket *sock = filp->private_data;
  1076 + struct sock *sk = sock->sk;
1082 1077  
1083   - if (on) {
1084   - fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
1085   - if (fna == NULL)
1086   - return -ENOMEM;
1087   - }
1088   -
1089   - sock = filp->private_data;
1090   -
1091   - sk = sock->sk;
1092   - if (sk == NULL) {
1093   - kfree(fna);
  1078 + if (sk == NULL)
1094 1079 return -EINVAL;
1095   - }
1096 1080  
1097 1081 lock_sock(sk);
1098 1082  
1099   - spin_lock(&filp->f_lock);
1100   - if (on)
1101   - filp->f_flags |= FASYNC;
1102   - else
1103   - filp->f_flags &= ~FASYNC;
1104   - spin_unlock(&filp->f_lock);
  1083 + fasync_helper(fd, filp, on, &sock->fasync_list);
1105 1084  
1106   - prev = &(sock->fasync_list);
1107   -
1108   - for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1109   - if (fa->fa_file == filp)
1110   - break;
1111   -
1112   - if (on) {
1113   - if (fa != NULL) {
1114   - write_lock_bh(&sk->sk_callback_lock);
1115   - fa->fa_fd = fd;
1116   - write_unlock_bh(&sk->sk_callback_lock);
1117   -
1118   - kfree(fna);
1119   - goto out;
1120   - }
1121   - fna->fa_file = filp;
1122   - fna->fa_fd = fd;
1123   - fna->magic = FASYNC_MAGIC;
1124   - fna->fa_next = sock->fasync_list;
1125   - write_lock_bh(&sk->sk_callback_lock);
1126   - sock->fasync_list = fna;
  1085 + if (!sock->fasync_list)
  1086 + sock_reset_flag(sk, SOCK_FASYNC);
  1087 + else
1127 1088 sock_set_flag(sk, SOCK_FASYNC);
1128   - write_unlock_bh(&sk->sk_callback_lock);
1129   - } else {
1130   - if (fa != NULL) {
1131   - write_lock_bh(&sk->sk_callback_lock);
1132   - *prev = fa->fa_next;
1133   - if (!sock->fasync_list)
1134   - sock_reset_flag(sk, SOCK_FASYNC);
1135   - write_unlock_bh(&sk->sk_callback_lock);
1136   - kfree(fa);
1137   - }
1138   - }
1139 1089  
1140   -out:
1141   - release_sock(sock->sk);
  1090 + release_sock(sk);
1142 1091 return 0;
1143 1092 }
1144 1093  
... ... @@ -1159,10 +1108,10 @@
1159 1108 /* fall through */
1160 1109 case SOCK_WAKE_IO:
1161 1110 call_kill:
1162   - __kill_fasync(sock->fasync_list, SIGIO, band);
  1111 + kill_fasync(&sock->fasync_list, SIGIO, band);
1163 1112 break;
1164 1113 case SOCK_WAKE_URG:
1165   - __kill_fasync(sock->fasync_list, SIGURG, band);
  1114 + kill_fasync(&sock->fasync_list, SIGURG, band);
1166 1115 }
1167 1116 return 0;
1168 1117 }