Commit 989a2979205dd34269382b357e6d4b4b6956b889
Committed by: David S. Miller
1 parent: e5700aff14
Exists in: master and 7 other branches
fasync: RCU and fine grained locking
kill_fasync() uses a central rwlock, a candidate for RCU conversion, to avoid cache line ping-pong on SMP.

fasync_remove_entry() and fasync_add_entry() can disable IRQs for a short section instead of across the whole list scan.

Use a spinlock per fasync_struct to synchronize kill_fasync_rcu() and fasync_{remove|add}_entry(). This spinlock is IRQ safe, so sock_fasync() doesn't need its own implementation and can use fasync_helper(), reducing code size and complexity.

We can remove the direct use of __kill_fasync() in net/socket.c, and rename it to kill_fasync_rcu().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
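The heart of the change is the reader path. With the central rwlock, every kill_fasync() call wrote the lock's cache line even though it only read the list; with RCU, readers write nothing shared at all. A minimal before/after sketch of that path in kernel-style C, simplified from the fs/fcntl.c hunks below (fp is the list head):

    /* Before: read_lock() still dirties the rwlock's cache line, so
     * concurrent kill_fasync() calls ping-pong it between CPUs.
     */
    read_lock(&fasync_lock);
    for (fa = *fp; fa; fa = fa->fa_next)
        send_sigio(&fa->fa_file->f_owner, fa->fa_fd, band);
    read_unlock(&fasync_lock);

    /* After: an RCU read-side section touches no shared cache line.
     * Writers publish entries with rcu_assign_pointer() and defer
     * frees with call_rcu(), so a traversal already in flight never
     * dereferences freed memory.
     */
    rcu_read_lock();
    for (fa = rcu_dereference(*fp); fa; fa = rcu_dereference(fa->fa_next))
        send_sigio(&fa->fa_file->f_owner, fa->fa_fd, band); /* under fa->fa_lock */
    rcu_read_unlock();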
Showing 3 changed files with 59 additions and 92 deletions
fs/fcntl.c
... | ... | @@ -614,9 +614,15 @@ |
614 | 614 | return ret; |
615 | 615 | } |
616 | 616 | |
617 | -static DEFINE_RWLOCK(fasync_lock); | |
617 | +static DEFINE_SPINLOCK(fasync_lock); | |
618 | 618 | static struct kmem_cache *fasync_cache __read_mostly; |
619 | 619 | |
620 | +static void fasync_free_rcu(struct rcu_head *head) | |
621 | +{ | |
622 | + kmem_cache_free(fasync_cache, | |
623 | + container_of(head, struct fasync_struct, fa_rcu)); | |
624 | +} | |
625 | + | |
620 | 626 | /* |
621 | 627 | * Remove a fasync entry. If successfully removed, return |
622 | 628 | * positive and clear the FASYNC flag. If no entry exists, |
... | ... | @@ -625,8 +631,6 @@ |
625 | 631 | * NOTE! It is very important that the FASYNC flag always |
626 | 632 | * match the state "is the filp on a fasync list". |
627 | 633 | * |
628 | - * We always take the 'filp->f_lock', in since fasync_lock | |
629 | - * needs to be irq-safe. | |
630 | 634 | */ |
631 | 635 | static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) |
632 | 636 | { |
... | ... | @@ -634,17 +638,22 @@ |
634 | 638 | int result = 0; |
635 | 639 | |
636 | 640 | spin_lock(&filp->f_lock); |
637 | - write_lock_irq(&fasync_lock); | |
641 | + spin_lock(&fasync_lock); | |
638 | 642 | for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { |
639 | 643 | if (fa->fa_file != filp) |
640 | 644 | continue; |
645 | + | |
646 | + spin_lock_irq(&fa->fa_lock); | |
647 | + fa->fa_file = NULL; | |
648 | + spin_unlock_irq(&fa->fa_lock); | |
649 | + | |
641 | 650 | *fp = fa->fa_next; |
642 | - kmem_cache_free(fasync_cache, fa); | |
651 | + call_rcu(&fa->fa_rcu, fasync_free_rcu); | |
643 | 652 | filp->f_flags &= ~FASYNC; |
644 | 653 | result = 1; |
645 | 654 | break; |
646 | 655 | } |
647 | - write_unlock_irq(&fasync_lock); | |
656 | + spin_unlock(&fasync_lock); | |
648 | 657 | spin_unlock(&filp->f_lock); |
649 | 658 | return result; |
650 | 659 | } |
... | ... | @@ -666,25 +675,30 @@ |
666 | 675 | return -ENOMEM; |
667 | 676 | |
668 | 677 | spin_lock(&filp->f_lock); |
669 | - write_lock_irq(&fasync_lock); | |
678 | + spin_lock(&fasync_lock); | |
670 | 679 | for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { |
671 | 680 | if (fa->fa_file != filp) |
672 | 681 | continue; |
682 | + | |
683 | + spin_lock_irq(&fa->fa_lock); | |
673 | 684 | fa->fa_fd = fd; |
685 | + spin_unlock_irq(&fa->fa_lock); | |
686 | + | |
674 | 687 | kmem_cache_free(fasync_cache, new); |
675 | 688 | goto out; |
676 | 689 | } |
677 | 690 | |
691 | + spin_lock_init(&new->fa_lock); | |
678 | 692 | new->magic = FASYNC_MAGIC; |
679 | 693 | new->fa_file = filp; |
680 | 694 | new->fa_fd = fd; |
681 | 695 | new->fa_next = *fapp; |
682 | - *fapp = new; | |
696 | + rcu_assign_pointer(*fapp, new); | |
683 | 697 | result = 1; |
684 | 698 | filp->f_flags |= FASYNC; |
685 | 699 | |
686 | 700 | out: |
687 | - write_unlock_irq(&fasync_lock); | |
701 | + spin_unlock(&fasync_lock); | |
688 | 702 | spin_unlock(&filp->f_lock); |
689 | 703 | return result; |
690 | 704 | } |
... | ... | @@ -704,37 +718,41 @@ |
704 | 718 | |
705 | 719 | EXPORT_SYMBOL(fasync_helper); |
706 | 720 | |
707 | -void __kill_fasync(struct fasync_struct *fa, int sig, int band) | |
721 | +/* | |
722 | + * rcu_read_lock() is held | |
723 | + */ | |
724 | +static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) | |
708 | 725 | { |
709 | 726 | while (fa) { |
710 | - struct fown_struct * fown; | |
727 | + struct fown_struct *fown; | |
711 | 728 | if (fa->magic != FASYNC_MAGIC) { |
712 | 729 | printk(KERN_ERR "kill_fasync: bad magic number in " |
713 | 730 | "fasync_struct!\n"); |
714 | 731 | return; |
715 | 732 | } |
716 | - fown = &fa->fa_file->f_owner; | |
717 | - /* Don't send SIGURG to processes which have not set a | |
718 | - queued signum: SIGURG has its own default signalling | |
719 | - mechanism. */ | |
720 | - if (!(sig == SIGURG && fown->signum == 0)) | |
721 | - send_sigio(fown, fa->fa_fd, band); | |
722 | - fa = fa->fa_next; | |
733 | + spin_lock(&fa->fa_lock); | |
734 | + if (fa->fa_file) { | |
735 | + fown = &fa->fa_file->f_owner; | |
736 | + /* Don't send SIGURG to processes which have not set a | |
737 | + queued signum: SIGURG has its own default signalling | |
738 | + mechanism. */ | |
739 | + if (!(sig == SIGURG && fown->signum == 0)) | |
740 | + send_sigio(fown, fa->fa_fd, band); | |
741 | + } | |
742 | + spin_unlock(&fa->fa_lock); | |
743 | + fa = rcu_dereference(fa->fa_next); | |
723 | 744 | } |
724 | 745 | } |
725 | 746 | |
726 | -EXPORT_SYMBOL(__kill_fasync); | |
727 | - | |
728 | 747 | void kill_fasync(struct fasync_struct **fp, int sig, int band) |
729 | 748 | { |
730 | 749 | /* First a quick test without locking: usually |
731 | 750 | * the list is empty. |
732 | 751 | */ |
733 | 752 | if (*fp) { |
734 | - read_lock(&fasync_lock); | |
735 | - /* reread *fp after obtaining the lock */ | |
736 | - __kill_fasync(*fp, sig, band); | |
737 | - read_unlock(&fasync_lock); | |
753 | + rcu_read_lock(); | |
754 | + kill_fasync_rcu(rcu_dereference(*fp), sig, band); | |
755 | + rcu_read_unlock(); | |
738 | 756 | } |
739 | 757 | } |
740 | 758 | EXPORT_SYMBOL(kill_fasync); |
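A note on the lock discipline above, restated as a sketch rather than anything new in the patch: kill_fasync() may run from (soft)IRQ context, so kill_fasync_rcu() takes fa->fa_lock with a plain spin_lock(), while the process-context writers must use spin_lock_irq() so an interrupt arriving on the same CPU can never spin on a lock that CPU already holds:

    /* Process context (fasync_remove_entry()/fasync_add_entry()),
     * already under filp->f_lock and fasync_lock:
     */
    spin_lock_irq(&fa->fa_lock);   /* IRQs off: no same-CPU deadlock */
    fa->fa_file = NULL;            /* entry stays visible to RCU readers,
                                    * but they will now skip it */
    spin_unlock_irq(&fa->fa_lock);

    /* (Soft)IRQ context (kill_fasync_rcu()): */
    spin_lock(&fa->fa_lock);       /* plain lock is enough here */
    if (fa->fa_file)               /* NULL means "being removed" */
        send_sigio(&fa->fa_file->f_owner, fa->fa_fd, band);
    spin_unlock(&fa->fa_lock);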
include/linux/fs.h
... | ... | @@ -1280,10 +1280,12 @@ |
1280 | 1280 | |
1281 | 1281 | |
1282 | 1282 | struct fasync_struct { |
1283 | - int magic; | |
1284 | - int fa_fd; | |
1285 | - struct fasync_struct *fa_next; /* singly linked list */ | |
1286 | - struct file *fa_file; | |
1283 | + spinlock_t fa_lock; | |
1284 | + int magic; | |
1285 | + int fa_fd; | |
1286 | + struct fasync_struct *fa_next; /* singly linked list */ | |
1287 | + struct file *fa_file; | |
1288 | + struct rcu_head fa_rcu; | |
1287 | 1289 | }; |
1288 | 1290 | |
1289 | 1291 | #define FASYNC_MAGIC 0x4601 |
... | ... | @@ -1292,8 +1294,6 @@ |
1292 | 1294 | extern int fasync_helper(int, struct file *, int, struct fasync_struct **); |
1293 | 1295 | /* can be called from interrupts */ |
1294 | 1296 | extern void kill_fasync(struct fasync_struct **, int, int); |
1295 | -/* only for net: no internal synchronization */ | |
1296 | -extern void __kill_fasync(struct fasync_struct *, int, int); | |
1297 | 1297 | |
1298 | 1298 | extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); |
1299 | 1299 | extern int f_setown(struct file *filp, unsigned long arg, int force); |
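The new fa_rcu member is the standard embedded-rcu_head idiom: call_rcu() queues a callback for after a grace period, and container_of() recovers the enclosing object inside it. A generic, self-contained sketch of the idiom (struct item and its helpers are illustrative names, not part of this commit):

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct item {
            int payload;
            struct rcu_head rcu;    /* embedded, like fa_rcu above */
    };

    static void item_free_rcu(struct rcu_head *head)
    {
            /* Recover the enclosing object from the embedded rcu_head. */
            kfree(container_of(head, struct item, rcu));
    }

    static void item_release(struct item *it)
    {
            /* Frees the object only after every pre-existing RCU
             * reader has left its read-side critical section.
             */
            call_rcu(&it->rcu, item_free_rcu);
    }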
net/socket.c
... | ... | @@ -1067,78 +1067,27 @@ |
1067 | 1067 | * 1. fasync_list is modified only under process context socket lock |
1068 | 1068 | * i.e. under semaphore. |
1069 | 1069 | * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) |
1070 | - * or under socket lock. | |
1071 | - * 3. fasync_list can be used from softirq context, so that | |
1072 | - * modification under socket lock have to be enhanced with | |
1073 | - * write_lock_bh(&sk->sk_callback_lock). | |
1074 | - * --ANK (990710) | |
1070 | + * or under socket lock | |
1075 | 1071 | */ |
1076 | 1072 | |
1077 | 1073 | static int sock_fasync(int fd, struct file *filp, int on) |
1078 | 1074 | { |
1079 | - struct fasync_struct *fa, *fna = NULL, **prev; | |
1080 | - struct socket *sock; | |
1081 | - struct sock *sk; | |
1075 | + struct socket *sock = filp->private_data; | |
1076 | + struct sock *sk = sock->sk; | |
1082 | 1077 | |
1083 | - if (on) { | |
1084 | - fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); | |
1085 | - if (fna == NULL) | |
1086 | - return -ENOMEM; | |
1087 | - } | |
1088 | - | |
1089 | - sock = filp->private_data; | |
1090 | - | |
1091 | - sk = sock->sk; | |
1092 | - if (sk == NULL) { | |
1093 | - kfree(fna); | |
1078 | + if (sk == NULL) | |
1094 | 1079 | return -EINVAL; |
1095 | - } | |
1096 | 1080 | |
1097 | 1081 | lock_sock(sk); |
1098 | 1082 | |
1099 | - spin_lock(&filp->f_lock); | |
1100 | - if (on) | |
1101 | - filp->f_flags |= FASYNC; | |
1102 | - else | |
1103 | - filp->f_flags &= ~FASYNC; | |
1104 | - spin_unlock(&filp->f_lock); | |
1083 | + fasync_helper(fd, filp, on, &sock->fasync_list); | |
1105 | 1084 | |
1106 | - prev = &(sock->fasync_list); | |
1107 | - | |
1108 | - for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) | |
1109 | - if (fa->fa_file == filp) | |
1110 | - break; | |
1111 | - | |
1112 | - if (on) { | |
1113 | - if (fa != NULL) { | |
1114 | - write_lock_bh(&sk->sk_callback_lock); | |
1115 | - fa->fa_fd = fd; | |
1116 | - write_unlock_bh(&sk->sk_callback_lock); | |
1117 | - | |
1118 | - kfree(fna); | |
1119 | - goto out; | |
1120 | - } | |
1121 | - fna->fa_file = filp; | |
1122 | - fna->fa_fd = fd; | |
1123 | - fna->magic = FASYNC_MAGIC; | |
1124 | - fna->fa_next = sock->fasync_list; | |
1125 | - write_lock_bh(&sk->sk_callback_lock); | |
1126 | - sock->fasync_list = fna; | |
1085 | + if (!sock->fasync_list) | |
1086 | + sock_reset_flag(sk, SOCK_FASYNC); | |
1087 | + else | |
1127 | 1088 | sock_set_flag(sk, SOCK_FASYNC); |
1128 | - write_unlock_bh(&sk->sk_callback_lock); | |
1129 | - } else { | |
1130 | - if (fa != NULL) { | |
1131 | - write_lock_bh(&sk->sk_callback_lock); | |
1132 | - *prev = fa->fa_next; | |
1133 | - if (!sock->fasync_list) | |
1134 | - sock_reset_flag(sk, SOCK_FASYNC); | |
1135 | - write_unlock_bh(&sk->sk_callback_lock); | |
1136 | - kfree(fa); | |
1137 | - } | |
1138 | - } | |
1139 | 1089 | |
1140 | -out: | |
1141 | - release_sock(sock->sk); | |
1090 | + release_sock(sk); | |
1142 | 1091 | return 0; |
1143 | 1092 | } |
1144 | 1093 | |
... | ... | @@ -1159,10 +1108,10 @@ |
1159 | 1108 | /* fall through */ |
1160 | 1109 | case SOCK_WAKE_IO: |
1161 | 1110 | call_kill: |
1162 | - __kill_fasync(sock->fasync_list, SIGIO, band); | |
1111 | + kill_fasync(&sock->fasync_list, SIGIO, band); | |
1163 | 1112 | break; |
1164 | 1113 | case SOCK_WAKE_URG: |
1165 | - __kill_fasync(sock->fasync_list, SIGURG, band); | |
1114 | + kill_fasync(&sock->fasync_list, SIGURG, band); | |
1166 | 1115 | } |
1167 | 1116 | return 0; |
1168 | 1117 | } |
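With sock_fasync() now delegating to the generic helpers, every consumer follows the same two-call pattern. A hedged sketch of a hypothetical character driver using it (the mydev_* names are illustrative, not from this commit):

    #include <linux/fs.h>

    static struct fasync_struct *mydev_async_queue;

    /* Wired into struct file_operations as .fasync */
    static int mydev_fasync(int fd, struct file *filp, int on)
    {
            return fasync_helper(fd, filp, on, &mydev_async_queue);
    }

    /* Called when new data arrives; safe from interrupt context,
     * which is exactly the path this RCU conversion makes cheap.
     */
    static void mydev_data_ready(void)
    {
            kill_fasync(&mydev_async_queue, SIGIO, POLL_IN);
    }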