Commit 75c78500ddad74b229cd0691496b8549490496a2

Authored by Moni Shoua
Committed by David S. Miller
1 parent 481a819914

bonding: remap multicast addresses without using dev_close() and dev_open()

This patch fixes commit e36b9d16c6a6d0f59803b3ef04ff3c22c3844c10. The approach
there is to call dev_close()/dev_open() whenever the device type is changed in
order to remap the device IP multicast addresses to HW multicast addresses.
This approach suffers from 2 drawbacks:

*. It assumes that the device is UP when calling dev_close(); otherwise
   dev_close() has no effect. It is worth mentioning that initscripts (Redhat)
   and sysconfig (Suse) don't act the same in this matter.
*. dev_close() has other side effects, like deleting entries from the routing
   table, which might be unnecessary.

The fix here is to directly remap the IP multicast addresses to HW multicast
addresses for a bonding device that changes its type, and nothing else.

Reported-by:   Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Moni Shoua <monis@voltaire.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 10 changed files with 82 additions and 6 deletions Side-by-side Diff

drivers/net/bonding/bond_main.c
... ... @@ -1211,7 +1211,7 @@
1211 1211 write_unlock_bh(&bond->curr_slave_lock);
1212 1212 read_unlock(&bond->lock);
1213 1213  
1214   - netdev_bonding_change(bond->dev);
  1214 + netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER);
1215 1215  
1216 1216 read_lock(&bond->lock);
1217 1217 write_lock_bh(&bond->curr_slave_lock);
1218 1218  
1219 1219  
... ... @@ -1469,14 +1469,17 @@
1469 1469 */
1470 1470 if (bond->slave_cnt == 0) {
1471 1471 if (bond_dev->type != slave_dev->type) {
1472   - dev_close(bond_dev);
1473 1472 pr_debug("%s: change device type from %d to %d\n",
1474 1473 bond_dev->name, bond_dev->type, slave_dev->type);
  1474 +
  1475 + netdev_bonding_change(bond_dev, NETDEV_BONDING_OLDTYPE);
  1476 +
1475 1477 if (slave_dev->type != ARPHRD_ETHER)
1476 1478 bond_setup_by_slave(bond_dev, slave_dev);
1477 1479 else
1478 1480 ether_setup(bond_dev);
1479   - dev_open(bond_dev);
  1481 +
  1482 + netdev_bonding_change(bond_dev, NETDEV_BONDING_NEWTYPE);
1480 1483 }
1481 1484 } else if (bond_dev->type != slave_dev->type) {
1482 1485 pr_err(DRV_NAME ": %s ether type (%d) is different "
include/linux/igmp.h
... ... @@ -233,6 +233,8 @@
233 233 extern void ip_mc_destroy_dev(struct in_device *);
234 234 extern void ip_mc_up(struct in_device *);
235 235 extern void ip_mc_down(struct in_device *);
  236 +extern void ip_mc_unmap(struct in_device *);
  237 +extern void ip_mc_remap(struct in_device *);
236 238 extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr);
237 239 extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr);
238 240 extern void ip_mc_rejoin_group(struct ip_mc_list *im);
include/linux/netdevice.h
... ... @@ -1873,7 +1873,8 @@
1873 1873 extern int dev_set_promiscuity(struct net_device *dev, int inc);
1874 1874 extern int dev_set_allmulti(struct net_device *dev, int inc);
1875 1875 extern void netdev_state_change(struct net_device *dev);
1876   -extern void netdev_bonding_change(struct net_device *dev);
  1876 +extern void netdev_bonding_change(struct net_device *dev,
  1877 + unsigned long event);
1877 1878 extern void netdev_features_change(struct net_device *dev);
1878 1879 /* Load a device via the kmod */
1879 1880 extern void dev_load(struct net *net, const char *name);
include/linux/notifier.h
... ... @@ -199,6 +199,8 @@
199 199 #define NETDEV_FEAT_CHANGE 0x000B
200 200 #define NETDEV_BONDING_FAILOVER 0x000C
201 201 #define NETDEV_PRE_UP 0x000D
  202 +#define NETDEV_BONDING_OLDTYPE 0x000E
  203 +#define NETDEV_BONDING_NEWTYPE 0x000F
202 204  
203 205 #define SYS_DOWN 0x0001 /* Notify of system down */
204 206 #define SYS_RESTART SYS_DOWN
include/net/addrconf.h
... ... @@ -143,6 +143,8 @@
143 143 extern int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr);
144 144 extern void ipv6_mc_up(struct inet6_dev *idev);
145 145 extern void ipv6_mc_down(struct inet6_dev *idev);
  146 +extern void ipv6_mc_unmap(struct inet6_dev *idev);
  147 +extern void ipv6_mc_remap(struct inet6_dev *idev);
146 148 extern void ipv6_mc_init_dev(struct inet6_dev *idev);
147 149 extern void ipv6_mc_destroy_dev(struct inet6_dev *idev);
148 150 extern void addrconf_dad_failure(struct inet6_ifaddr *ifp);
... ... @@ -1017,9 +1017,9 @@
1017 1017 }
1018 1018 EXPORT_SYMBOL(netdev_state_change);
1019 1019  
1020   -void netdev_bonding_change(struct net_device *dev)
  1020 +void netdev_bonding_change(struct net_device *dev, unsigned long event) /* new signature: the caller now selects which notifier event is raised for dev */
1021 1021 {
1022   - call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
  1022 + call_netdevice_notifiers(event, dev); /* forwards the caller-supplied event instead of the previously hard-coded NETDEV_BONDING_FAILOVER */
1023 1023 }
1024 1024 EXPORT_SYMBOL(netdev_bonding_change);
1025 1025  
... ... @@ -1087,6 +1087,12 @@
1087 1087 case NETDEV_DOWN:
1088 1088 ip_mc_down(in_dev);
1089 1089 break;
  1090 + case NETDEV_BONDING_OLDTYPE:
  1091 + ip_mc_unmap(in_dev);
  1092 + break;
  1093 + case NETDEV_BONDING_NEWTYPE:
  1094 + ip_mc_remap(in_dev);
  1095 + break;
1090 1096 case NETDEV_CHANGEMTU:
1091 1097 if (inetdev_valid_mtu(dev->mtu))
1092 1098 break;
... ... @@ -1298,6 +1298,28 @@
1298 1298 }
1299 1299 }
1300 1300  
  1301 +/* Device changing type: handlers for NETDEV_BONDING_OLDTYPE / NETDEV_BONDING_NEWTYPE */
  1302 +
  1303 +void ip_mc_unmap(struct in_device *in_dev) /* invoked for NETDEV_BONDING_OLDTYPE, i.e. before the device type is switched */
  1304 +{
  1305 + struct ip_mc_list *i;
  1306 +
  1307 + ASSERT_RTNL(); /* caller is expected to hold RTNL */
  1308 +
  1309 + for (i = in_dev->mc_list; i; i = i->next) /* walk every IPv4 multicast entry of this device */
  1310 + igmp_group_dropped(i); /* per the commit message this undoes the IP-to-HW multicast mapping; the list itself is not modified here */
  1311 +}
  1312 +
  1313 +void ip_mc_remap(struct in_device *in_dev) /* invoked for NETDEV_BONDING_NEWTYPE, i.e. after the device type has been switched */
  1314 +{
  1315 + struct ip_mc_list *i;
  1316 +
  1317 + ASSERT_RTNL(); /* caller is expected to hold RTNL */
  1318 +
  1319 + for (i = in_dev->mc_list; i; i = i->next) /* walk every IPv4 multicast entry of this device */
  1320 + igmp_group_added(i); /* counterpart of the igmp_group_dropped() call in ip_mc_unmap(); per the commit message this re-creates the HW multicast mapping */
  1321 +}
  1322 +
1301 1323 /* Device going down */
1302 1324  
1303 1325 void ip_mc_down(struct in_device *in_dev)
... ... @@ -137,6 +137,8 @@
137 137 static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
138 138 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
139 139  
  140 +static void addrconf_bonding_change(struct net_device *dev,
  141 + unsigned long event);
140 142 static int addrconf_ifdown(struct net_device *dev, int how);
141 143  
142 144 static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
... ... @@ -2582,6 +2584,10 @@
2582 2584 return notifier_from_errno(err);
2583 2585 }
2584 2586 break;
  2587 + case NETDEV_BONDING_OLDTYPE:
  2588 + case NETDEV_BONDING_NEWTYPE:
  2589 + addrconf_bonding_change(dev, event);
  2590 + break;
2585 2591 }
2586 2592  
2587 2593 return NOTIFY_OK;
... ... @@ -2594,6 +2600,19 @@
2594 2600 .notifier_call = addrconf_notify,
2595 2601 .priority = 0
2596 2602 };
  2603 +
  2604 +static void addrconf_bonding_change(struct net_device *dev, unsigned long event) /* IPv6 handler for the bonding type-change events dispatched from the notifier switch */
  2605 +{
  2606 + struct inet6_dev *idev;
  2607 + ASSERT_RTNL(); /* caller is expected to hold RTNL */
  2608 +
  2609 + idev = __in6_dev_get(dev); /* NOTE(review): the result is used below without a NULL check - confirm dev always has IPv6 state on this path */
  2610 +
  2611 + if (event == NETDEV_BONDING_NEWTYPE)
  2612 + ipv6_mc_remap(idev); /* type already switched: re-add the multicast groups */
  2613 + else if (event == NETDEV_BONDING_OLDTYPE)
  2614 + ipv6_mc_unmap(idev); /* type about to switch: drop the multicast groups; any other event value is ignored */
  2615 +}
2597 2616  
2598 2617 static int addrconf_ifdown(struct net_device *dev, int how)
2599 2618 {
... ... @@ -2249,6 +2249,25 @@
2249 2249 ma_put(ma);
2250 2250 }
2251 2251  
  2252 +/* Device changing type: ipv6_mc_unmap() is called for NETDEV_BONDING_OLDTYPE, ipv6_mc_remap() for NETDEV_BONDING_NEWTYPE */
  2253 +
  2254 +void ipv6_mc_unmap(struct inet6_dev *idev)
  2255 +{
  2256 + struct ifmcaddr6 *i;
  2257 +
  2258 + /* Drop every group on the device's multicast list; the entries stay on the list */
  2259 +
  2260 + read_lock_bh(&idev->lock); /* the list is only traversed, so the read side of idev->lock is taken */
  2261 + for (i = idev->mc_list; i; i = i->next)
  2262 + igmp6_group_dropped(i);
  2263 + read_unlock_bh(&idev->lock);
  2264 +}
  2265 +
  2266 +void ipv6_mc_remap(struct inet6_dev *idev) /* called for NETDEV_BONDING_NEWTYPE, after the device type has changed */
  2267 +{
  2268 + ipv6_mc_up(idev); /* reuses the existing device-up multicast path to re-add the groups */
  2269 +}
  2270 +
2252 2271 /* Device going down */
2253 2272  
2254 2273 void ipv6_mc_down(struct inet6_dev *idev)