Commit c3ade5cad07f4d67f2e16a28f3c73d9483a55e0e
Committed by Jeff Garzik
1 parent: 12755c16a9
Exists in master and in 4 other branches
bonding: gratuitous ARP
Add support for generating gratuitous ARPs in bonding active-backup mode when a failover occurs. The ARPs are VLAN tagged as needed.

Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Showing 2 changed files with 255 additions and 34 deletions (side-by-side diff)
drivers/net/bonding/bond_main.c
... | ... | @@ -475,7 +475,10 @@ |
475 | 475 | * Solution is to move call to dev_remove_pack outside of the |
476 | 476 | * spinlock. |
477 | 477 | * Set version to 2.6.1. |
478 | - * | |
478 | + * 2005/06/05 - Jay Vosburgh <fubar@us.ibm.com> | |
479 | + * - Support for generating gratuitous ARPs in active-backup mode. | |
480 | + * Includes support for VLAN tagging all bonding-generated ARPs | |
481 | + * as needed. Set version to 2.6.2. | |
479 | 482 | */ |
480 | 483 | |
481 | 484 | //#define BONDING_DEBUG 1 |
... | ... | @@ -519,6 +522,7 @@ |
519 | 522 | #include <linux/ethtool.h> |
520 | 523 | #include <linux/if_vlan.h> |
521 | 524 | #include <linux/if_bonding.h> |
525 | +#include <net/route.h> | |
522 | 526 | #include "bonding.h" |
523 | 527 | #include "bond_3ad.h" |
524 | 528 | #include "bond_alb.h" |
... | ... | @@ -574,7 +578,6 @@ |
574 | 578 | |
575 | 579 | static u32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ; |
576 | 580 | static int arp_ip_count = 0; |
577 | -static u32 my_ip = 0; | |
578 | 581 | static int bond_mode = BOND_MODE_ROUNDROBIN; |
579 | 582 | static int lacp_fast = 0; |
580 | 583 | static int app_abi_ver = 0; |
... | ... | @@ -611,6 +614,7 @@ |
611 | 614 | /*-------------------------- Forward declarations ---------------------------*/ |
612 | 615 | |
613 | 616 | static inline void bond_set_mode_ops(struct net_device *bond_dev, int mode); |
617 | +static void bond_send_gratuitous_arp(struct bonding *bond); | |
614 | 618 | |
615 | 619 | /*---------------------------- General routines -----------------------------*/ |
616 | 620 | |
... | ... | @@ -659,6 +663,7 @@ |
659 | 663 | |
660 | 664 | INIT_LIST_HEAD(&vlan->vlan_list); |
661 | 665 | vlan->vlan_id = vlan_id; |
666 | + vlan->vlan_ip = 0; | |
662 | 667 | |
663 | 668 | write_lock_bh(&bond->lock); |
664 | 669 | |
... | ... | @@ -1468,16 +1473,6 @@ |
1468 | 1473 | } |
1469 | 1474 | } |
1470 | 1475 | |
1471 | - if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { | |
1472 | - if (old_active) { | |
1473 | - bond_set_slave_inactive_flags(old_active); | |
1474 | - } | |
1475 | - | |
1476 | - if (new_active) { | |
1477 | - bond_set_slave_active_flags(new_active); | |
1478 | - } | |
1479 | - } | |
1480 | - | |
1481 | 1476 | if (USES_PRIMARY(bond->params.mode)) { |
1482 | 1477 | bond_mc_swap(bond, new_active, old_active); |
1483 | 1478 | } |
... | ... | @@ -1488,6 +1483,17 @@ |
1488 | 1483 | } else { |
1489 | 1484 | bond->curr_active_slave = new_active; |
1490 | 1485 | } |
1486 | + | |
1487 | + if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { | |
1488 | + if (old_active) { | |
1489 | + bond_set_slave_inactive_flags(old_active); | |
1490 | + } | |
1491 | + | |
1492 | + if (new_active) { | |
1493 | + bond_set_slave_active_flags(new_active); | |
1494 | + } | |
1495 | + bond_send_gratuitous_arp(bond); | |
1496 | + } | |
1491 | 1497 | } |
1492 | 1498 | |
1493 | 1499 | /** |
1494 | 1500 | |
1495 | 1501 | |
1496 | 1502 | |
1497 | 1503 | |
... | ... | @@ -2694,19 +2700,184 @@ |
2694 | 2700 | read_unlock(&bond->lock); |
2695 | 2701 | } |
2696 | 2702 | |
2703 | + | |
2704 | +static u32 bond_glean_dev_ip(struct net_device *dev) | |
2705 | +{ | |
2706 | + struct in_device *idev; | |
2707 | + struct in_ifaddr *ifa; | |
2708 | + u32 addr = 0; | |
2709 | + | |
2710 | + if (!dev) | |
2711 | + return 0; | |
2712 | + | |
2713 | + rcu_read_lock(); | |
2714 | + idev = __in_dev_get(dev); | |
2715 | + if (!idev) | |
2716 | + goto out; | |
2717 | + | |
2718 | + ifa = idev->ifa_list; | |
2719 | + if (!ifa) | |
2720 | + goto out; | |
2721 | + | |
2722 | + addr = ifa->ifa_local; | |
2723 | +out: | |
2724 | + rcu_read_unlock(); | |
2725 | + return addr; | |
2726 | +} | |
2727 | + | |
2728 | +static int bond_has_ip(struct bonding *bond) | |
2729 | +{ | |
2730 | + struct vlan_entry *vlan, *vlan_next; | |
2731 | + | |
2732 | + if (bond->master_ip) | |
2733 | + return 1; | |
2734 | + | |
2735 | + if (list_empty(&bond->vlan_list)) | |
2736 | + return 0; | |
2737 | + | |
2738 | + list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, | |
2739 | + vlan_list) { | |
2740 | + if (vlan->vlan_ip) | |
2741 | + return 1; | |
2742 | + } | |
2743 | + | |
2744 | + return 0; | |
2745 | +} | |
2746 | + | |
2747 | +/* | |
2748 | + * We go to the (large) trouble of VLAN tagging ARP frames because | |
2749 | + * switches in VLAN mode (especially if ports are configured as | |
2750 | + * "native" to a VLAN) might not pass non-tagged frames. | |
2751 | + */ | |
2752 | +static void bond_arp_send(struct net_device *slave_dev, int arp_op, u32 dest_ip, u32 src_ip, unsigned short vlan_id) | |
2753 | +{ | |
2754 | + struct sk_buff *skb; | |
2755 | + | |
2756 | + dprintk("arp %d on slave %s: dst %x src %x vid %d\n", arp_op, | |
2757 | + slave_dev->name, dest_ip, src_ip, vlan_id); | |
2758 | + | |
2759 | + skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, | |
2760 | + NULL, slave_dev->dev_addr, NULL); | |
2761 | + | |
2762 | + if (!skb) { | |
2763 | + printk(KERN_ERR DRV_NAME ": ARP packet allocation failed\n"); | |
2764 | + return; | |
2765 | + } | |
2766 | + if (vlan_id) { | |
2767 | + skb = vlan_put_tag(skb, vlan_id); | |
2768 | + if (!skb) { | |
2769 | + printk(KERN_ERR DRV_NAME ": failed to insert VLAN tag\n"); | |
2770 | + return; | |
2771 | + } | |
2772 | + } | |
2773 | + arp_xmit(skb); | |
2774 | +} | |
2775 | + | |
2776 | + | |
2697 | 2777 | static void bond_arp_send_all(struct bonding *bond, struct slave *slave) |
2698 | 2778 | { |
2699 | - int i; | |
2779 | + int i, vlan_id, rv; | |
2700 | 2780 | u32 *targets = bond->params.arp_targets; |
2781 | + struct vlan_entry *vlan, *vlan_next; | |
2782 | + struct net_device *vlan_dev; | |
2783 | + struct flowi fl; | |
2784 | + struct rtable *rt; | |
2701 | 2785 | |
2702 | 2786 | for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) { |
2703 | - arp_send(ARPOP_REQUEST, ETH_P_ARP, targets[i], slave->dev, | |
2704 | - my_ip, NULL, slave->dev->dev_addr, | |
2705 | - NULL); | |
2787 | + dprintk("basa: target %x\n", targets[i]); | |
2788 | + if (list_empty(&bond->vlan_list)) { | |
2789 | + dprintk("basa: empty vlan: arp_send\n"); | |
2790 | + bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], | |
2791 | + bond->master_ip, 0); | |
2792 | + continue; | |
2793 | + } | |
2794 | + | |
2795 | + /* | |
2796 | + * If VLANs are configured, we do a route lookup to | |
2797 | + * determine which VLAN interface would be used, so we | |
2798 | + * can tag the ARP with the proper VLAN tag. | |
2799 | + */ | |
2800 | + memset(&fl, 0, sizeof(fl)); | |
2801 | + fl.fl4_dst = targets[i]; | |
2802 | + fl.fl4_tos = RTO_ONLINK; | |
2803 | + | |
2804 | + rv = ip_route_output_key(&rt, &fl); | |
2805 | + if (rv) { | |
2806 | + if (net_ratelimit()) { | |
2807 | + printk(KERN_WARNING DRV_NAME | |
2808 | + ": %s: no route to arp_ip_target %u.%u.%u.%u\n", | |
2809 | + bond->dev->name, NIPQUAD(fl.fl4_dst)); | |
2810 | + } | |
2811 | + continue; | |
2812 | + } | |
2813 | + | |
2814 | + /* | |
2815 | + * This target is not on a VLAN | |
2816 | + */ | |
2817 | + if (rt->u.dst.dev == bond->dev) { | |
2818 | + dprintk("basa: rtdev == bond->dev: arp_send\n"); | |
2819 | + bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], | |
2820 | + bond->master_ip, 0); | |
2821 | + continue; | |
2822 | + } | |
2823 | + | |
2824 | + vlan_id = 0; | |
2825 | + list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, | |
2826 | + vlan_list) { | |
2827 | + vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; | |
2828 | + if (vlan_dev == rt->u.dst.dev) { | |
2829 | + vlan_id = vlan->vlan_id; | |
2830 | + dprintk("basa: vlan match on %s %d\n", | |
2831 | + vlan_dev->name, vlan_id); | |
2832 | + break; | |
2833 | + } | |
2834 | + } | |
2835 | + | |
2836 | + if (vlan_id) { | |
2837 | + bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], | |
2838 | + vlan->vlan_ip, vlan_id); | |
2839 | + continue; | |
2840 | + } | |
2841 | + | |
2842 | + if (net_ratelimit()) { | |
2843 | + printk(KERN_WARNING DRV_NAME | |
2844 | + ": %s: no path to arp_ip_target %u.%u.%u.%u via rt.dev %s\n", | |
2845 | + bond->dev->name, NIPQUAD(fl.fl4_dst), | |
2846 | + rt->u.dst.dev ? rt->u.dst.dev->name : "NULL"); | |
2847 | + } | |
2706 | 2848 | } |
2707 | 2849 | } |
2708 | 2850 | |
2709 | 2851 | /* |
2852 | + * Kick out a gratuitous ARP for an IP on the bonding master plus one | |
2853 | + * for each VLAN above us. | |
2854 | + */ | |
2855 | +static void bond_send_gratuitous_arp(struct bonding *bond) | |
2856 | +{ | |
2857 | + struct slave *slave = bond->curr_active_slave; | |
2858 | + struct vlan_entry *vlan; | |
2859 | + struct net_device *vlan_dev; | |
2860 | + | |
2861 | + dprintk("bond_send_grat_arp: bond %s slave %s\n", bond->dev->name, | |
2862 | + slave ? slave->dev->name : "NULL"); | |
2863 | + if (!slave) | |
2864 | + return; | |
2865 | + | |
2866 | + if (bond->master_ip) { | |
2867 | + bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip, | |
2868 | + bond->master_ip, 0); | |
2869 | + } | |
2870 | + | |
2871 | + list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { | |
2872 | + vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; | |
2873 | + if (vlan->vlan_ip) { | |
2874 | + bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip, | |
2875 | + vlan->vlan_ip, vlan->vlan_id); | |
2876 | + } | |
2877 | + } | |
2878 | +} | |
2879 | + | |
2880 | +/* | |
2710 | 2881 | * this function is called regularly to monitor each slave's link |
2711 | 2882 | * ensuring that traffic is being sent and received when arp monitoring |
2712 | 2883 | * is used in load-balancing mode. if the adapter has been dormant, then an |
... | ... | @@ -2781,7 +2952,7 @@ |
2781 | 2952 | */ |
2782 | 2953 | if (((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) || |
2783 | 2954 | (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) && |
2784 | - my_ip)) { | |
2955 | + bond_has_ip(bond))) { | |
2785 | 2956 | |
2786 | 2957 | slave->link = BOND_LINK_DOWN; |
2787 | 2958 | slave->state = BOND_STATE_BACKUP; |
... | ... | @@ -2920,7 +3091,7 @@ |
2920 | 3091 | if ((slave != bond->curr_active_slave) && |
2921 | 3092 | (!bond->current_arp_slave) && |
2922 | 3093 | (((jiffies - slave->dev->last_rx) >= 3*delta_in_ticks) && |
2923 | - my_ip)) { | |
3094 | + bond_has_ip(bond))) { | |
2924 | 3095 | /* a backup slave has gone down; three times |
2925 | 3096 | * the delta allows the current slave to be |
2926 | 3097 | * taken out before the backup slave. |
... | ... | @@ -2966,8 +3137,8 @@ |
2966 | 3137 | * if it is up and needs to take over as the curr_active_slave |
2967 | 3138 | */ |
2968 | 3139 | if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) || |
2969 | - (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) && | |
2970 | - my_ip)) && | |
3140 | + (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) && | |
3141 | + bond_has_ip(bond))) && | |
2971 | 3142 | ((jiffies - slave->jiffies) >= 2*delta_in_ticks)) { |
2972 | 3143 | |
2973 | 3144 | slave->link = BOND_LINK_DOWN; |
... | ... | @@ -3019,7 +3190,7 @@ |
3019 | 3190 | /* the current slave must tx an arp to ensure backup slaves |
3020 | 3191 | * rx traffic |
3021 | 3192 | */ |
3022 | - if (slave && my_ip) { | |
3193 | + if (slave && bond_has_ip(bond)) { | |
3023 | 3194 | bond_arp_send_all(bond, slave); |
3024 | 3195 | } |
3025 | 3196 | } |
3026 | 3197 | |
... | ... | @@ -3471,10 +3642,67 @@ |
3471 | 3642 | return NOTIFY_DONE; |
3472 | 3643 | } |
3473 | 3644 | |
3645 | +/* | |
3646 | + * bond_inetaddr_event: handle inetaddr notifier chain events. | |
3647 | + * | |
3648 | + * We keep track of device IPs primarily to use as source addresses in | |
3649 | + * ARP monitor probes (rather than spewing out broadcasts all the time). | |
3650 | + * | |
3651 | + * We track one IP for the main device (if it has one), plus one per VLAN. | |
3652 | + */ | |
3653 | +static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) | |
3654 | +{ | |
3655 | + struct in_ifaddr *ifa = ptr; | |
3656 | + struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; | |
3657 | + struct bonding *bond, *bond_next; | |
3658 | + struct vlan_entry *vlan, *vlan_next; | |
3659 | + | |
3660 | + list_for_each_entry_safe(bond, bond_next, &bond_dev_list, bond_list) { | |
3661 | + if (bond->dev == event_dev) { | |
3662 | + switch (event) { | |
3663 | + case NETDEV_UP: | |
3664 | + bond->master_ip = ifa->ifa_local; | |
3665 | + return NOTIFY_OK; | |
3666 | + case NETDEV_DOWN: | |
3667 | + bond->master_ip = bond_glean_dev_ip(bond->dev); | |
3668 | + return NOTIFY_OK; | |
3669 | + default: | |
3670 | + return NOTIFY_DONE; | |
3671 | + } | |
3672 | + } | |
3673 | + | |
3674 | + if (list_empty(&bond->vlan_list)) | |
3675 | + continue; | |
3676 | + | |
3677 | + list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, | |
3678 | + vlan_list) { | |
3679 | + vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; | |
3680 | + if (vlan_dev == event_dev) { | |
3681 | + switch (event) { | |
3682 | + case NETDEV_UP: | |
3683 | + vlan->vlan_ip = ifa->ifa_local; | |
3684 | + return NOTIFY_OK; | |
3685 | + case NETDEV_DOWN: | |
3686 | + vlan->vlan_ip = | |
3687 | + bond_glean_dev_ip(vlan_dev); | |
3688 | + return NOTIFY_OK; | |
3689 | + default: | |
3690 | + return NOTIFY_DONE; | |
3691 | + } | |
3692 | + } | |
3693 | + } | |
3694 | + } | |
3695 | + return NOTIFY_DONE; | |
3696 | +} | |
3697 | + | |
3474 | 3698 | static struct notifier_block bond_netdev_notifier = { |
3475 | 3699 | .notifier_call = bond_netdev_event, |
3476 | 3700 | }; |
3477 | 3701 | |
3702 | +static struct notifier_block bond_inetaddr_notifier = { | |
3703 | + .notifier_call = bond_inetaddr_event, | |
3704 | +}; | |
3705 | + | |
3478 | 3706 | /*-------------------------- Packet type handling ---------------------------*/ |
3479 | 3707 | |
3480 | 3708 | /* register to receive lacpdus on a bond */ |
... | ... | @@ -4060,17 +4288,6 @@ |
4060 | 4288 | struct bonding *bond = bond_dev->priv; |
4061 | 4289 | int res = 1; |
4062 | 4290 | |
4063 | - /* if we are sending arp packets, try to at least | |
4064 | - identify our own ip address */ | |
4065 | - if (bond->params.arp_interval && !my_ip && | |
4066 | - (skb->protocol == __constant_htons(ETH_P_ARP))) { | |
4067 | - char *the_ip = (char *)skb->data + | |
4068 | - sizeof(struct ethhdr) + | |
4069 | - sizeof(struct arphdr) + | |
4070 | - ETH_ALEN; | |
4071 | - memcpy(&my_ip, the_ip, 4); | |
4072 | - } | |
4073 | - | |
4074 | 4291 | read_lock(&bond->lock); |
4075 | 4292 | read_lock(&bond->curr_slave_lock); |
4076 | 4293 | |
... | ... | @@ -4669,6 +4886,7 @@ |
4669 | 4886 | |
4670 | 4887 | rtnl_unlock(); |
4671 | 4888 | register_netdevice_notifier(&bond_netdev_notifier); |
4889 | + register_inetaddr_notifier(&bond_inetaddr_notifier); | |
4672 | 4890 | |
4673 | 4891 | return 0; |
4674 | 4892 | |
... | ... | @@ -4684,6 +4902,7 @@ |
4684 | 4902 | static void __exit bonding_exit(void) |
4685 | 4903 | { |
4686 | 4904 | unregister_netdevice_notifier(&bond_netdev_notifier); |
4905 | + unregister_inetaddr_notifier(&bond_inetaddr_notifier); | |
4687 | 4906 | |
4688 | 4907 | rtnl_lock(); |
4689 | 4908 | bond_free_all(); |
drivers/net/bonding/bonding.h
... | ... | @@ -36,8 +36,8 @@ |
36 | 36 | #include "bond_3ad.h" |
37 | 37 | #include "bond_alb.h" |
38 | 38 | |
39 | -#define DRV_VERSION "2.6.1" | |
40 | -#define DRV_RELDATE "October 29, 2004" | |
39 | +#define DRV_VERSION "2.6.2" | |
40 | +#define DRV_RELDATE "June 5, 2005" | |
41 | 41 | #define DRV_NAME "bonding" |
42 | 42 | #define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" |
43 | 43 | |
... | ... | @@ -149,6 +149,7 @@ |
149 | 149 | |
150 | 150 | struct vlan_entry { |
151 | 151 | struct list_head vlan_list; |
152 | + u32 vlan_ip; | |
152 | 153 | unsigned short vlan_id; |
153 | 154 | }; |
154 | 155 | |
... | ... | @@ -197,6 +198,7 @@ |
197 | 198 | #endif /* CONFIG_PROC_FS */ |
198 | 199 | struct list_head bond_list; |
199 | 200 | struct dev_mc_list *mc_list; |
201 | + u32 master_ip; | |
200 | 202 | u16 flags; |
201 | 203 | struct ad_bond_info ad_info; |
202 | 204 | struct alb_bond_info alb_info; |