All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next-2.6 3/3] bonding,ipv4,ipv6,vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS
@ 2011-04-15 23:47 Ben Hutchings
  2011-04-16  0:30 ` Jay Vosburgh
  0 siblings, 1 reply; 10+ messages in thread
From: Ben Hutchings @ 2011-04-15 23:47 UTC (permalink / raw)
  To: David Miller, Jay Vosburgh, Andy Gospodarek, Patrick McHardy; +Cc: netdev

It is undesirable for the bonding driver to be poking into higher
level protocols, and notifiers provide a way to avoid that.  This does
mean removing the ability to configure reptitition of gratuitous ARPs
and unsolicited NAs.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
---
 drivers/net/bonding/Makefile     |    3 -
 drivers/net/bonding/bond_ipv6.c  |  225 --------------------------------------
 drivers/net/bonding/bond_main.c  |   96 ----------------
 drivers/net/bonding/bond_sysfs.c |   80 --------------
 drivers/net/bonding/bonding.h    |   29 -----
 net/8021q/vlan.c                 |    3 +-
 net/ipv4/devinet.c               |    1 +
 net/ipv6/ndisc.c                 |    1 +
 8 files changed, 4 insertions(+), 434 deletions(-)
 delete mode 100644 drivers/net/bonding/bond_ipv6.c

diff --git a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile
index 3c5c014..4c21bf6 100644
--- a/drivers/net/bonding/Makefile
+++ b/drivers/net/bonding/Makefile
@@ -9,6 +9,3 @@ bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o bond_debugfs.o
 proc-$(CONFIG_PROC_FS) += bond_procfs.o
 bonding-objs += $(proc-y)
 
-ipv6-$(subst m,y,$(CONFIG_IPV6)) += bond_ipv6.o
-bonding-objs += $(ipv6-y)
-
diff --git a/drivers/net/bonding/bond_ipv6.c b/drivers/net/bonding/bond_ipv6.c
deleted file mode 100644
index 84fbd4e..0000000
--- a/drivers/net/bonding/bond_ipv6.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * Copyright(c) 2008 Hewlett-Packard Development Company, L.P.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called LICENSE.
- *
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/types.h>
-#include <linux/if_vlan.h>
-#include <net/ipv6.h>
-#include <net/ndisc.h>
-#include <net/addrconf.h>
-#include <net/netns/generic.h>
-#include "bonding.h"
-
-/*
- * Assign bond->master_ipv6 to the next IPv6 address in the list, or
- * zero it out if there are none.
- */
-static void bond_glean_dev_ipv6(struct net_device *dev, struct in6_addr *addr)
-{
-	struct inet6_dev *idev;
-
-	if (!dev)
-		return;
-
-	idev = in6_dev_get(dev);
-	if (!idev)
-		return;
-
-	read_lock_bh(&idev->lock);
-	if (!list_empty(&idev->addr_list)) {
-		struct inet6_ifaddr *ifa
-			= list_first_entry(&idev->addr_list,
-					   struct inet6_ifaddr, if_list);
-		ipv6_addr_copy(addr, &ifa->addr);
-	} else
-		ipv6_addr_set(addr, 0, 0, 0, 0);
-
-	read_unlock_bh(&idev->lock);
-
-	in6_dev_put(idev);
-}
-
-static void bond_na_send(struct net_device *slave_dev,
-			 struct in6_addr *daddr,
-			 int router,
-			 unsigned short vlan_id)
-{
-	struct in6_addr mcaddr;
-	struct icmp6hdr icmp6h = {
-		.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
-	};
-	struct sk_buff *skb;
-
-	icmp6h.icmp6_router = router;
-	icmp6h.icmp6_solicited = 0;
-	icmp6h.icmp6_override = 1;
-
-	addrconf_addr_solict_mult(daddr, &mcaddr);
-
-	pr_debug("ipv6 na on slave %s: dest %pI6, src %pI6\n",
-		 slave_dev->name, &mcaddr, daddr);
-
-	skb = ndisc_build_skb(slave_dev, &mcaddr, daddr, &icmp6h, daddr,
-			      ND_OPT_TARGET_LL_ADDR);
-
-	if (!skb) {
-		pr_err("NA packet allocation failed\n");
-		return;
-	}
-
-	if (vlan_id) {
-		/* The Ethernet header is not present yet, so it is
-		 * too early to insert a VLAN tag.  Force use of an
-		 * out-of-line tag here and let dev_hard_start_xmit()
-		 * insert it if the slave hardware can't.
-		 */
-		skb = __vlan_hwaccel_put_tag(skb, vlan_id);
-		if (!skb) {
-			pr_err("failed to insert VLAN tag\n");
-			return;
-		}
-	}
-
-	ndisc_send_skb(skb, slave_dev, NULL, &mcaddr, daddr, &icmp6h);
-}
-
-/*
- * Kick out an unsolicited Neighbor Advertisement for an IPv6 address on
- * the bonding master.  This will help the switch learn our address
- * if in active-backup mode.
- *
- * Caller must hold curr_slave_lock for read or better
- */
-void bond_send_unsolicited_na(struct bonding *bond)
-{
-	struct slave *slave = bond->curr_active_slave;
-	struct vlan_entry *vlan;
-	struct inet6_dev *idev;
-	int is_router;
-
-	pr_debug("%s: bond %s slave %s\n", bond->dev->name,
-		 __func__, slave ? slave->dev->name : "NULL");
-
-	if (!slave || !bond->send_unsol_na ||
-	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
-		return;
-
-	bond->send_unsol_na--;
-
-	idev = in6_dev_get(bond->dev);
-	if (!idev)
-		return;
-
-	is_router = !!idev->cnf.forwarding;
-
-	in6_dev_put(idev);
-
-	if (!ipv6_addr_any(&bond->master_ipv6))
-		bond_na_send(slave->dev, &bond->master_ipv6, is_router, 0);
-
-	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
-		if (!ipv6_addr_any(&vlan->vlan_ipv6)) {
-			bond_na_send(slave->dev, &vlan->vlan_ipv6, is_router,
-				     vlan->vlan_id);
-		}
-	}
-}
-
-/*
- * bond_inet6addr_event: handle inet6addr notifier chain events.
- *
- * We keep track of device IPv6 addresses primarily to use as source
- * addresses in NS probes.
- *
- * We track one IPv6 for the main device (if it has one).
- */
-static int bond_inet6addr_event(struct notifier_block *this,
-				unsigned long event,
-				void *ptr)
-{
-	struct inet6_ifaddr *ifa = ptr;
-	struct net_device *vlan_dev, *event_dev = ifa->idev->dev;
-	struct bonding *bond;
-	struct vlan_entry *vlan;
-	struct bond_net *bn = net_generic(dev_net(event_dev), bond_net_id);
-
-	list_for_each_entry(bond, &bn->dev_list, bond_list) {
-		if (bond->dev == event_dev) {
-			switch (event) {
-			case NETDEV_UP:
-				if (ipv6_addr_any(&bond->master_ipv6))
-					ipv6_addr_copy(&bond->master_ipv6,
-						       &ifa->addr);
-				return NOTIFY_OK;
-			case NETDEV_DOWN:
-				if (ipv6_addr_equal(&bond->master_ipv6,
-						    &ifa->addr))
-					bond_glean_dev_ipv6(bond->dev,
-							    &bond->master_ipv6);
-				return NOTIFY_OK;
-			default:
-				return NOTIFY_DONE;
-			}
-		}
-
-		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
-			if (!bond->vlgrp)
-				continue;
-			vlan_dev = vlan_group_get_device(bond->vlgrp,
-							 vlan->vlan_id);
-			if (vlan_dev == event_dev) {
-				switch (event) {
-				case NETDEV_UP:
-					if (ipv6_addr_any(&vlan->vlan_ipv6))
-						ipv6_addr_copy(&vlan->vlan_ipv6,
-							       &ifa->addr);
-					return NOTIFY_OK;
-				case NETDEV_DOWN:
-					if (ipv6_addr_equal(&vlan->vlan_ipv6,
-							    &ifa->addr))
-						bond_glean_dev_ipv6(vlan_dev,
-								    &vlan->vlan_ipv6);
-					return NOTIFY_OK;
-				default:
-					return NOTIFY_DONE;
-				}
-			}
-		}
-	}
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block bond_inet6addr_notifier = {
-	.notifier_call = bond_inet6addr_event,
-};
-
-void bond_register_ipv6_notifier(void)
-{
-	register_inet6addr_notifier(&bond_inet6addr_notifier);
-}
-
-void bond_unregister_ipv6_notifier(void)
-{
-	unregister_inet6addr_notifier(&bond_inet6addr_notifier);
-}
-
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b51e021..5cd4766 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -89,8 +89,6 @@
 
 static int max_bonds	= BOND_DEFAULT_MAX_BONDS;
 static int tx_queues	= BOND_DEFAULT_TX_QUEUES;
-static int num_grat_arp = 1;
-static int num_unsol_na = 1;
 static int miimon	= BOND_LINK_MON_INTERV;
 static int updelay;
 static int downdelay;
@@ -113,10 +111,6 @@ module_param(max_bonds, int, 0);
 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
 module_param(tx_queues, int, 0);
 MODULE_PARM_DESC(tx_queues, "Max number of transmit queues (default = 16)");
-module_param(num_grat_arp, int, 0644);
-MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event");
-module_param(num_unsol_na, int, 0644);
-MODULE_PARM_DESC(num_unsol_na, "Number of unsolicited IPv6 Neighbor Advertisements packets to send on failover event");
 module_param(miimon, int, 0);
 MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
 module_param(updelay, int, 0);
@@ -234,7 +228,6 @@ struct bond_parm_tbl ad_select_tbl[] = {
 
 /*-------------------------- Forward declarations ---------------------------*/
 
-static void bond_send_gratuitous_arp(struct bonding *bond);
 static int bond_init(struct net_device *bond_dev);
 static void bond_uninit(struct net_device *bond_dev);
 
@@ -1160,14 +1153,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 				bond_do_fail_over_mac(bond, new_active,
 						      old_active);
 
-			if (netif_running(bond->dev)) {
-				bond->send_grat_arp = bond->params.num_grat_arp;
-				bond_send_gratuitous_arp(bond);
-
-				bond->send_unsol_na = bond->params.num_unsol_na;
-				bond_send_unsolicited_na(bond);
-			}
-
 			write_unlock_bh(&bond->curr_slave_lock);
 			read_unlock(&bond->lock);
 
@@ -2578,18 +2563,6 @@ void bond_mii_monitor(struct work_struct *work)
 	if (bond->slave_cnt == 0)
 		goto re_arm;
 
-	if (bond->send_grat_arp) {
-		read_lock(&bond->curr_slave_lock);
-		bond_send_gratuitous_arp(bond);
-		read_unlock(&bond->curr_slave_lock);
-	}
-
-	if (bond->send_unsol_na) {
-		read_lock(&bond->curr_slave_lock);
-		bond_send_unsolicited_na(bond);
-		read_unlock(&bond->curr_slave_lock);
-	}
-
 	if (bond_miimon_inspect(bond)) {
 		read_unlock(&bond->lock);
 		rtnl_lock();
@@ -2751,44 +2724,6 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 	}
 }
 
-/*
- * Kick out a gratuitous ARP for an IP on the bonding master plus one
- * for each VLAN above us.
- *
- * Caller must hold curr_slave_lock for read or better
- */
-static void bond_send_gratuitous_arp(struct bonding *bond)
-{
-	struct slave *slave = bond->curr_active_slave;
-	struct vlan_entry *vlan;
-	struct net_device *vlan_dev;
-
-	pr_debug("bond_send_grat_arp: bond %s slave %s\n",
-		 bond->dev->name, slave ? slave->dev->name : "NULL");
-
-	if (!slave || !bond->send_grat_arp ||
-	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
-		return;
-
-	bond->send_grat_arp--;
-
-	if (bond->master_ip) {
-		bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip,
-				bond->master_ip, 0);
-	}
-
-	if (!bond->vlgrp)
-		return;
-
-	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
-		vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
-		if (vlan->vlan_ip) {
-			bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip,
-				      vlan->vlan_ip, vlan->vlan_id);
-		}
-	}
-}
-
 static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip)
 {
 	int i;
@@ -3255,18 +3190,6 @@ void bond_activebackup_arp_mon(struct work_struct *work)
 	if (bond->slave_cnt == 0)
 		goto re_arm;
 
-	if (bond->send_grat_arp) {
-		read_lock(&bond->curr_slave_lock);
-		bond_send_gratuitous_arp(bond);
-		read_unlock(&bond->curr_slave_lock);
-	}
-
-	if (bond->send_unsol_na) {
-		read_lock(&bond->curr_slave_lock);
-		bond_send_unsolicited_na(bond);
-		read_unlock(&bond->curr_slave_lock);
-	}
-
 	if (bond_ab_arp_inspect(bond, delta_in_ticks)) {
 		read_unlock(&bond->lock);
 		rtnl_lock();
@@ -3645,9 +3568,6 @@ static int bond_close(struct net_device *bond_dev)
 
 	write_lock_bh(&bond->lock);
 
-	bond->send_grat_arp = 0;
-	bond->send_unsol_na = 0;
-
 	/* signal timers not to re-arm */
 	bond->kill_timers = 1;
 
@@ -4724,18 +4644,6 @@ static int bond_check_params(struct bond_params *params)
 		use_carrier = 1;
 	}
 
-	if (num_grat_arp < 0 || num_grat_arp > 255) {
-		pr_warning("Warning: num_grat_arp (%d) not in range 0-255 so it was reset to 1\n",
-			   num_grat_arp);
-		num_grat_arp = 1;
-	}
-
-	if (num_unsol_na < 0 || num_unsol_na > 255) {
-		pr_warning("Warning: num_unsol_na (%d) not in range 0-255 so it was reset to 1\n",
-			   num_unsol_na);
-		num_unsol_na = 1;
-	}
-
 	/* reset values for 802.3ad */
 	if (bond_mode == BOND_MODE_8023AD) {
 		if (!miimon) {
@@ -4925,8 +4833,6 @@ static int bond_check_params(struct bond_params *params)
 	params->mode = bond_mode;
 	params->xmit_policy = xmit_hashtype;
 	params->miimon = miimon;
-	params->num_grat_arp = num_grat_arp;
-	params->num_unsol_na = num_unsol_na;
 	params->arp_interval = arp_interval;
 	params->arp_validate = arp_validate_value;
 	params->updelay = updelay;
@@ -5121,7 +5027,6 @@ static int __init bonding_init(void)
 
 	register_netdevice_notifier(&bond_netdev_notifier);
 	register_inetaddr_notifier(&bond_inetaddr_notifier);
-	bond_register_ipv6_notifier();
 out:
 	return res;
 err:
@@ -5136,7 +5041,6 @@ static void __exit bonding_exit(void)
 {
 	unregister_netdevice_notifier(&bond_netdev_notifier);
 	unregister_inetaddr_notifier(&bond_inetaddr_notifier);
-	bond_unregister_ipv6_notifier();
 
 	bond_destroy_sysfs();
 	bond_destroy_debugfs();
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index de87aea..259ff32 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -874,84 +874,6 @@ static DEVICE_ATTR(ad_select, S_IRUGO | S_IWUSR,
 		   bonding_show_ad_select, bonding_store_ad_select);
 
 /*
- * Show and set the number of grat ARP to send after a failover event.
- */
-static ssize_t bonding_show_n_grat_arp(struct device *d,
-				   struct device_attribute *attr,
-				   char *buf)
-{
-	struct bonding *bond = to_bond(d);
-
-	return sprintf(buf, "%d\n", bond->params.num_grat_arp);
-}
-
-static ssize_t bonding_store_n_grat_arp(struct device *d,
-				    struct device_attribute *attr,
-				    const char *buf, size_t count)
-{
-	int new_value, ret = count;
-	struct bonding *bond = to_bond(d);
-
-	if (sscanf(buf, "%d", &new_value) != 1) {
-		pr_err("%s: no num_grat_arp value specified.\n",
-		       bond->dev->name);
-		ret = -EINVAL;
-		goto out;
-	}
-	if (new_value < 0 || new_value > 255) {
-		pr_err("%s: Invalid num_grat_arp value %d not in range 0-255; rejected.\n",
-		       bond->dev->name, new_value);
-		ret = -EINVAL;
-		goto out;
-	} else {
-		bond->params.num_grat_arp = new_value;
-	}
-out:
-	return ret;
-}
-static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR,
-		   bonding_show_n_grat_arp, bonding_store_n_grat_arp);
-
-/*
- * Show and set the number of unsolicited NA's to send after a failover event.
- */
-static ssize_t bonding_show_n_unsol_na(struct device *d,
-				       struct device_attribute *attr,
-				       char *buf)
-{
-	struct bonding *bond = to_bond(d);
-
-	return sprintf(buf, "%d\n", bond->params.num_unsol_na);
-}
-
-static ssize_t bonding_store_n_unsol_na(struct device *d,
-					struct device_attribute *attr,
-					const char *buf, size_t count)
-{
-	int new_value, ret = count;
-	struct bonding *bond = to_bond(d);
-
-	if (sscanf(buf, "%d", &new_value) != 1) {
-		pr_err("%s: no num_unsol_na value specified.\n",
-		       bond->dev->name);
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (new_value < 0 || new_value > 255) {
-		pr_err("%s: Invalid num_unsol_na value %d not in range 0-255; rejected.\n",
-		       bond->dev->name, new_value);
-		ret = -EINVAL;
-		goto out;
-	} else
-		bond->params.num_unsol_na = new_value;
-out:
-	return ret;
-}
-static DEVICE_ATTR(num_unsol_na, S_IRUGO | S_IWUSR,
-		   bonding_show_n_unsol_na, bonding_store_n_unsol_na);
-
-/*
  * Show and set the MII monitor interval.  There are two tricky bits
  * here.  First, if MII monitoring is activated, then we must disable
  * ARP monitoring.  Second, if the timer isn't running, we must
@@ -1650,8 +1572,6 @@ static struct attribute *per_bond_attrs[] = {
 	&dev_attr_lacp_rate.attr,
 	&dev_attr_ad_select.attr,
 	&dev_attr_xmit_hash_policy.attr,
-	&dev_attr_num_grat_arp.attr,
-	&dev_attr_num_unsol_na.attr,
 	&dev_attr_miimon.attr,
 	&dev_attr_primary.attr,
 	&dev_attr_primary_reselect.attr,
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 90736cb..77180b1 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -149,8 +149,6 @@ struct bond_params {
 	int mode;
 	int xmit_policy;
 	int miimon;
-	int num_grat_arp;
-	int num_unsol_na;
 	int arp_interval;
 	int arp_validate;
 	int use_carrier;
@@ -178,9 +176,6 @@ struct vlan_entry {
 	struct list_head vlan_list;
 	__be32 vlan_ip;
 	unsigned short vlan_id;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	struct in6_addr vlan_ipv6;
-#endif
 };
 
 struct slave {
@@ -234,8 +229,6 @@ struct bonding {
 	rwlock_t lock;
 	rwlock_t curr_slave_lock;
 	s8       kill_timers;
-	s8	 send_grat_arp;
-	s8	 send_unsol_na;
 	s8	 setup_by_slave;
 	s8       igmp_retrans;
 #ifdef CONFIG_PROC_FS
@@ -260,9 +253,6 @@ struct bonding {
 	struct   delayed_work alb_work;
 	struct   delayed_work ad_work;
 	struct   delayed_work mcast_work;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	struct   in6_addr master_ipv6;
-#endif
 #ifdef CONFIG_DEBUG_FS
 	/* debugging suport via debugfs */
 	struct	 dentry *debug_dir;
@@ -459,23 +449,4 @@ extern const struct bond_parm_tbl fail_over_mac_tbl[];
 extern const struct bond_parm_tbl pri_reselect_tbl[];
 extern struct bond_parm_tbl ad_select_tbl[];
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-void bond_send_unsolicited_na(struct bonding *bond);
-void bond_register_ipv6_notifier(void);
-void bond_unregister_ipv6_notifier(void);
-#else
-static inline void bond_send_unsolicited_na(struct bonding *bond)
-{
-	return;
-}
-static inline void bond_register_ipv6_notifier(void)
-{
-	return;
-}
-static inline void bond_unregister_ipv6_notifier(void)
-{
-	return;
-}
-#endif
-
 #endif /* _LINUX_BONDING_H */
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index b2ff70f..969e700 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -501,13 +501,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		return NOTIFY_BAD;
 
 	case NETDEV_NOTIFY_PEERS:
+	case NETDEV_BONDING_FAILOVER:
 		/* Propagate to vlan devices */
 		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
 
-			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vlandev);
+			call_netdevice_notifiers(event, vlandev);
 		}
 		break;
 	}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5345b0b..acf553f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1203,6 +1203,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 			break;
 		/* fall through */
 	case NETDEV_NOTIFY_PEERS:
+	case NETDEV_BONDING_FAILOVER:
 		/* Send gratuitous ARP to notify of link change */
 		inetdev_send_gratuitous_arp(dev, in_dev);
 		break;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a51fa74c..6f7d491 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1746,6 +1746,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
 		fib6_run_gc(~0UL, net);
 		break;
 	case NETDEV_NOTIFY_PEERS:
+	case NETDEV_BONDING_FAILOVER:
 		ndisc_send_unsol_na(dev);
 		break;
 	default:
-- 
1.7.4


-- 
Ben Hutchings, Senior Software Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next-2.6 3/3] bonding,ipv4,ipv6,vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS
  2011-04-15 23:47 [PATCH net-next-2.6 3/3] bonding,ipv4,ipv6,vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS Ben Hutchings
@ 2011-04-16  0:30 ` Jay Vosburgh
  2011-04-16  1:51   ` Brian Haley
  2011-04-18 19:09   ` Ben Hutchings
  0 siblings, 2 replies; 10+ messages in thread
From: Jay Vosburgh @ 2011-04-16  0:30 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: David Miller, Andy Gospodarek, Patrick McHardy, netdev, Brian Haley

Ben Hutchings <bhutchings@solarflare.com> wrote:

>It is undesirable for the bonding driver to be poking into higher
>level protocols, and notifiers provide a way to avoid that.  This does
>mean removing the ability to configure reptitition of gratuitous ARPs
>and unsolicited NAs.

	In principle I think this is a good thing (getting rid of some
of those dependencies, duplicated code, etc).

	However, the removal of the multiple grat ARP and NAs may be an
issue for some users.  I don't know that we can just remove this (along
with its API) without going through the feature removal process.

	As I recall, the multiple gratuitous ARP stuff was added for
Infiniband, because it is dependent on the grat ARP for a smooth
failover.

	There is also currently logic to check the linkwatch link state
to wait for the link to go up prior to sending a grat ARP; this is also
for IB.

	Brian Haley added the unsolicited NAs; I've added him to the cc
so perhaps he (or somebody else) can comment on the necessity of keeping
the ability to send multiple NAs.

	-J

>Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
>---
> drivers/net/bonding/Makefile     |    3 -
> drivers/net/bonding/bond_ipv6.c  |  225 --------------------------------------
> drivers/net/bonding/bond_main.c  |   96 ----------------
> drivers/net/bonding/bond_sysfs.c |   80 --------------
> drivers/net/bonding/bonding.h    |   29 -----
> net/8021q/vlan.c                 |    3 +-
> net/ipv4/devinet.c               |    1 +
> net/ipv6/ndisc.c                 |    1 +
> 8 files changed, 4 insertions(+), 434 deletions(-)
> delete mode 100644 drivers/net/bonding/bond_ipv6.c
>
>diff --git a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile
>index 3c5c014..4c21bf6 100644
>--- a/drivers/net/bonding/Makefile
>+++ b/drivers/net/bonding/Makefile
>@@ -9,6 +9,3 @@ bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o bond_debugfs.o
> proc-$(CONFIG_PROC_FS) += bond_procfs.o
> bonding-objs += $(proc-y)
>
>-ipv6-$(subst m,y,$(CONFIG_IPV6)) += bond_ipv6.o
>-bonding-objs += $(ipv6-y)
>-
>diff --git a/drivers/net/bonding/bond_ipv6.c b/drivers/net/bonding/bond_ipv6.c
>deleted file mode 100644
>index 84fbd4e..0000000
>--- a/drivers/net/bonding/bond_ipv6.c
>+++ /dev/null
>@@ -1,225 +0,0 @@
>-/*
>- * Copyright(c) 2008 Hewlett-Packard Development Company, L.P.
>- *
>- * This program is free software; you can redistribute it and/or modify it
>- * under the terms of the GNU General Public License as published by the
>- * Free Software Foundation; either version 2 of the License, or
>- * (at your option) any later version.
>- *
>- * This program is distributed in the hope that it will be useful, but
>- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
>- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
>- * for more details.
>- *
>- * You should have received a copy of the GNU General Public License along
>- * with this program; if not, write to the Free Software Foundation, Inc.,
>- * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
>- *
>- * The full GNU General Public License is included in this distribution in the
>- * file called LICENSE.
>- *
>- */
>-
>-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>-
>-#include <linux/types.h>
>-#include <linux/if_vlan.h>
>-#include <net/ipv6.h>
>-#include <net/ndisc.h>
>-#include <net/addrconf.h>
>-#include <net/netns/generic.h>
>-#include "bonding.h"
>-
>-/*
>- * Assign bond->master_ipv6 to the next IPv6 address in the list, or
>- * zero it out if there are none.
>- */
>-static void bond_glean_dev_ipv6(struct net_device *dev, struct in6_addr *addr)
>-{
>-	struct inet6_dev *idev;
>-
>-	if (!dev)
>-		return;
>-
>-	idev = in6_dev_get(dev);
>-	if (!idev)
>-		return;
>-
>-	read_lock_bh(&idev->lock);
>-	if (!list_empty(&idev->addr_list)) {
>-		struct inet6_ifaddr *ifa
>-			= list_first_entry(&idev->addr_list,
>-					   struct inet6_ifaddr, if_list);
>-		ipv6_addr_copy(addr, &ifa->addr);
>-	} else
>-		ipv6_addr_set(addr, 0, 0, 0, 0);
>-
>-	read_unlock_bh(&idev->lock);
>-
>-	in6_dev_put(idev);
>-}
>-
>-static void bond_na_send(struct net_device *slave_dev,
>-			 struct in6_addr *daddr,
>-			 int router,
>-			 unsigned short vlan_id)
>-{
>-	struct in6_addr mcaddr;
>-	struct icmp6hdr icmp6h = {
>-		.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
>-	};
>-	struct sk_buff *skb;
>-
>-	icmp6h.icmp6_router = router;
>-	icmp6h.icmp6_solicited = 0;
>-	icmp6h.icmp6_override = 1;
>-
>-	addrconf_addr_solict_mult(daddr, &mcaddr);
>-
>-	pr_debug("ipv6 na on slave %s: dest %pI6, src %pI6\n",
>-		 slave_dev->name, &mcaddr, daddr);
>-
>-	skb = ndisc_build_skb(slave_dev, &mcaddr, daddr, &icmp6h, daddr,
>-			      ND_OPT_TARGET_LL_ADDR);
>-
>-	if (!skb) {
>-		pr_err("NA packet allocation failed\n");
>-		return;
>-	}
>-
>-	if (vlan_id) {
>-		/* The Ethernet header is not present yet, so it is
>-		 * too early to insert a VLAN tag.  Force use of an
>-		 * out-of-line tag here and let dev_hard_start_xmit()
>-		 * insert it if the slave hardware can't.
>-		 */
>-		skb = __vlan_hwaccel_put_tag(skb, vlan_id);
>-		if (!skb) {
>-			pr_err("failed to insert VLAN tag\n");
>-			return;
>-		}
>-	}
>-
>-	ndisc_send_skb(skb, slave_dev, NULL, &mcaddr, daddr, &icmp6h);
>-}
>-
>-/*
>- * Kick out an unsolicited Neighbor Advertisement for an IPv6 address on
>- * the bonding master.  This will help the switch learn our address
>- * if in active-backup mode.
>- *
>- * Caller must hold curr_slave_lock for read or better
>- */
>-void bond_send_unsolicited_na(struct bonding *bond)
>-{
>-	struct slave *slave = bond->curr_active_slave;
>-	struct vlan_entry *vlan;
>-	struct inet6_dev *idev;
>-	int is_router;
>-
>-	pr_debug("%s: bond %s slave %s\n", bond->dev->name,
>-		 __func__, slave ? slave->dev->name : "NULL");
>-
>-	if (!slave || !bond->send_unsol_na ||
>-	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
>-		return;
>-
>-	bond->send_unsol_na--;
>-
>-	idev = in6_dev_get(bond->dev);
>-	if (!idev)
>-		return;
>-
>-	is_router = !!idev->cnf.forwarding;
>-
>-	in6_dev_put(idev);
>-
>-	if (!ipv6_addr_any(&bond->master_ipv6))
>-		bond_na_send(slave->dev, &bond->master_ipv6, is_router, 0);
>-
>-	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
>-		if (!ipv6_addr_any(&vlan->vlan_ipv6)) {
>-			bond_na_send(slave->dev, &vlan->vlan_ipv6, is_router,
>-				     vlan->vlan_id);
>-		}
>-	}
>-}
>-
>-/*
>- * bond_inet6addr_event: handle inet6addr notifier chain events.
>- *
>- * We keep track of device IPv6 addresses primarily to use as source
>- * addresses in NS probes.
>- *
>- * We track one IPv6 for the main device (if it has one).
>- */
>-static int bond_inet6addr_event(struct notifier_block *this,
>-				unsigned long event,
>-				void *ptr)
>-{
>-	struct inet6_ifaddr *ifa = ptr;
>-	struct net_device *vlan_dev, *event_dev = ifa->idev->dev;
>-	struct bonding *bond;
>-	struct vlan_entry *vlan;
>-	struct bond_net *bn = net_generic(dev_net(event_dev), bond_net_id);
>-
>-	list_for_each_entry(bond, &bn->dev_list, bond_list) {
>-		if (bond->dev == event_dev) {
>-			switch (event) {
>-			case NETDEV_UP:
>-				if (ipv6_addr_any(&bond->master_ipv6))
>-					ipv6_addr_copy(&bond->master_ipv6,
>-						       &ifa->addr);
>-				return NOTIFY_OK;
>-			case NETDEV_DOWN:
>-				if (ipv6_addr_equal(&bond->master_ipv6,
>-						    &ifa->addr))
>-					bond_glean_dev_ipv6(bond->dev,
>-							    &bond->master_ipv6);
>-				return NOTIFY_OK;
>-			default:
>-				return NOTIFY_DONE;
>-			}
>-		}
>-
>-		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
>-			if (!bond->vlgrp)
>-				continue;
>-			vlan_dev = vlan_group_get_device(bond->vlgrp,
>-							 vlan->vlan_id);
>-			if (vlan_dev == event_dev) {
>-				switch (event) {
>-				case NETDEV_UP:
>-					if (ipv6_addr_any(&vlan->vlan_ipv6))
>-						ipv6_addr_copy(&vlan->vlan_ipv6,
>-							       &ifa->addr);
>-					return NOTIFY_OK;
>-				case NETDEV_DOWN:
>-					if (ipv6_addr_equal(&vlan->vlan_ipv6,
>-							    &ifa->addr))
>-						bond_glean_dev_ipv6(vlan_dev,
>-								    &vlan->vlan_ipv6);
>-					return NOTIFY_OK;
>-				default:
>-					return NOTIFY_DONE;
>-				}
>-			}
>-		}
>-	}
>-	return NOTIFY_DONE;
>-}
>-
>-static struct notifier_block bond_inet6addr_notifier = {
>-	.notifier_call = bond_inet6addr_event,
>-};
>-
>-void bond_register_ipv6_notifier(void)
>-{
>-	register_inet6addr_notifier(&bond_inet6addr_notifier);
>-}
>-
>-void bond_unregister_ipv6_notifier(void)
>-{
>-	unregister_inet6addr_notifier(&bond_inet6addr_notifier);
>-}
>-
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index b51e021..5cd4766 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -89,8 +89,6 @@
>
> static int max_bonds	= BOND_DEFAULT_MAX_BONDS;
> static int tx_queues	= BOND_DEFAULT_TX_QUEUES;
>-static int num_grat_arp = 1;
>-static int num_unsol_na = 1;
> static int miimon	= BOND_LINK_MON_INTERV;
> static int updelay;
> static int downdelay;
>@@ -113,10 +111,6 @@ module_param(max_bonds, int, 0);
> MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
> module_param(tx_queues, int, 0);
> MODULE_PARM_DESC(tx_queues, "Max number of transmit queues (default = 16)");
>-module_param(num_grat_arp, int, 0644);
>-MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event");
>-module_param(num_unsol_na, int, 0644);
>-MODULE_PARM_DESC(num_unsol_na, "Number of unsolicited IPv6 Neighbor Advertisements packets to send on failover event");
> module_param(miimon, int, 0);
> MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
> module_param(updelay, int, 0);
>@@ -234,7 +228,6 @@ struct bond_parm_tbl ad_select_tbl[] = {
>
> /*-------------------------- Forward declarations ---------------------------*/
>
>-static void bond_send_gratuitous_arp(struct bonding *bond);
> static int bond_init(struct net_device *bond_dev);
> static void bond_uninit(struct net_device *bond_dev);
>
>@@ -1160,14 +1153,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
> 				bond_do_fail_over_mac(bond, new_active,
> 						      old_active);
>
>-			if (netif_running(bond->dev)) {
>-				bond->send_grat_arp = bond->params.num_grat_arp;
>-				bond_send_gratuitous_arp(bond);
>-
>-				bond->send_unsol_na = bond->params.num_unsol_na;
>-				bond_send_unsolicited_na(bond);
>-			}
>-
> 			write_unlock_bh(&bond->curr_slave_lock);
> 			read_unlock(&bond->lock);
>
>@@ -2578,18 +2563,6 @@ void bond_mii_monitor(struct work_struct *work)
> 	if (bond->slave_cnt == 0)
> 		goto re_arm;
>
>-	if (bond->send_grat_arp) {
>-		read_lock(&bond->curr_slave_lock);
>-		bond_send_gratuitous_arp(bond);
>-		read_unlock(&bond->curr_slave_lock);
>-	}
>-
>-	if (bond->send_unsol_na) {
>-		read_lock(&bond->curr_slave_lock);
>-		bond_send_unsolicited_na(bond);
>-		read_unlock(&bond->curr_slave_lock);
>-	}
>-
> 	if (bond_miimon_inspect(bond)) {
> 		read_unlock(&bond->lock);
> 		rtnl_lock();
>@@ -2751,44 +2724,6 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
> 	}
> }
>
>-/*
>- * Kick out a gratuitous ARP for an IP on the bonding master plus one
>- * for each VLAN above us.
>- *
>- * Caller must hold curr_slave_lock for read or better
>- */
>-static void bond_send_gratuitous_arp(struct bonding *bond)
>-{
>-	struct slave *slave = bond->curr_active_slave;
>-	struct vlan_entry *vlan;
>-	struct net_device *vlan_dev;
>-
>-	pr_debug("bond_send_grat_arp: bond %s slave %s\n",
>-		 bond->dev->name, slave ? slave->dev->name : "NULL");
>-
>-	if (!slave || !bond->send_grat_arp ||
>-	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
>-		return;
>-
>-	bond->send_grat_arp--;
>-
>-	if (bond->master_ip) {
>-		bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip,
>-				bond->master_ip, 0);
>-	}
>-
>-	if (!bond->vlgrp)
>-		return;
>-
>-	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
>-		vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
>-		if (vlan->vlan_ip) {
>-			bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip,
>-				      vlan->vlan_ip, vlan->vlan_id);
>-		}
>-	}
>-}
>-
> static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip)
> {
> 	int i;
>@@ -3255,18 +3190,6 @@ void bond_activebackup_arp_mon(struct work_struct *work)
> 	if (bond->slave_cnt == 0)
> 		goto re_arm;
>
>-	if (bond->send_grat_arp) {
>-		read_lock(&bond->curr_slave_lock);
>-		bond_send_gratuitous_arp(bond);
>-		read_unlock(&bond->curr_slave_lock);
>-	}
>-
>-	if (bond->send_unsol_na) {
>-		read_lock(&bond->curr_slave_lock);
>-		bond_send_unsolicited_na(bond);
>-		read_unlock(&bond->curr_slave_lock);
>-	}
>-
> 	if (bond_ab_arp_inspect(bond, delta_in_ticks)) {
> 		read_unlock(&bond->lock);
> 		rtnl_lock();
>@@ -3645,9 +3568,6 @@ static int bond_close(struct net_device *bond_dev)
>
> 	write_lock_bh(&bond->lock);
>
>-	bond->send_grat_arp = 0;
>-	bond->send_unsol_na = 0;
>-
> 	/* signal timers not to re-arm */
> 	bond->kill_timers = 1;
>
>@@ -4724,18 +4644,6 @@ static int bond_check_params(struct bond_params *params)
> 		use_carrier = 1;
> 	}
>
>-	if (num_grat_arp < 0 || num_grat_arp > 255) {
>-		pr_warning("Warning: num_grat_arp (%d) not in range 0-255 so it was reset to 1\n",
>-			   num_grat_arp);
>-		num_grat_arp = 1;
>-	}
>-
>-	if (num_unsol_na < 0 || num_unsol_na > 255) {
>-		pr_warning("Warning: num_unsol_na (%d) not in range 0-255 so it was reset to 1\n",
>-			   num_unsol_na);
>-		num_unsol_na = 1;
>-	}
>-
> 	/* reset values for 802.3ad */
> 	if (bond_mode == BOND_MODE_8023AD) {
> 		if (!miimon) {
>@@ -4925,8 +4833,6 @@ static int bond_check_params(struct bond_params *params)
> 	params->mode = bond_mode;
> 	params->xmit_policy = xmit_hashtype;
> 	params->miimon = miimon;
>-	params->num_grat_arp = num_grat_arp;
>-	params->num_unsol_na = num_unsol_na;
> 	params->arp_interval = arp_interval;
> 	params->arp_validate = arp_validate_value;
> 	params->updelay = updelay;
>@@ -5121,7 +5027,6 @@ static int __init bonding_init(void)
>
> 	register_netdevice_notifier(&bond_netdev_notifier);
> 	register_inetaddr_notifier(&bond_inetaddr_notifier);
>-	bond_register_ipv6_notifier();
> out:
> 	return res;
> err:
>@@ -5136,7 +5041,6 @@ static void __exit bonding_exit(void)
> {
> 	unregister_netdevice_notifier(&bond_netdev_notifier);
> 	unregister_inetaddr_notifier(&bond_inetaddr_notifier);
>-	bond_unregister_ipv6_notifier();
>
> 	bond_destroy_sysfs();
> 	bond_destroy_debugfs();
>diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
>index de87aea..259ff32 100644
>--- a/drivers/net/bonding/bond_sysfs.c
>+++ b/drivers/net/bonding/bond_sysfs.c
>@@ -874,84 +874,6 @@ static DEVICE_ATTR(ad_select, S_IRUGO | S_IWUSR,
> 		   bonding_show_ad_select, bonding_store_ad_select);
>
> /*
>- * Show and set the number of grat ARP to send after a failover event.
>- */
>-static ssize_t bonding_show_n_grat_arp(struct device *d,
>-				   struct device_attribute *attr,
>-				   char *buf)
>-{
>-	struct bonding *bond = to_bond(d);
>-
>-	return sprintf(buf, "%d\n", bond->params.num_grat_arp);
>-}
>-
>-static ssize_t bonding_store_n_grat_arp(struct device *d,
>-				    struct device_attribute *attr,
>-				    const char *buf, size_t count)
>-{
>-	int new_value, ret = count;
>-	struct bonding *bond = to_bond(d);
>-
>-	if (sscanf(buf, "%d", &new_value) != 1) {
>-		pr_err("%s: no num_grat_arp value specified.\n",
>-		       bond->dev->name);
>-		ret = -EINVAL;
>-		goto out;
>-	}
>-	if (new_value < 0 || new_value > 255) {
>-		pr_err("%s: Invalid num_grat_arp value %d not in range 0-255; rejected.\n",
>-		       bond->dev->name, new_value);
>-		ret = -EINVAL;
>-		goto out;
>-	} else {
>-		bond->params.num_grat_arp = new_value;
>-	}
>-out:
>-	return ret;
>-}
>-static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR,
>-		   bonding_show_n_grat_arp, bonding_store_n_grat_arp);
>-
>-/*
>- * Show and set the number of unsolicited NA's to send after a failover event.
>- */
>-static ssize_t bonding_show_n_unsol_na(struct device *d,
>-				       struct device_attribute *attr,
>-				       char *buf)
>-{
>-	struct bonding *bond = to_bond(d);
>-
>-	return sprintf(buf, "%d\n", bond->params.num_unsol_na);
>-}
>-
>-static ssize_t bonding_store_n_unsol_na(struct device *d,
>-					struct device_attribute *attr,
>-					const char *buf, size_t count)
>-{
>-	int new_value, ret = count;
>-	struct bonding *bond = to_bond(d);
>-
>-	if (sscanf(buf, "%d", &new_value) != 1) {
>-		pr_err("%s: no num_unsol_na value specified.\n",
>-		       bond->dev->name);
>-		ret = -EINVAL;
>-		goto out;
>-	}
>-
>-	if (new_value < 0 || new_value > 255) {
>-		pr_err("%s: Invalid num_unsol_na value %d not in range 0-255; rejected.\n",
>-		       bond->dev->name, new_value);
>-		ret = -EINVAL;
>-		goto out;
>-	} else
>-		bond->params.num_unsol_na = new_value;
>-out:
>-	return ret;
>-}
>-static DEVICE_ATTR(num_unsol_na, S_IRUGO | S_IWUSR,
>-		   bonding_show_n_unsol_na, bonding_store_n_unsol_na);
>-
>-/*
>  * Show and set the MII monitor interval.  There are two tricky bits
>  * here.  First, if MII monitoring is activated, then we must disable
>  * ARP monitoring.  Second, if the timer isn't running, we must
>@@ -1650,8 +1572,6 @@ static struct attribute *per_bond_attrs[] = {
> 	&dev_attr_lacp_rate.attr,
> 	&dev_attr_ad_select.attr,
> 	&dev_attr_xmit_hash_policy.attr,
>-	&dev_attr_num_grat_arp.attr,
>-	&dev_attr_num_unsol_na.attr,
> 	&dev_attr_miimon.attr,
> 	&dev_attr_primary.attr,
> 	&dev_attr_primary_reselect.attr,
>diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>index 90736cb..77180b1 100644
>--- a/drivers/net/bonding/bonding.h
>+++ b/drivers/net/bonding/bonding.h
>@@ -149,8 +149,6 @@ struct bond_params {
> 	int mode;
> 	int xmit_policy;
> 	int miimon;
>-	int num_grat_arp;
>-	int num_unsol_na;
> 	int arp_interval;
> 	int arp_validate;
> 	int use_carrier;
>@@ -178,9 +176,6 @@ struct vlan_entry {
> 	struct list_head vlan_list;
> 	__be32 vlan_ip;
> 	unsigned short vlan_id;
>-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
>-	struct in6_addr vlan_ipv6;
>-#endif
> };
>
> struct slave {
>@@ -234,8 +229,6 @@ struct bonding {
> 	rwlock_t lock;
> 	rwlock_t curr_slave_lock;
> 	s8       kill_timers;
>-	s8	 send_grat_arp;
>-	s8	 send_unsol_na;
> 	s8	 setup_by_slave;
> 	s8       igmp_retrans;
> #ifdef CONFIG_PROC_FS
>@@ -260,9 +253,6 @@ struct bonding {
> 	struct   delayed_work alb_work;
> 	struct   delayed_work ad_work;
> 	struct   delayed_work mcast_work;
>-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
>-	struct   in6_addr master_ipv6;
>-#endif
> #ifdef CONFIG_DEBUG_FS
> 	/* debugging suport via debugfs */
> 	struct	 dentry *debug_dir;
>@@ -459,23 +449,4 @@ extern const struct bond_parm_tbl fail_over_mac_tbl[];
> extern const struct bond_parm_tbl pri_reselect_tbl[];
> extern struct bond_parm_tbl ad_select_tbl[];
>
>-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
>-void bond_send_unsolicited_na(struct bonding *bond);
>-void bond_register_ipv6_notifier(void);
>-void bond_unregister_ipv6_notifier(void);
>-#else
>-static inline void bond_send_unsolicited_na(struct bonding *bond)
>-{
>-	return;
>-}
>-static inline void bond_register_ipv6_notifier(void)
>-{
>-	return;
>-}
>-static inline void bond_unregister_ipv6_notifier(void)
>-{
>-	return;
>-}
>-#endif
>-
> #endif /* _LINUX_BONDING_H */
>diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
>index b2ff70f..969e700 100644
>--- a/net/8021q/vlan.c
>+++ b/net/8021q/vlan.c
>@@ -501,13 +501,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
> 		return NOTIFY_BAD;
>
> 	case NETDEV_NOTIFY_PEERS:
>+	case NETDEV_BONDING_FAILOVER:
> 		/* Propagate to vlan devices */
> 		for (i = 0; i < VLAN_N_VID; i++) {
> 			vlandev = vlan_group_get_device(grp, i);
> 			if (!vlandev)
> 				continue;
>
>-			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vlandev);
>+			call_netdevice_notifiers(event, vlandev);
> 		}
> 		break;
> 	}
>diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
>index 5345b0b..acf553f 100644
>--- a/net/ipv4/devinet.c
>+++ b/net/ipv4/devinet.c
>@@ -1203,6 +1203,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
> 			break;
> 		/* fall through */
> 	case NETDEV_NOTIFY_PEERS:
>+	case NETDEV_BONDING_FAILOVER:
> 		/* Send gratuitous ARP to notify of link change */
> 		inetdev_send_gratuitous_arp(dev, in_dev);
> 		break;
>diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
>index a51fa74c..6f7d491 100644
>--- a/net/ipv6/ndisc.c
>+++ b/net/ipv6/ndisc.c
>@@ -1746,6 +1746,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
> 		fib6_run_gc(~0UL, net);
> 		break;
> 	case NETDEV_NOTIFY_PEERS:
>+	case NETDEV_BONDING_FAILOVER:
> 		ndisc_send_unsol_na(dev);
> 		break;
> 	default:
>-- 
>1.7.4
>
>
>-- 
>Ben Hutchings, Senior Software Engineer, Solarflare
>Not speaking for my employer; that's the marketing department's job.
>They asked us to note that Solarflare product names are trademarked.
>

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next-2.6 3/3] bonding,ipv4,ipv6,vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS
  2011-04-16  0:30 ` Jay Vosburgh
@ 2011-04-16  1:51   ` Brian Haley
  2011-04-16  2:53     ` Ben Hutchings
  2011-04-18 19:09   ` Ben Hutchings
  1 sibling, 1 reply; 10+ messages in thread
From: Brian Haley @ 2011-04-16  1:51 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: Ben Hutchings, David Miller, Andy Gospodarek, Patrick McHardy, netdev

On 04/15/2011 08:30 PM, Jay Vosburgh wrote:
> Ben Hutchings <bhutchings@solarflare.com> wrote:
> 
>> It is undesirable for the bonding driver to be poking into higher
>> level protocols, and notifiers provide a way to avoid that.  This does
>> mean removing the ability to configure reptitition of gratuitous ARPs
>> and unsolicited NAs.
> 
> 	In principle I think this is a good thing (getting rid of some
> of those dependencies, duplicated code, etc).
> 
> 	However, the removal of the multiple grat ARP and NAs may be an
> issue for some users.  I don't know that we can just remove this (along
> with its API) without going through the feature removal process.

Right, I don't know how many people are using these, they might not be
happy, especially since specifying an unknown parameter will cause a
module load to fail:

--> modprobe bonding foobar=27
FATAL: Error inserting bonding (/lib/modules/2.6.32-31-generic/kernel/drivers/net/bonding/bonding.ko): Unknown symbol in module, or unknown parameter (see dmesg)

When these params are stuffed in /etc/modprobe.d/options, a reboot to
a kernel without them will cause some swearing :)

BTW, if this is accepted you need to update the documentation as well.

> 	As I recall, the multiple gratuitous ARP stuff was added for
> Infiniband, because it is dependent on the grat ARP for a smooth
> failover.
> 
> 	There is also currently logic to check the linkwatch link state
> to wait for the link to go up prior to sending a grat ARP; this is also
> for IB.
> 
> 	Brian Haley added the unsolicited NAs; I've added him to the cc
> so perhaps he (or somebody else) can comment on the necessity of keeping
> the ability to send multiple NAs.

I added it because in an IPv6-only environment I was seeing really long
failover times on bonds.  I believe this was a customer-reported issue, so
there *might* be someone setting it, but I think my testing always showed
one was enough to wake-up the switch.

Is it useful to call netdev_bonding_change() multiple times from within
bond_change_active_slave(), like MAX(arp, na) times?

One comment below...

>> -/*
>> - * Kick out a gratuitous ARP for an IP on the bonding master plus one
>> - * for each VLAN above us.
>> - *
>> - * Caller must hold curr_slave_lock for read or better
>> - */
>> -static void bond_send_gratuitous_arp(struct bonding *bond)
>> -{
>> -	struct slave *slave = bond->curr_active_slave;
>> -	struct vlan_entry *vlan;
>> -	struct net_device *vlan_dev;
>> -
>> -	pr_debug("bond_send_grat_arp: bond %s slave %s\n",
>> -		 bond->dev->name, slave ? slave->dev->name : "NULL");
>> -
>> -	if (!slave || !bond->send_grat_arp ||
>> -	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
>> -		return;
>> -
>> -	bond->send_grat_arp--;
>> -
>> -	if (bond->master_ip) {
>> -		bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip,
>> -				bond->master_ip, 0);
>> -	}
>> -
>> -	if (!bond->vlgrp)
>> -		return;
>> -
>> -	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
>> -		vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
>> -		if (vlan->vlan_ip) {
>> -			bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip,
>> -				      vlan->vlan_ip, vlan->vlan_id);
>> -		}
>> -	}
>> -}

Does your change also cover this case with multiple VLAN IDs?  Is that covered in
the vlan.c code below?

>> diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
>> index b2ff70f..969e700 100644
>> --- a/net/8021q/vlan.c
>> +++ b/net/8021q/vlan.c
>> @@ -501,13 +501,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
>> 		return NOTIFY_BAD;
>>
>> 	case NETDEV_NOTIFY_PEERS:
>> +	case NETDEV_BONDING_FAILOVER:
>> 		/* Propagate to vlan devices */
>> 		for (i = 0; i < VLAN_N_VID; i++) {
>> 			vlandev = vlan_group_get_device(grp, i);
>> 			if (!vlandev)
>> 				continue;
>>
>> -			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vlandev);
>> +			call_netdevice_notifiers(event, vlandev);
>> 		}
>> 		break;
>> 	}

Thanks,

-Brian

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next-2.6 3/3] bonding,ipv4,ipv6,vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS
  2011-04-16  1:51   ` Brian Haley
@ 2011-04-16  2:53     ` Ben Hutchings
  0 siblings, 0 replies; 10+ messages in thread
From: Ben Hutchings @ 2011-04-16  2:53 UTC (permalink / raw)
  To: Brian Haley
  Cc: Jay Vosburgh, David Miller, Andy Gospodarek, Patrick McHardy, netdev

On Fri, 2011-04-15 at 21:51 -0400, Brian Haley wrote:
> On 04/15/2011 08:30 PM, Jay Vosburgh wrote:
> > Ben Hutchings <bhutchings@solarflare.com> wrote:
> > 
> >> It is undesirable for the bonding driver to be poking into higher
> >> level protocols, and notifiers provide a way to avoid that.  This does
> >> mean removing the ability to configure reptitition of gratuitous ARPs
> >> and unsolicited NAs.
> > 
> > 	In principle I think this is a good thing (getting rid of some
> > of those dependencies, duplicated code, etc).
> > 
> > 	However, the removal of the multiple grat ARP and NAs may be an
> > issue for some users.  I don't know that we can just remove this (along
> > with its API) without going through the feature removal process.
> 
> Right, I don't know how many people are using these, they might not be
> happy, especially since specifying an unknown parameter will cause a
> module load to fail:
> 
> --> modprobe bonding foobar=27
> FATAL: Error inserting bonding (/lib/modules/2.6.32-31-generic/kernel/drivers/net/bonding/bonding.ko): Unknown symbol in module, or unknown parameter (see dmesg)
> 
> When these params are stuffed in /etc/modprobe.d/options, a reboot to
> a kernel without them will cause some swearing :)

Yes, this is a problem.

If the feature of repeated advertismenets is implemented in ipv4/ipv6
then we could propagate the parameters there, logging a warning, up to
some deprecation deadline.

> BTW, if this is accepted you need to update the documentation as well.
> 
> > 	As I recall, the multiple gratuitous ARP stuff was added for
> > Infiniband, because it is dependent on the grat ARP for a smooth
> > failover.
> > 
> > 	There is also currently logic to check the linkwatch link state
> > to wait for the link to go up prior to sending a grat ARP; this is also
> > for IB.
> > 
> > 	Brian Haley added the unsolicited NAs; I've added him to the cc
> > so perhaps he (or somebody else) can comment on the necessity of keeping
> > the ability to send multiple NAs.
> 
> I added it because in an IPv6-only environment I was seeing really long
> failover times on bonds.  I believe this was a customer-reported issue, so
> there *might* be someone setting it, but I think my testing always showed
> one was enough to wake-up the switch.
> 
> Is it useful to call netdev_bonding_change() multiple times from within
> bond_change_active_slave(), like MAX(arp, na) times?

We can't wait around to do that.  And it would be abusing the notifier
based on assumptions about what other components do, which I'm trying to
get away from.

> One comment below...
[...]
> Does your change also cover this case with multiple VLAN IDs?  Is that covered in
> the vlan.c code below?
[...]

Should do.  I didn't specifically test multiple VLAN IDs but I'm looping
over them.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next-2.6 3/3] bonding,ipv4,ipv6,vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS
  2011-04-16  0:30 ` Jay Vosburgh
  2011-04-16  1:51   ` Brian Haley
@ 2011-04-18 19:09   ` Ben Hutchings
  2011-04-19  1:32     ` Brian Haley
  2011-04-19 19:12     ` Jay Vosburgh
  1 sibling, 2 replies; 10+ messages in thread
From: Ben Hutchings @ 2011-04-18 19:09 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: David Miller, Andy Gospodarek, Patrick McHardy, netdev, Brian Haley

On Fri, 2011-04-15 at 17:30 -0700, Jay Vosburgh wrote:
> Ben Hutchings <bhutchings@solarflare.com> wrote:
> 
> >It is undesirable for the bonding driver to be poking into higher
> >level protocols, and notifiers provide a way to avoid that.  This does
> >mean removing the ability to configure reptitition of gratuitous ARPs
> >and unsolicited NAs.
> 
> 	In principle I think this is a good thing (getting rid of some
> of those dependencies, duplicated code, etc).
> 
> 	However, the removal of the multiple grat ARP and NAs may be an
> issue for some users.  I don't know that we can just remove this (along
> with its API) without going through the feature removal process.

Right.

> 	As I recall, the multiple gratuitous ARP stuff was added for
> Infiniband, because it is dependent on the grat ARP for a smooth
> failover.
> 
> 	There is also currently logic to check the linkwatch link state
> to wait for the link to go up prior to sending a grat ARP; this is also
> for IB.

Why would we activate a slave without link up?  Perhaps if the previous
active slave is removed?

> 	Brian Haley added the unsolicited NAs; I've added him to the cc
> so perhaps he (or somebody else) can comment on the necessity of keeping
> the ability to send multiple NAs.
[...]

How about restoring the parameters like this:

---
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 18 Apr 2011 19:36:48 +0100
Subject: [PATCH net-next-2.6] ipv4,ipv6,bonding: Restore control over number of peer notifications

For backward compatibility, we should retain the module parameters and
sysfs attributes to control the number of peer notifications
(gratuitous ARPs and unsolicited NAs) sent after bonding failover.
Also, it is possible for failover to take place even though the new
active slave does not have link up, and in that case the peer
notification should be deferred until it does.

Change ipv4 and ipv6 so they do not automatically send peer
notifications on bonding failover.  Change the bonding driver to send
separate NETDEV_NOTIFY_PEERS notifications when the link is up, as
many times as requested.  Since it does not directly control which
protocols send notifications, make num_grat_arp and num_unsol_na
aliases for a single parameter.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
---
 Documentation/networking/bonding.txt |   31 ++++++++----------
 drivers/net/bonding/bond_main.c      |   59 ++++++++++++++++++++++++++++++++++
 drivers/net/bonding/bond_sysfs.c     |   26 +++++++++++++++
 drivers/net/bonding/bonding.h        |    2 +
 net/ipv4/devinet.c                   |    1 -
 net/ipv6/ndisc.c                     |    1 -
 6 files changed, 101 insertions(+), 19 deletions(-)

diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index e27202b..511b4e5 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -1,7 +1,7 @@
 
 		Linux Ethernet Bonding Driver HOWTO
 
-		Latest update: 23 September 2009
+		Latest update: 18 April 2011
 
 Initial release : Thomas Davis <tadavis at lbl.gov>
 Corrections, HA extensions : 2000/10/03-15 :
@@ -585,25 +585,22 @@ mode
 		chosen.
 
 num_grat_arp
-
-	Specifies the number of gratuitous ARPs to be issued after a
-	failover event.  One gratuitous ARP is issued immediately after
-	the failover, subsequent ARPs are sent at a rate of one per link
-	monitor interval (arp_interval or miimon, whichever is active).
-
-	The valid range is 0 - 255; the default value is 1.  This option
-	affects only the active-backup mode.  This option was added for
-	bonding version 3.3.0.
-
 num_unsol_na
 
-	Specifies the number of unsolicited IPv6 Neighbor Advertisements
-	to be issued after a failover event.  One unsolicited NA is issued
-	immediately after the failover.
+	Specify the number of peer notifications (gratuitous ARPs and
+	unsolicited IPv6 Neighbor Advertisements) to be issued after a
+	failover event.  As soon as the link is up on the new slave
+	(possibly immediately) a peer notification is sent on the
+	bonding device and each VLAN sub-device.  This is repeated at
+	each link monitor interval (arp_interval or miimon, whichever
+	is active) if the number is greater than 1.
+
+	These notifications are now generated by the ipv4 and ipv6 code
+	and the numbers of repetitions cannot be set independently.
 
-	The valid range is 0 - 255; the default value is 1.  This option
-	affects only the active-backup mode.  This option was added for
-	bonding version 3.4.0.
+	The valid range is 0 - 255; the default value is 1.  These options
+	affect only the active-backup mode.  These options were added for
+	bonding versions 3.3.0 and 3.4.0 respectively.
 
 primary
 
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 5cd4766..c9043db 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -89,6 +89,7 @@
 
 static int max_bonds	= BOND_DEFAULT_MAX_BONDS;
 static int tx_queues	= BOND_DEFAULT_TX_QUEUES;
+static int num_peer_notif = 1;
 static int miimon	= BOND_LINK_MON_INTERV;
 static int updelay;
 static int downdelay;
@@ -111,6 +112,10 @@ module_param(max_bonds, int, 0);
 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
 module_param(tx_queues, int, 0);
 MODULE_PARM_DESC(tx_queues, "Max number of transmit queues (default = 16)");
+module_param_named(num_grat_arp, num_peer_notif, int, 0644);
+MODULE_PARM_DESC(num_grat_arp, "Number of peer notifications to send on failover event");
+module_param_named(num_unsol_na, num_peer_notif, int, 0644);
+MODULE_PARM_DESC(num_unsol_na, "Number of peer notifications to send on failover event");
 module_param(miimon, int, 0);
 MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
 module_param(updelay, int, 0);
@@ -1080,6 +1085,21 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
 	return bestslave;
 }
 
+static bool bond_should_notify_peers(struct bonding *bond)
+{
+	struct slave *slave = bond->curr_active_slave;
+
+	pr_debug("bond_should_notify_peers: bond %s slave %s\n",
+		 bond->dev->name, slave ? slave->dev->name : "NULL");
+
+	if (!slave || !bond->send_peer_notif ||
+	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
+		return false;
+
+	bond->send_peer_notif--;
+	return true;
+}
+
 /**
  * change_active_interface - change the active slave into the specified one
  * @bond: our bonding struct
@@ -1147,16 +1167,28 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 			bond_set_slave_inactive_flags(old_active);
 
 		if (new_active) {
+			bool should_notify_peers = false;
+
 			bond_set_slave_active_flags(new_active);
 
 			if (bond->params.fail_over_mac)
 				bond_do_fail_over_mac(bond, new_active,
 						      old_active);
 
+			if (netif_running(bond->dev)) {
+				bond->send_peer_notif =
+					bond->params.num_peer_notif;
+				should_notify_peers =
+					bond_should_notify_peers(bond);
+			}
+
 			write_unlock_bh(&bond->curr_slave_lock);
 			read_unlock(&bond->lock);
 
 			netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER);
+			if (should_notify_peers)
+				netdev_bonding_change(bond->dev,
+						      NETDEV_NOTIFY_PEERS);
 
 			read_lock(&bond->lock);
 			write_lock_bh(&bond->curr_slave_lock);
@@ -2555,6 +2587,7 @@ void bond_mii_monitor(struct work_struct *work)
 {
 	struct bonding *bond = container_of(work, struct bonding,
 					    mii_work.work);
+	bool should_notify_peers = false;
 
 	read_lock(&bond->lock);
 	if (bond->kill_timers)
@@ -2563,6 +2596,8 @@ void bond_mii_monitor(struct work_struct *work)
 	if (bond->slave_cnt == 0)
 		goto re_arm;
 
+	should_notify_peers = bond_should_notify_peers(bond);
+
 	if (bond_miimon_inspect(bond)) {
 		read_unlock(&bond->lock);
 		rtnl_lock();
@@ -2581,6 +2616,12 @@ re_arm:
 				   msecs_to_jiffies(bond->params.miimon));
 out:
 	read_unlock(&bond->lock);
+
+	if (should_notify_peers) {
+		rtnl_lock();
+		netdev_bonding_change(bond->dev, NETDEV_NOTIFY_PEERS);
+		rtnl_unlock();
+	}
 }
 
 static __be32 bond_glean_dev_ip(struct net_device *dev)
@@ -3178,6 +3219,7 @@ void bond_activebackup_arp_mon(struct work_struct *work)
 {
 	struct bonding *bond = container_of(work, struct bonding,
 					    arp_work.work);
+	bool should_notify_peers = false;
 	int delta_in_ticks;
 
 	read_lock(&bond->lock);
@@ -3190,6 +3232,8 @@ void bond_activebackup_arp_mon(struct work_struct *work)
 	if (bond->slave_cnt == 0)
 		goto re_arm;
 
+	should_notify_peers = bond_should_notify_peers(bond);
+
 	if (bond_ab_arp_inspect(bond, delta_in_ticks)) {
 		read_unlock(&bond->lock);
 		rtnl_lock();
@@ -3209,6 +3253,12 @@ re_arm:
 		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
 out:
 	read_unlock(&bond->lock);
+
+	if (should_notify_peers) {
+		rtnl_lock();
+		netdev_bonding_change(bond->dev, NETDEV_NOTIFY_PEERS);
+		rtnl_unlock();
+	}
 }
 
 /*-------------------------- netdev event handling --------------------------*/
@@ -3568,6 +3618,8 @@ static int bond_close(struct net_device *bond_dev)
 
 	write_lock_bh(&bond->lock);
 
+	bond->send_peer_notif = 0;
+
 	/* signal timers not to re-arm */
 	bond->kill_timers = 1;
 
@@ -4644,6 +4696,12 @@ static int bond_check_params(struct bond_params *params)
 		use_carrier = 1;
 	}
 
+	if (num_peer_notif < 0 || num_peer_notif > 255) {
+		pr_warning("Warning: num_grat_arp/num_unsol_na (%d) not in range 0-255 so it was reset to 1\n",
+			   num_peer_notif);
+		num_peer_notif = 1;
+	}
+
 	/* reset values for 802.3ad */
 	if (bond_mode == BOND_MODE_8023AD) {
 		if (!miimon) {
@@ -4833,6 +4891,7 @@ static int bond_check_params(struct bond_params *params)
 	params->mode = bond_mode;
 	params->xmit_policy = xmit_hashtype;
 	params->miimon = miimon;
+	params->num_peer_notif = num_peer_notif;
 	params->arp_interval = arp_interval;
 	params->arp_validate = arp_validate_value;
 	params->updelay = updelay;
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 259ff32..58fb3e9 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -874,6 +874,30 @@ static DEVICE_ATTR(ad_select, S_IRUGO | S_IWUSR,
 		   bonding_show_ad_select, bonding_store_ad_select);
 
 /*
+ * Show and set the number of peer notifications to send after a failover event.
+ */
+static ssize_t bonding_show_num_peer_notif(struct device *d,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct bonding *bond = to_bond(d);
+	return sprintf(buf, "%d\n", bond->params.num_peer_notif);
+}
+
+static ssize_t bonding_store_num_peer_notif(struct device *d,
+					    struct device_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct bonding *bond = to_bond(d);
+	int err = kstrtou8(buf, 10, &bond->params.num_peer_notif);
+	return err ? err : count;
+}
+static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR,
+		   bonding_show_num_peer_notif, bonding_store_num_peer_notif);
+static DEVICE_ATTR(num_unsol_na, S_IRUGO | S_IWUSR,
+		   bonding_show_num_peer_notif, bonding_store_num_peer_notif);
+
+/*
  * Show and set the MII monitor interval.  There are two tricky bits
  * here.  First, if MII monitoring is activated, then we must disable
  * ARP monitoring.  Second, if the timer isn't running, we must
@@ -1572,6 +1596,8 @@ static struct attribute *per_bond_attrs[] = {
 	&dev_attr_lacp_rate.attr,
 	&dev_attr_ad_select.attr,
 	&dev_attr_xmit_hash_policy.attr,
+	&dev_attr_num_grat_arp.attr,
+	&dev_attr_num_unsol_na.attr,
 	&dev_attr_miimon.attr,
 	&dev_attr_primary.attr,
 	&dev_attr_primary_reselect.attr,
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 77180b1..5bf3b89 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -149,6 +149,7 @@ struct bond_params {
 	int mode;
 	int xmit_policy;
 	int miimon;
+	u8 num_peer_notif;
 	int arp_interval;
 	int arp_validate;
 	int use_carrier;
@@ -229,6 +230,7 @@ struct bonding {
 	rwlock_t lock;
 	rwlock_t curr_slave_lock;
 	s8       kill_timers;
+	u8	 send_peer_notif;
 	s8	 setup_by_slave;
 	s8       igmp_retrans;
 #ifdef CONFIG_PROC_FS
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index acf553f..5345b0b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1203,7 +1203,6 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 			break;
 		/* fall through */
 	case NETDEV_NOTIFY_PEERS:
-	case NETDEV_BONDING_FAILOVER:
 		/* Send gratuitous ARP to notify of link change */
 		inetdev_send_gratuitous_arp(dev, in_dev);
 		break;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 6f7d491..a51fa74c 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1746,7 +1746,6 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
 		fib6_run_gc(~0UL, net);
 		break;
 	case NETDEV_NOTIFY_PEERS:
-	case NETDEV_BONDING_FAILOVER:
 		ndisc_send_unsol_na(dev);
 		break;
 	default:
-- 
1.7.4


-- 
Ben Hutchings, Senior Software Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next-2.6 3/3] bonding,ipv4,ipv6,vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS
  2011-04-18 19:09   ` Ben Hutchings
@ 2011-04-19  1:32     ` Brian Haley
  2011-04-19 14:56       ` Ben Hutchings
  2011-04-19 19:12     ` Jay Vosburgh
  1 sibling, 1 reply; 10+ messages in thread
From: Brian Haley @ 2011-04-19  1:32 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: Jay Vosburgh, David Miller, Andy Gospodarek, Patrick McHardy, netdev

On 04/18/2011 03:09 PM, Ben Hutchings wrote:
> How about restoring the parameters like this:
> 
> ---
> From: Ben Hutchings <bhutchings@solarflare.com>
> Date: Mon, 18 Apr 2011 19:36:48 +0100
> Subject: [PATCH net-next-2.6] ipv4,ipv6,bonding: Restore control over number of peer notifications
> 
> For backward compatibility, we should retain the module parameters and
> sysfs attributes to control the number of peer notifications
> (gratuitous ARPs and unsolicited NAs) sent after bonding failover.
> Also, it is possible for failover to take place even though the new
> active slave does not have link up, and in that case the peer
> notification should be deferred until it does.
> 
> Change ipv4 and ipv6 so they do not automatically send peer
> notifications on bonding failover.  Change the bonding driver to send
> separate NETDEV_NOTIFY_PEERS notifications when the link is up, as
> many times as requested.  Since it does not directly control which
> protocols send notifications, make num_grat_arp and num_unsol_na
> aliases for a single parameter.

Hi Ben,

I think this looks good, I'll try and get this tested here when I have
a chance, but for now I can:

Acked-by: Brian Haley <brian.haley@hp.com>

Should we just go ahead and make a new parameter for peer notification?
Compiled but untested patch below.

Thanks,

-Brian

--
Make a new bonding parameter, called num_peer_notif, to control how
many peer notifications are sent on fail-over.  Mark the old values,
num_grat_arp and num_unsol_na, as deprecated in the documentation.

Signed-off-by: Brian Haley <brian.haley@hp.com>

diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 511b4e5..8b16beb 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -585,8 +585,15 @@ mode
 		chosen.
 
 num_grat_arp
+
+	Deprecated.  Use num_peer_notif instead.
+
 num_unsol_na
 
+	Deprecated.  Use num_peer_notif instead.
+
+num_peer_notif
+
 	Specify the number of peer notifications (gratuitous ARPs and
 	unsolicited IPv6 Neighbor Advertisements) to be issued after a
 	failover event.  As soon as the link is up on the new slave
@@ -595,7 +602,7 @@ num_unsol_na
 	each link monitor interval (arp_interval or miimon, whichever
 	is active) if the number is greater than 1.
 
-	These notifications are now generated by the ipv4 and ipv6 code
+	These notifications are now generated by the IPv4 and IPv6 code
 	and the numbers of repetitions cannot be set independently.
 
 	The valid range is 0 - 255; the default value is 1.  These options
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 956a6f7..631ca9e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -116,6 +116,8 @@ module_param_named(num_grat_arp, num_peer_notif, int, 0644);
 MODULE_PARM_DESC(num_grat_arp, "Number of peer notifications to send on failover event");
 module_param_named(num_unsol_na, num_peer_notif, int, 0644);
 MODULE_PARM_DESC(num_unsol_na, "Number of peer notifications to send on failover event");
+module_param_named(num_peer_notif, num_peer_notif, int, 0644);
+MODULE_PARM_DESC(num_unsol_na, "Number of peer notifications to send on failover event");
 module_param(miimon, int, 0);
 MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
 module_param(updelay, int, 0);
@@ -4699,7 +4701,7 @@ static int bond_check_params(struct bond_params *params)
 	}
 
 	if (num_peer_notif < 0 || num_peer_notif > 255) {
-		pr_warning("Warning: num_grat_arp/num_unsol_na (%d) not in range 0-255 so it was reset to 1\n",
+		pr_warning("Warning: num_peer_notif (%d) not in range 0-255 so it was reset to 1\n",
 			   num_peer_notif);
 		num_peer_notif = 1;
 	}
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 58fb3e9..b03e7be 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -896,6 +896,8 @@ static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR,
 		   bonding_show_num_peer_notif, bonding_store_num_peer_notif);
 static DEVICE_ATTR(num_unsol_na, S_IRUGO | S_IWUSR,
 		   bonding_show_num_peer_notif, bonding_store_num_peer_notif);
+static DEVICE_ATTR(num_peer_notif, S_IRUGO | S_IWUSR,
+		   bonding_show_num_peer_notif, bonding_store_num_peer_notif);
 
 /*
  * Show and set the MII monitor interval.  There are two tricky bits
@@ -1598,6 +1600,7 @@ static struct attribute *per_bond_attrs[] = {
 	&dev_attr_xmit_hash_policy.attr,
 	&dev_attr_num_grat_arp.attr,
 	&dev_attr_num_unsol_na.attr,
+	&dev_attr_num_peer_notif.attr,
 	&dev_attr_miimon.attr,
 	&dev_attr_primary.attr,
 	&dev_attr_primary_reselect.attr,

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next-2.6 3/3] bonding,ipv4,ipv6,vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS
  2011-04-19  1:32     ` Brian Haley
@ 2011-04-19 14:56       ` Ben Hutchings
  0 siblings, 0 replies; 10+ messages in thread
From: Ben Hutchings @ 2011-04-19 14:56 UTC (permalink / raw)
  To: Brian Haley
  Cc: Jay Vosburgh, David Miller, Andy Gospodarek, Patrick McHardy, netdev

On Mon, 2011-04-18 at 21:32 -0400, Brian Haley wrote:
> On 04/18/2011 03:09 PM, Ben Hutchings wrote:
> > How about restoring the parameters like this:
> > 
> > ---
> > From: Ben Hutchings <bhutchings@solarflare.com>
> > Date: Mon, 18 Apr 2011 19:36:48 +0100
> > Subject: [PATCH net-next-2.6] ipv4,ipv6,bonding: Restore control over number of peer notifications
> > 
> > For backward compatibility, we should retain the module parameters and
> > sysfs attributes to control the number of peer notifications
> > (gratuitous ARPs and unsolicited NAs) sent after bonding failover.
> > Also, it is possible for failover to take place even though the new
> > active slave does not have link up, and in that case the peer
> > notification should be deferred until it does.
> > 
> > Change ipv4 and ipv6 so they do not automatically send peer
> > notifications on bonding failover.  Change the bonding driver to send
> > separate NETDEV_NOTIFY_PEERS notifications when the link is up, as
> > many times as requested.  Since it does not directly control which
> > protocols send notifications, make num_grat_arp and num_unsol_na
> > aliases for a single parameter.
> 
> Hi Ben,
> 
> I think this looks good, I'll try and get this tested here when I have
> a chance, but for now I can:
> 
> Acked-by: Brian Haley <brian.haley@hp.com>
> 
> Should we just go ahead and make a new parameter for peer notification?
> Compiled but untested patch below.
[...]

If everyone is in agreement to deprecate the old parameters in favour of
a single parameter (or none) I think there should be a target date for
removal, documented in Documentation/feature-removal-schedule.txt.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next-2.6 3/3] bonding,ipv4,ipv6,vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS
  2011-04-18 19:09   ` Ben Hutchings
  2011-04-19  1:32     ` Brian Haley
@ 2011-04-19 19:12     ` Jay Vosburgh
  2011-04-19 19:34       ` Brian Haley
  2011-04-22  4:13       ` David Miller
  1 sibling, 2 replies; 10+ messages in thread
From: Jay Vosburgh @ 2011-04-19 19:12 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: David Miller, Andy Gospodarek, Patrick McHardy, netdev, Brian Haley

Ben Hutchings <bhutchings@solarflare.com> wrote:

>On Fri, 2011-04-15 at 17:30 -0700, Jay Vosburgh wrote:
>> Ben Hutchings <bhutchings@solarflare.com> wrote:
>> 
>> >It is undesirable for the bonding driver to be poking into higher
>> >level protocols, and notifiers provide a way to avoid that.  This does
>> >mean removing the ability to configure reptitition of gratuitous ARPs
>> >and unsolicited NAs.
>> 
>> 	In principle I think this is a good thing (getting rid of some
>> of those dependencies, duplicated code, etc).
>> 
>> 	However, the removal of the multiple grat ARP and NAs may be an
>> issue for some users.  I don't know that we can just remove this (along
>> with its API) without going through the feature removal process.
>
>Right.
>
>> 	As I recall, the multiple gratuitous ARP stuff was added for
>> Infiniband, because it is dependent on the grat ARP for a smooth
>> failover.
>> 
>> 	There is also currently logic to check the linkwatch link state
>> to wait for the link to go up prior to sending a grat ARP; this is also
>> for IB.
>
>Why would we activate a slave without link up?  Perhaps if the previous
>active slave is removed?

	It's special sauce for Infiniband; I don't recall the details
except that the submitter said that without it the initial gratuitous
ARP could be lost.  I didn't (and still don't) have IB hardware to test
this on.

	Ideally, I'd like to test the current situation (no checking of
linkwatch) for IB and see if the linkwatch trick is still necessary.  It
may have been due to some behavior of the device driver that is now
different.  For now, though, keeping it (as you've done) just in case
seems prudent.

>> 	Brian Haley added the unsolicited NAs; I've added him to the cc
>> so perhaps he (or somebody else) can comment on the necessity of keeping
>> the ability to send multiple NAs.
>[...]
>
>How about restoring the parameters like this:

	I think the patch below is better than Brian's suggestion (new
single parameter) because it won't break existing configurations, even
though it is doing magic under the covers.  If the magic is a real
concern, we could add logic to detect when both parameters are used and
set to different values, but I'm not really that worked up about it as
long as the magic is clearly documented.

>---
>From: Ben Hutchings <bhutchings@solarflare.com>
>Date: Mon, 18 Apr 2011 19:36:48 +0100
>Subject: [PATCH net-next-2.6] ipv4,ipv6,bonding: Restore control over number of peer notifications
>
>For backward compatibility, we should retain the module parameters and
>sysfs attributes to control the number of peer notifications
>(gratuitous ARPs and unsolicited NAs) sent after bonding failover.
>Also, it is possible for failover to take place even though the new
>active slave does not have link up, and in that case the peer
>notification should be deferred until it does.
>
>Change ipv4 and ipv6 so they do not automatically send peer
>notifications on bonding failover.  Change the bonding driver to send
>separate NETDEV_NOTIFY_PEERS notifications when the link is up, as
>many times as requested.  Since it does not directly control which
>protocols send notifications, make num_grat_arp and num_unsol_na
>aliases for a single parameter.
>
>Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
>---
> Documentation/networking/bonding.txt |   31 ++++++++----------
> drivers/net/bonding/bond_main.c      |   59 ++++++++++++++++++++++++++++++++++
> drivers/net/bonding/bond_sysfs.c     |   26 +++++++++++++++
> drivers/net/bonding/bonding.h        |    2 +
> net/ipv4/devinet.c                   |    1 -
> net/ipv6/ndisc.c                     |    1 -
> 6 files changed, 101 insertions(+), 19 deletions(-)
>
>diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
>index e27202b..511b4e5 100644
>--- a/Documentation/networking/bonding.txt
>+++ b/Documentation/networking/bonding.txt
>@@ -1,7 +1,7 @@
>
> 		Linux Ethernet Bonding Driver HOWTO
>
>-		Latest update: 23 September 2009
>+		Latest update: 18 April 2011
>
> Initial release : Thomas Davis <tadavis at lbl.gov>
> Corrections, HA extensions : 2000/10/03-15 :
>@@ -585,25 +585,22 @@ mode
> 		chosen.
>
> num_grat_arp
>-
>-	Specifies the number of gratuitous ARPs to be issued after a
>-	failover event.  One gratuitous ARP is issued immediately after
>-	the failover, subsequent ARPs are sent at a rate of one per link
>-	monitor interval (arp_interval or miimon, whichever is active).
>-
>-	The valid range is 0 - 255; the default value is 1.  This option
>-	affects only the active-backup mode.  This option was added for
>-	bonding version 3.3.0.
>-
> num_unsol_na
>
>-	Specifies the number of unsolicited IPv6 Neighbor Advertisements
>-	to be issued after a failover event.  One unsolicited NA is issued
>-	immediately after the failover.
>+	Specify the number of peer notifications (gratuitous ARPs and
>+	unsolicited IPv6 Neighbor Advertisements) to be issued after a
>+	failover event.  As soon as the link is up on the new slave
>+	(possibly immediately) a peer notification is sent on the
>+	bonding device and each VLAN sub-device.  This is repeated at
>+	each link monitor interval (arp_interval or miimon, whichever
>+	is active) if the number is greater than 1.
>+
>+	These notifications are now generated by the ipv4 and ipv6 code
>+	and the numbers of repetitions cannot be set independently.

	I think it would be good for the long run to be specific about
"now," i.e., bump the bonding version and specify that here.

>-	The valid range is 0 - 255; the default value is 1.  This option
>-	affects only the active-backup mode.  This option was added for
>-	bonding version 3.4.0.
>+	The valid range is 0 - 255; the default value is 1.  These options
>+	affect only the active-backup mode.  These options were added for
>+	bonding versions 3.3.0 and 3.4.0 respectively.
>
> primary
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 5cd4766..c9043db 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -89,6 +89,7 @@
>
> static int max_bonds	= BOND_DEFAULT_MAX_BONDS;
> static int tx_queues	= BOND_DEFAULT_TX_QUEUES;
>+static int num_peer_notif = 1;
> static int miimon	= BOND_LINK_MON_INTERV;
> static int updelay;
> static int downdelay;
>@@ -111,6 +112,10 @@ module_param(max_bonds, int, 0);
> MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
> module_param(tx_queues, int, 0);
> MODULE_PARM_DESC(tx_queues, "Max number of transmit queues (default = 16)");
>+module_param_named(num_grat_arp, num_peer_notif, int, 0644);
>+MODULE_PARM_DESC(num_grat_arp, "Number of peer notifications to send on failover event");
>+module_param_named(num_unsol_na, num_peer_notif, int, 0644);
>+MODULE_PARM_DESC(num_unsol_na, "Number of peer notifications to send on failover event");

	Perhaps add "alias of num_grat_arp" here so it shows up in modinfo?

	In any event, with the version number thing I mentioned above, I
think this is a reasonable way to proceed.

	-J

Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>


> module_param(miimon, int, 0);
> MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
> module_param(updelay, int, 0);
>@@ -1080,6 +1085,21 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
> 	return bestslave;
> }
>
>+static bool bond_should_notify_peers(struct bonding *bond)
>+{
>+	struct slave *slave = bond->curr_active_slave;
>+
>+	pr_debug("bond_should_notify_peers: bond %s slave %s\n",
>+		 bond->dev->name, slave ? slave->dev->name : "NULL");
>+
>+	if (!slave || !bond->send_peer_notif ||
>+	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
>+		return false;
>+
>+	bond->send_peer_notif--;
>+	return true;
>+}
>+
> /**
>  * change_active_interface - change the active slave into the specified one
>  * @bond: our bonding struct
>@@ -1147,16 +1167,28 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
> 			bond_set_slave_inactive_flags(old_active);
>
> 		if (new_active) {
>+			bool should_notify_peers = false;
>+
> 			bond_set_slave_active_flags(new_active);
>
> 			if (bond->params.fail_over_mac)
> 				bond_do_fail_over_mac(bond, new_active,
> 						      old_active);
>
>+			if (netif_running(bond->dev)) {
>+				bond->send_peer_notif =
>+					bond->params.num_peer_notif;
>+				should_notify_peers =
>+					bond_should_notify_peers(bond);
>+			}
>+
> 			write_unlock_bh(&bond->curr_slave_lock);
> 			read_unlock(&bond->lock);
>
> 			netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER);
>+			if (should_notify_peers)
>+				netdev_bonding_change(bond->dev,
>+						      NETDEV_NOTIFY_PEERS);
>
> 			read_lock(&bond->lock);
> 			write_lock_bh(&bond->curr_slave_lock);
>@@ -2555,6 +2587,7 @@ void bond_mii_monitor(struct work_struct *work)
> {
> 	struct bonding *bond = container_of(work, struct bonding,
> 					    mii_work.work);
>+	bool should_notify_peers = false;
>
> 	read_lock(&bond->lock);
> 	if (bond->kill_timers)
>@@ -2563,6 +2596,8 @@ void bond_mii_monitor(struct work_struct *work)
> 	if (bond->slave_cnt == 0)
> 		goto re_arm;
>
>+	should_notify_peers = bond_should_notify_peers(bond);
>+
> 	if (bond_miimon_inspect(bond)) {
> 		read_unlock(&bond->lock);
> 		rtnl_lock();
>@@ -2581,6 +2616,12 @@ re_arm:
> 				   msecs_to_jiffies(bond->params.miimon));
> out:
> 	read_unlock(&bond->lock);
>+
>+	if (should_notify_peers) {
>+		rtnl_lock();
>+		netdev_bonding_change(bond->dev, NETDEV_NOTIFY_PEERS);
>+		rtnl_unlock();
>+	}
> }
>
> static __be32 bond_glean_dev_ip(struct net_device *dev)
>@@ -3178,6 +3219,7 @@ void bond_activebackup_arp_mon(struct work_struct *work)
> {
> 	struct bonding *bond = container_of(work, struct bonding,
> 					    arp_work.work);
>+	bool should_notify_peers = false;
> 	int delta_in_ticks;
>
> 	read_lock(&bond->lock);
>@@ -3190,6 +3232,8 @@ void bond_activebackup_arp_mon(struct work_struct *work)
> 	if (bond->slave_cnt == 0)
> 		goto re_arm;
>
>+	should_notify_peers = bond_should_notify_peers(bond);
>+
> 	if (bond_ab_arp_inspect(bond, delta_in_ticks)) {
> 		read_unlock(&bond->lock);
> 		rtnl_lock();
>@@ -3209,6 +3253,12 @@ re_arm:
> 		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
> out:
> 	read_unlock(&bond->lock);
>+
>+	if (should_notify_peers) {
>+		rtnl_lock();
>+		netdev_bonding_change(bond->dev, NETDEV_NOTIFY_PEERS);
>+		rtnl_unlock();
>+	}
> }
>
> /*-------------------------- netdev event handling --------------------------*/
>@@ -3568,6 +3618,8 @@ static int bond_close(struct net_device *bond_dev)
>
> 	write_lock_bh(&bond->lock);
>
>+	bond->send_peer_notif = 0;
>+
> 	/* signal timers not to re-arm */
> 	bond->kill_timers = 1;
>
>@@ -4644,6 +4696,12 @@ static int bond_check_params(struct bond_params *params)
> 		use_carrier = 1;
> 	}
>
>+	if (num_peer_notif < 0 || num_peer_notif > 255) {
>+		pr_warning("Warning: num_grat_arp/num_unsol_na (%d) not in range 0-255 so it was reset to 1\n",
>+			   num_peer_notif);
>+		num_peer_notif = 1;
>+	}
>+
> 	/* reset values for 802.3ad */
> 	if (bond_mode == BOND_MODE_8023AD) {
> 		if (!miimon) {
>@@ -4833,6 +4891,7 @@ static int bond_check_params(struct bond_params *params)
> 	params->mode = bond_mode;
> 	params->xmit_policy = xmit_hashtype;
> 	params->miimon = miimon;
>+	params->num_peer_notif = num_peer_notif;
> 	params->arp_interval = arp_interval;
> 	params->arp_validate = arp_validate_value;
> 	params->updelay = updelay;
>diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
>index 259ff32..58fb3e9 100644
>--- a/drivers/net/bonding/bond_sysfs.c
>+++ b/drivers/net/bonding/bond_sysfs.c
>@@ -874,6 +874,30 @@ static DEVICE_ATTR(ad_select, S_IRUGO | S_IWUSR,
> 		   bonding_show_ad_select, bonding_store_ad_select);
>
> /*
>+ * Show and set the number of peer notifications to send after a failover event.
>+ */
>+static ssize_t bonding_show_num_peer_notif(struct device *d,
>+					   struct device_attribute *attr,
>+					   char *buf)
>+{
>+	struct bonding *bond = to_bond(d);
>+	return sprintf(buf, "%d\n", bond->params.num_peer_notif);
>+}
>+
>+static ssize_t bonding_store_num_peer_notif(struct device *d,
>+					    struct device_attribute *attr,
>+					    const char *buf, size_t count)
>+{
>+	struct bonding *bond = to_bond(d);
>+	int err = kstrtou8(buf, 10, &bond->params.num_peer_notif);
>+	return err ? err : count;
>+}
>+static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR,
>+		   bonding_show_num_peer_notif, bonding_store_num_peer_notif);
>+static DEVICE_ATTR(num_unsol_na, S_IRUGO | S_IWUSR,
>+		   bonding_show_num_peer_notif, bonding_store_num_peer_notif);
>+
>+/*
>  * Show and set the MII monitor interval.  There are two tricky bits
>  * here.  First, if MII monitoring is activated, then we must disable
>  * ARP monitoring.  Second, if the timer isn't running, we must
>@@ -1572,6 +1596,8 @@ static struct attribute *per_bond_attrs[] = {
> 	&dev_attr_lacp_rate.attr,
> 	&dev_attr_ad_select.attr,
> 	&dev_attr_xmit_hash_policy.attr,
>+	&dev_attr_num_grat_arp.attr,
>+	&dev_attr_num_unsol_na.attr,
> 	&dev_attr_miimon.attr,
> 	&dev_attr_primary.attr,
> 	&dev_attr_primary_reselect.attr,
>diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>index 77180b1..5bf3b89 100644
>--- a/drivers/net/bonding/bonding.h
>+++ b/drivers/net/bonding/bonding.h
>@@ -149,6 +149,7 @@ struct bond_params {
> 	int mode;
> 	int xmit_policy;
> 	int miimon;
>+	u8 num_peer_notif;
> 	int arp_interval;
> 	int arp_validate;
> 	int use_carrier;
>@@ -229,6 +230,7 @@ struct bonding {
> 	rwlock_t lock;
> 	rwlock_t curr_slave_lock;
> 	s8       kill_timers;
>+	u8	 send_peer_notif;
> 	s8	 setup_by_slave;
> 	s8       igmp_retrans;
> #ifdef CONFIG_PROC_FS
>diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
>index acf553f..5345b0b 100644
>--- a/net/ipv4/devinet.c
>+++ b/net/ipv4/devinet.c
>@@ -1203,7 +1203,6 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
> 			break;
> 		/* fall through */
> 	case NETDEV_NOTIFY_PEERS:
>-	case NETDEV_BONDING_FAILOVER:
> 		/* Send gratuitous ARP to notify of link change */
> 		inetdev_send_gratuitous_arp(dev, in_dev);
> 		break;
>diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
>index 6f7d491..a51fa74c 100644
>--- a/net/ipv6/ndisc.c
>+++ b/net/ipv6/ndisc.c
>@@ -1746,7 +1746,6 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
> 		fib6_run_gc(~0UL, net);
> 		break;
> 	case NETDEV_NOTIFY_PEERS:
>-	case NETDEV_BONDING_FAILOVER:
> 		ndisc_send_unsol_na(dev);
> 		break;
> 	default:
>-- 
>1.7.4
>
>
>-- 
>Ben Hutchings, Senior Software Engineer, Solarflare
>Not speaking for my employer; that's the marketing department's job.
>They asked us to note that Solarflare product names are trademarked.

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next-2.6 3/3] bonding,ipv4,ipv6,vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS
  2011-04-19 19:12     ` Jay Vosburgh
@ 2011-04-19 19:34       ` Brian Haley
  2011-04-22  4:13       ` David Miller
  1 sibling, 0 replies; 10+ messages in thread
From: Brian Haley @ 2011-04-19 19:34 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: Ben Hutchings, David Miller, Andy Gospodarek, Patrick McHardy, netdev

On 04/19/2011 03:12 PM, Jay Vosburgh wrote:
>>> 	Brian Haley added the unsolicited NAs; I've added him to the cc
>>> so perhaps he (or somebody else) can comment on the necessity of keeping
>>> the ability to send multiple NAs.
>> [...]
>>
>> How about restoring the parameters like this:
> 
> 	I think the patch below is better than Brian's suggestion (new
> single parameter) because it won't break existing configurations, even
> though it is doing magic under the covers.  If the magic is a real
> concern, we could add logic to detect when both parameters are used and
> set to different values, but I'm not really that worked up about it as
> long as the magic is clearly documented.

My patch was on-top of Ben's, it still left the old options around, just
gave people time to notice they didn't work as before.  It's not a big
deal either way.

-Brian

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next-2.6 3/3] bonding,ipv4,ipv6,vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS
  2011-04-19 19:12     ` Jay Vosburgh
  2011-04-19 19:34       ` Brian Haley
@ 2011-04-22  4:13       ` David Miller
  1 sibling, 0 replies; 10+ messages in thread
From: David Miller @ 2011-04-22  4:13 UTC (permalink / raw)
  To: fubar; +Cc: bhutchings, andy, kaber, netdev, brian.haley

From: Jay Vosburgh <fubar@us.ibm.com>
Date: Tue, 19 Apr 2011 12:12:01 -0700

> Ben Hutchings <bhutchings@solarflare.com> wrote:
> 
>>Why would we activate a slave without link up?  Perhaps if the previous
>>active slave is removed?
> 
> 	It's special sauce for Infiniband; I don't recall the details
> except that the submitter said that without it the initial gratuitous
> ARP could be lost.  I didn't (and still don't) have IB hardware to test
> this on.

I vaguely remember this IB has too, but also forget the details.

If someone would get to the bottom of this and add a nice big comment
to the code, to prevent such difficulties in remembering exactly why we
do this in the future, I would very much appreciate it.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2011-04-22  4:14 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-04-15 23:47 [PATCH net-next-2.6 3/3] bonding,ipv4,ipv6,vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS Ben Hutchings
2011-04-16  0:30 ` Jay Vosburgh
2011-04-16  1:51   ` Brian Haley
2011-04-16  2:53     ` Ben Hutchings
2011-04-18 19:09   ` Ben Hutchings
2011-04-19  1:32     ` Brian Haley
2011-04-19 14:56       ` Ben Hutchings
2011-04-19 19:12     ` Jay Vosburgh
2011-04-19 19:34       ` Brian Haley
2011-04-22  4:13       ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.