All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] bonding: fix arp_validate on bonds inside a bridge
@ 2010-04-28 12:59 Jiri Bohac
  2010-04-28 19:05 ` Jay Vosburgh
  0 siblings, 1 reply; 10+ messages in thread
From: Jiri Bohac @ 2010-04-28 12:59 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: bonding-devel, netdev

Hi,

bonding with arp_validate does not currently work when the
bonding master is part of a bridge. This is because
bond_arp_rcv() is registered as a packet type handler for ARP,
but before netif_receive_skb() processes the ptype_base hash
table, handle_bridge() is called and changes the skb->dev to
point to the bridge device.

This patch makes bonding_should_drop() call the bonding ARP
handler directly if a IFF_MASTER_NEEDARP flag is set on the
bonding master.  bond_register_arp() now only needs to set the
IFF_MASTER_NEEDARP flag.

We ne longer need special ARP handling for inactive slaves, hence
IFF_SLAVE_NEEDARP is not needed.

skb_reset_network_header() and skb_reset_transport_header() need
to be called before the call to bonding_should_drop() because
bond_handle_arp() needs the offsets initialized.

P.S.: bonding_should_drop() should probably be renamed to
handle_bonding() -- we already have handle_bridge() and
handle_macvlan(), and bonding_should_drop() has long been doing
other stuff than deciding which packets to drop...



Signed-off-by: Jiri Bohac <jbohac@suse.cz>

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 0075514..cafd404 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1940,8 +1940,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 	}
 
 	slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
-				   IFF_SLAVE_INACTIVE | IFF_BONDING |
-				   IFF_SLAVE_NEEDARP);
+				   IFF_SLAVE_INACTIVE | IFF_BONDING);
 
 	kfree(slave);
 
@@ -2612,11 +2611,12 @@ static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32
 	}
 }
 
-static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+static void bond_handle_arp(struct sk_buff *skb)
 {
 	struct arphdr *arp;
 	struct slave *slave;
 	struct bonding *bond;
+	struct net_device *dev = skb->dev->master, *orig_dev = skb->dev;
 	unsigned char *arp_ptr;
 	__be32 sip, tip;
 
@@ -2637,9 +2637,8 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
 	bond = netdev_priv(dev);
 	read_lock(&bond->lock);
 
-	pr_debug("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n",
-		 bond->dev->name, skb->dev ? skb->dev->name : "NULL",
-		 orig_dev ? orig_dev->name : "NULL");
+	pr_debug("bond_handle_arp: bond: %s, master: %s, slave: %s\n",
+		bond->dev->name, dev->name, orig_dev->name);
 
 	slave = bond_get_slave_by_dev(bond, orig_dev);
 	if (!slave || !slave_do_arp_validate(bond, slave))
@@ -2684,8 +2683,7 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
 out_unlock:
 	read_unlock(&bond->lock);
 out:
-	dev_kfree_skb(skb);
-	return NET_RX_SUCCESS;
+	return;
 }
 
 /*
@@ -3567,23 +3565,12 @@ static void bond_unregister_lacpdu(struct bonding *bond)
 
 void bond_register_arp(struct bonding *bond)
 {
-	struct packet_type *pt = &bond->arp_mon_pt;
-
-	if (pt->type)
-		return;
-
-	pt->type = htons(ETH_P_ARP);
-	pt->dev = bond->dev;
-	pt->func = bond_arp_rcv;
-	dev_add_pack(pt);
+	bond->dev->priv_flags |= IFF_MASTER_NEEDARP;
 }
 
 void bond_unregister_arp(struct bonding *bond)
 {
-	struct packet_type *pt = &bond->arp_mon_pt;
-
-	dev_remove_pack(pt);
-	pt->type = 0;
+	bond->dev->priv_flags &= ~IFF_MASTER_NEEDARP;
 }
 
 /*---------------------------- Hashing Policies -----------------------------*/
@@ -5041,6 +5028,7 @@ static int __init bonding_init(void)
 	register_netdevice_notifier(&bond_netdev_notifier);
 	register_inetaddr_notifier(&bond_inetaddr_notifier);
 	bond_register_ipv6_notifier();
+	bond_handle_arp_hook = bond_handle_arp;
 out:
 	return res;
 err:
@@ -5061,6 +5049,7 @@ static void __exit bonding_exit(void)
 
 	rtnl_link_unregister(&bond_link_ops);
 	unregister_pernet_subsys(&bond_net_ops);
+	bond_handle_arp_hook = NULL;
 }
 
 module_init(bonding_init);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 257a7a4..57adfe5 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -212,7 +212,6 @@ struct bonding {
 	struct   bond_params params;
 	struct   list_head vlan_list;
 	struct   vlan_group *vlgrp;
-	struct   packet_type arp_mon_pt;
 	struct   workqueue_struct *wq;
 	struct   delayed_work mii_work;
 	struct   delayed_work arp_work;
@@ -292,14 +291,12 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave)
 	if (!bond_is_lb(bond))
 		slave->state = BOND_STATE_BACKUP;
 	slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
-	if (slave_do_arp_validate(bond, slave))
-		slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
 }
 
 static inline void bond_set_slave_active_flags(struct slave *slave)
 {
 	slave->state = BOND_STATE_ACTIVE;
-	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
+	slave->dev->priv_flags &= ~IFF_SLAVE_INACTIVE;
 }
 
 static inline void bond_set_master_3ad_flags(struct bonding *bond)
diff --git a/include/linux/if.h b/include/linux/if.h
index 3a9f410..84ab2c8 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -63,7 +63,7 @@
 #define IFF_MASTER_8023AD	0x8	/* bonding master, 802.3ad. 	*/
 #define IFF_MASTER_ALB	0x10		/* bonding master, balance-alb.	*/
 #define IFF_BONDING	0x20		/* bonding master or slave	*/
-#define IFF_SLAVE_NEEDARP 0x40		/* need ARPs for validation	*/
+#define IFF_MASTER_NEEDARP 0x40		/* need ARPs for validation	*/
 #define IFF_ISATAP	0x80		/* ISATAP interface (RFC4214)	*/
 #define IFF_MASTER_ARPMON 0x100		/* bonding master, ARP mon in use */
 #define IFF_WAN_HDLC	0x200		/* WAN HDLC device		*/
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fa8b476..9f82fc6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2055,6 +2055,8 @@ static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
 	}
 }
 
+extern void (*bond_handle_arp_hook)(struct sk_buff *skb);
+
 /* On bonding slaves other than the currently active slave, suppress
  * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
  * ARP on active-backup slaves with arp_validate enabled.
@@ -2076,11 +2078,13 @@ static inline int skb_bond_should_drop(struct sk_buff *skb,
 			skb_bond_set_mac_by_master(skb, master);
 		}
 
-		if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
-			if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
-			    skb->protocol == __cpu_to_be16(ETH_P_ARP))
-				return 0;
+		/* pass ARP frames directly to bonding
+		   before bridging or other hooks change them */
+		if ((master->priv_flags & IFF_MASTER_NEEDARP) &&
+		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
+			bond_handle_arp_hook(skb);
 
+		if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
 			if (master->priv_flags & IFF_MASTER_ALB) {
 				if (skb->pkt_type != PACKET_BROADCAST &&
 				    skb->pkt_type != PACKET_MULTICAST)
diff --git a/net/core/dev.c b/net/core/dev.c
index f769098..98d85a8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2314,6 +2314,9 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
+void (*bond_handle_arp_hook)(struct sk_buff *skb);
+EXPORT_SYMBOL_GPL(bond_handle_arp_hook);
+
 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
 
 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
@@ -2507,6 +2510,10 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (!skb->skb_iif)
 		skb->skb_iif = skb->dev->ifindex;
 
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb->mac_len = skb->network_header - skb->mac_header;
+
 	null_or_orig = NULL;
 	orig_dev = skb->dev;
 	master = ACCESS_ONCE(orig_dev->master);
@@ -2519,10 +2526,6 @@ int netif_receive_skb(struct sk_buff *skb)
 
 	__get_cpu_var(netdev_rx_stat).total++;
 
-	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
-
 	pt_prev = NULL;
 
 	rcu_read_lock();


-- 
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] bonding: fix arp_validate on bonds inside a bridge
  2010-04-28 12:59 [PATCH] bonding: fix arp_validate on bonds inside a bridge Jiri Bohac
@ 2010-04-28 19:05 ` Jay Vosburgh
       [not found]   ` <20100429163921.GA4228@midget.suse.cz>
  0 siblings, 1 reply; 10+ messages in thread
From: Jay Vosburgh @ 2010-04-28 19:05 UTC (permalink / raw)
  To: Jiri Bohac; +Cc: bonding-devel, netdev

Jiri Bohac <jbohac@suse.cz> wrote:

>bonding with arp_validate does not currently work when the
>bonding master is part of a bridge. This is because
>bond_arp_rcv() is registered as a packet type handler for ARP,
>but before netif_receive_skb() processes the ptype_base hash
>table, handle_bridge() is called and changes the skb->dev to
>point to the bridge device.
>
>This patch makes bonding_should_drop() call the bonding ARP
>handler directly if a IFF_MASTER_NEEDARP flag is set on the
>bonding master.  bond_register_arp() now only needs to set the
>IFF_MASTER_NEEDARP flag.
>
>We ne longer need special ARP handling for inactive slaves, hence
>IFF_SLAVE_NEEDARP is not needed.
>
>skb_reset_network_header() and skb_reset_transport_header() need
>to be called before the call to bonding_should_drop() because
>bond_handle_arp() needs the offsets initialized.
>
>P.S.: bonding_should_drop() should probably be renamed to
>handle_bonding() -- we already have handle_bridge() and
>handle_macvlan(), and bonding_should_drop() has long been doing
>other stuff than deciding which packets to drop...

	I agree, and I have code that I've been working on locally that
wraps the "should_drop" into a hook, similar to the bridge and macvlan
hooks that already exist.  I used different names, though.  I've got
bond_handle_frame (and bond_handle_frame_hook) bond_main.c and
handle_bonding in net/core/dev.c, where the implementation for
handle_bonding parallels that of bridge and macvlan:

#if defined(CONFIG_BONDING) || defined(CONFIG_BONDING_MODULE)
int (*bonding_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(bonding_handle_frame_hook);

static inline int handle_bonding(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct net_device *master = dev->master;

	if (master->priv_flags & IFF_MASTER_ML)
		return bonding_handle_frame_hook(skb);

	return skb_bond_should_drop(skb);
}
#else
#define handle_bonding(skb)	(skb)
#endif

	I think the structure you're using (skb_bond_should_drop calls
the hook as needed) may be better overall, as it doesn't require special
logic in the VLAN cases.  The disadvantage is that it's a little uglier
to hide the hook declaration behind #ifdef CONFIG_BONDING (more on that
below).

	The code I'm working on now doesn't just hook for ARP (I'm
working on a load-balance by subnet mode that needs to assign skb->dev
by destination to permit the slaves to operate independently from the
master; but that's another topic).  I did leave as much as possible to
the priv_flags, since that is less expensive to process than poking
around in the bonding structures (no locks for the priv_flags).

	I haven't tested your patch yet, but it looks like it should
work as advertised.

	I have a couple of minor comments, below, but nothing
substantive about the core of the changes.

Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>

>Signed-off-by: Jiri Bohac <jbohac@suse.cz>
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 0075514..cafd404 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -1940,8 +1940,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
> 	}
>
> 	slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
>-				   IFF_SLAVE_INACTIVE | IFF_BONDING |
>-				   IFF_SLAVE_NEEDARP);
>+				   IFF_SLAVE_INACTIVE | IFF_BONDING);
>
> 	kfree(slave);
>
>@@ -2612,11 +2611,12 @@ static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32
> 	}
> }
>
>-static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
>+static void bond_handle_arp(struct sk_buff *skb)
> {
> 	struct arphdr *arp;
> 	struct slave *slave;
> 	struct bonding *bond;
>+	struct net_device *dev = skb->dev->master, *orig_dev = skb->dev;
> 	unsigned char *arp_ptr;
> 	__be32 sip, tip;
>
>@@ -2637,9 +2637,8 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
> 	bond = netdev_priv(dev);
> 	read_lock(&bond->lock);
>
>-	pr_debug("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n",
>-		 bond->dev->name, skb->dev ? skb->dev->name : "NULL",
>-		 orig_dev ? orig_dev->name : "NULL");
>+	pr_debug("bond_handle_arp: bond: %s, master: %s, slave: %s\n",
>+		bond->dev->name, dev->name, orig_dev->name);
>
> 	slave = bond_get_slave_by_dev(bond, orig_dev);
> 	if (!slave || !slave_do_arp_validate(bond, slave))
>@@ -2684,8 +2683,7 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
> out_unlock:
> 	read_unlock(&bond->lock);
> out:
>-	dev_kfree_skb(skb);
>-	return NET_RX_SUCCESS;
>+	return;
> }
>
> /*
>@@ -3567,23 +3565,12 @@ static void bond_unregister_lacpdu(struct bonding *bond)
>
> void bond_register_arp(struct bonding *bond)
> {
>-	struct packet_type *pt = &bond->arp_mon_pt;
>-
>-	if (pt->type)
>-		return;
>-
>-	pt->type = htons(ETH_P_ARP);
>-	pt->dev = bond->dev;
>-	pt->func = bond_arp_rcv;
>-	dev_add_pack(pt);
>+	bond->dev->priv_flags |= IFF_MASTER_NEEDARP;
> }
>
> void bond_unregister_arp(struct bonding *bond)
> {
>-	struct packet_type *pt = &bond->arp_mon_pt;
>-
>-	dev_remove_pack(pt);
>-	pt->type = 0;
>+	bond->dev->priv_flags &= ~IFF_MASTER_NEEDARP;
> }
>
> /*---------------------------- Hashing Policies -----------------------------*/
>@@ -5041,6 +5028,7 @@ static int __init bonding_init(void)
> 	register_netdevice_notifier(&bond_netdev_notifier);
> 	register_inetaddr_notifier(&bond_inetaddr_notifier);
> 	bond_register_ipv6_notifier();
>+	bond_handle_arp_hook = bond_handle_arp;
> out:
> 	return res;
> err:
>@@ -5061,6 +5049,7 @@ static void __exit bonding_exit(void)
>
> 	rtnl_link_unregister(&bond_link_ops);
> 	unregister_pernet_subsys(&bond_net_ops);
>+	bond_handle_arp_hook = NULL;
> }
>
> module_init(bonding_init);
>diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>index 257a7a4..57adfe5 100644
>--- a/drivers/net/bonding/bonding.h
>+++ b/drivers/net/bonding/bonding.h
>@@ -212,7 +212,6 @@ struct bonding {
> 	struct   bond_params params;
> 	struct   list_head vlan_list;
> 	struct   vlan_group *vlgrp;
>-	struct   packet_type arp_mon_pt;
> 	struct   workqueue_struct *wq;
> 	struct   delayed_work mii_work;
> 	struct   delayed_work arp_work;
>@@ -292,14 +291,12 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave)
> 	if (!bond_is_lb(bond))
> 		slave->state = BOND_STATE_BACKUP;
> 	slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
>-	if (slave_do_arp_validate(bond, slave))
>-		slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
> }
>
> static inline void bond_set_slave_active_flags(struct slave *slave)
> {
> 	slave->state = BOND_STATE_ACTIVE;
>-	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
>+	slave->dev->priv_flags &= ~IFF_SLAVE_INACTIVE;
> }
>
> static inline void bond_set_master_3ad_flags(struct bonding *bond)
>diff --git a/include/linux/if.h b/include/linux/if.h
>index 3a9f410..84ab2c8 100644
>--- a/include/linux/if.h
>+++ b/include/linux/if.h
>@@ -63,7 +63,7 @@
> #define IFF_MASTER_8023AD	0x8	/* bonding master, 802.3ad. 	*/
> #define IFF_MASTER_ALB	0x10		/* bonding master, balance-alb.	*/
> #define IFF_BONDING	0x20		/* bonding master or slave	*/
>-#define IFF_SLAVE_NEEDARP 0x40		/* need ARPs for validation	*/
>+#define IFF_MASTER_NEEDARP 0x40		/* need ARPs for validation	*/
> #define IFF_ISATAP	0x80		/* ISATAP interface (RFC4214)	*/
> #define IFF_MASTER_ARPMON 0x100		/* bonding master, ARP mon in use */
> #define IFF_WAN_HDLC	0x200		/* WAN HDLC device		*/
>diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>index fa8b476..9f82fc6 100644
>--- a/include/linux/netdevice.h
>+++ b/include/linux/netdevice.h
>@@ -2055,6 +2055,8 @@ static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
> 	}
> }
>
>+extern void (*bond_handle_arp_hook)(struct sk_buff *skb);

	Should this be inside the the skb_bond_should_drop function to
limit its scope?  Just wondering if that's a little tidier.

> /* On bonding slaves other than the currently active slave, suppress
>  * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
>  * ARP on active-backup slaves with arp_validate enabled.
>@@ -2076,11 +2078,13 @@ static inline int skb_bond_should_drop(struct sk_buff *skb,
> 			skb_bond_set_mac_by_master(skb, master);
> 		}
>
>-		if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
>-			if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
>-			    skb->protocol == __cpu_to_be16(ETH_P_ARP))
>-				return 0;
>+		/* pass ARP frames directly to bonding
>+		   before bridging or other hooks change them */
>+		if ((master->priv_flags & IFF_MASTER_NEEDARP) &&
>+		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
>+			bond_handle_arp_hook(skb);
>
>+		if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
> 			if (master->priv_flags & IFF_MASTER_ALB) {
> 				if (skb->pkt_type != PACKET_BROADCAST &&
> 				    skb->pkt_type != PACKET_MULTICAST)
>diff --git a/net/core/dev.c b/net/core/dev.c
>index f769098..98d85a8 100644
>--- a/net/core/dev.c
>+++ b/net/core/dev.c
>@@ -2314,6 +2314,9 @@ static inline int deliver_skb(struct sk_buff *skb,
> 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
> }
>
>+void (*bond_handle_arp_hook)(struct sk_buff *skb);
>+EXPORT_SYMBOL_GPL(bond_handle_arp_hook);

	Should this be hidden by

#if defined(CONFIG_BONDING) || defined(CONFIG_BONDING_MODULE)

	with some parallel changes to skb_bond_should_drop so it
vanishes if bonding is not configured?  Granted, distros will all turn
it on anyway, but the embedded size might be a bit smaller.

> #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
>
> #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
>@@ -2507,6 +2510,10 @@ int netif_receive_skb(struct sk_buff *skb)
> 	if (!skb->skb_iif)
> 		skb->skb_iif = skb->dev->ifindex;
>
>+	skb_reset_network_header(skb);
>+	skb_reset_transport_header(skb);
>+	skb->mac_len = skb->network_header - skb->mac_header;
>+
> 	null_or_orig = NULL;
> 	orig_dev = skb->dev;
> 	master = ACCESS_ONCE(orig_dev->master);
>@@ -2519,10 +2526,6 @@ int netif_receive_skb(struct sk_buff *skb)
>
> 	__get_cpu_var(netdev_rx_stat).total++;
>
>-	skb_reset_network_header(skb);
>-	skb_reset_transport_header(skb);
>-	skb->mac_len = skb->network_header - skb->mac_header;
>-
> 	pt_prev = NULL;
>
> 	rcu_read_lock();

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] bonding: fix arp_validate on bonds inside a bridge
       [not found]   ` <20100429163921.GA4228@midget.suse.cz>
@ 2010-04-29 18:57     ` Jay Vosburgh
  2010-04-30 15:45       ` Jiri Bohac
  0 siblings, 1 reply; 10+ messages in thread
From: Jay Vosburgh @ 2010-04-29 18:57 UTC (permalink / raw)
  To: Jiri Bohac; +Cc: bonding-devel, netdev

Jiri Bohac <jbohac@suse.cz> wrote:

>On Wed, Apr 28, 2010 at 12:05:11PM -0700, Jay Vosburgh wrote:
>> Jiri Bohac <jbohac@suse.cz> wrote:
>> >--- a/include/linux/netdevice.h
>> >+++ b/include/linux/netdevice.h
>> >@@ -2055,6 +2055,8 @@ static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
>> > 	}
>> > }
>> >
>> >+extern void (*bond_handle_arp_hook)(struct sk_buff *skb);
>> 
>> 	Should this be inside the the skb_bond_should_drop function to
>> limit its scope?  Just wondering if that's a little tidier.
>
>No, this needs to be global, so that the bonding module can
>initialize it with the correct address.
>
>
>> >--- a/net/core/dev.c
>> >+++ b/net/core/dev.c
>> >@@ -2314,6 +2314,9 @@ static inline int deliver_skb(struct sk_buff *skb,
>> > 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
>> > }
>> >
>> >+void (*bond_handle_arp_hook)(struct sk_buff *skb);
>> >+EXPORT_SYMBOL_GPL(bond_handle_arp_hook);
>> 
>> 	Should this be hidden by
>> 
>> #if defined(CONFIG_BONDING) || defined(CONFIG_BONDING_MODULE)
>> 
>> 	with some parallel changes to skb_bond_should_drop so it
>> vanishes if bonding is not configured?  Granted, distros will all turn
>> it on anyway, but the embedded size might be a bit smaller.
>
>Sure, good idea. I put the whole skb_bonding_should_drop function
>in an #ifdef, which will actually speed up the rx path if
>CONFIG_BONDING is not set. A new version follows:

	This doesn't apply to the current net-next-2.6 (because
skb_bond_should_drop was pulled out of line a few weeks ago); can you
update the patch?

	-J

>----
>bonding with arp_validate does not currently work when the
>bonding master is part of a bridge. This is because
>bond_arp_rcv() is registered as a packet type handler for ARP,
>but before netif_receive_skb() processes the ptype_base hash
>table, handle_bridge() is called and changes the skb->dev to
>point to the bridge device.
>
>This patch makes bonding_should_drop() call the bonding ARP
>handler directly if a IFF_MASTER_NEEDARP flag is set on the
>bonding master.  bond_register_arp() now only needs to set the
>IFF_MASTER_NEEDARP flag.
>
>We no longer need special ARP handling for inactive slaves, hence
>IFF_SLAVE_NEEDARP is not needed.
>
>skb_reset_network_header() and skb_reset_transport_header() need
>to be called before the call to bonding_should_drop() because
>bond_handle_arp() needs the offsets initialized.
>
>As a side-effect, skb_bond_should_drop is #defined as 0
>when CONFIG_BONDING is not set.
>
>Signed-off-by: Jiri Bohac <jbohac@suse.cz>
>
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 0075514..cafd404 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -1940,8 +1940,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
> 	}
>
> 	slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
>-				   IFF_SLAVE_INACTIVE | IFF_BONDING |
>-				   IFF_SLAVE_NEEDARP);
>+				   IFF_SLAVE_INACTIVE | IFF_BONDING);
>
> 	kfree(slave);
>
>@@ -2612,11 +2611,12 @@ static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32
> 	}
> }
>
>-static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
>+static void bond_handle_arp(struct sk_buff *skb)
> {
> 	struct arphdr *arp;
> 	struct slave *slave;
> 	struct bonding *bond;
>+	struct net_device *dev = skb->dev->master, *orig_dev = skb->dev;
> 	unsigned char *arp_ptr;
> 	__be32 sip, tip;
>
>@@ -2637,9 +2637,8 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
> 	bond = netdev_priv(dev);
> 	read_lock(&bond->lock);
>
>-	pr_debug("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n",
>-		 bond->dev->name, skb->dev ? skb->dev->name : "NULL",
>-		 orig_dev ? orig_dev->name : "NULL");
>+	pr_debug("bond_handle_arp: bond: %s, master: %s, slave: %s\n",
>+		bond->dev->name, dev->name, orig_dev->name);
>
> 	slave = bond_get_slave_by_dev(bond, orig_dev);
> 	if (!slave || !slave_do_arp_validate(bond, slave))
>@@ -2684,8 +2683,7 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
> out_unlock:
> 	read_unlock(&bond->lock);
> out:
>-	dev_kfree_skb(skb);
>-	return NET_RX_SUCCESS;
>+	return;
> }
>
> /*
>@@ -3567,23 +3565,12 @@ static void bond_unregister_lacpdu(struct bonding *bond)
>
> void bond_register_arp(struct bonding *bond)
> {
>-	struct packet_type *pt = &bond->arp_mon_pt;
>-
>-	if (pt->type)
>-		return;
>-
>-	pt->type = htons(ETH_P_ARP);
>-	pt->dev = bond->dev;
>-	pt->func = bond_arp_rcv;
>-	dev_add_pack(pt);
>+	bond->dev->priv_flags |= IFF_MASTER_NEEDARP;
> }
>
> void bond_unregister_arp(struct bonding *bond)
> {
>-	struct packet_type *pt = &bond->arp_mon_pt;
>-
>-	dev_remove_pack(pt);
>-	pt->type = 0;
>+	bond->dev->priv_flags &= ~IFF_MASTER_NEEDARP;
> }
>
> /*---------------------------- Hashing Policies -----------------------------*/
>@@ -5041,6 +5028,7 @@ static int __init bonding_init(void)
> 	register_netdevice_notifier(&bond_netdev_notifier);
> 	register_inetaddr_notifier(&bond_inetaddr_notifier);
> 	bond_register_ipv6_notifier();
>+	bond_handle_arp_hook = bond_handle_arp;
> out:
> 	return res;
> err:
>@@ -5061,6 +5049,7 @@ static void __exit bonding_exit(void)
>
> 	rtnl_link_unregister(&bond_link_ops);
> 	unregister_pernet_subsys(&bond_net_ops);
>+	bond_handle_arp_hook = NULL;
> }
>
> module_init(bonding_init);
>diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>index 257a7a4..57adfe5 100644
>--- a/drivers/net/bonding/bonding.h
>+++ b/drivers/net/bonding/bonding.h
>@@ -212,7 +212,6 @@ struct bonding {
> 	struct   bond_params params;
> 	struct   list_head vlan_list;
> 	struct   vlan_group *vlgrp;
>-	struct   packet_type arp_mon_pt;
> 	struct   workqueue_struct *wq;
> 	struct   delayed_work mii_work;
> 	struct   delayed_work arp_work;
>@@ -292,14 +291,12 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave)
> 	if (!bond_is_lb(bond))
> 		slave->state = BOND_STATE_BACKUP;
> 	slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
>-	if (slave_do_arp_validate(bond, slave))
>-		slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
> }
>
> static inline void bond_set_slave_active_flags(struct slave *slave)
> {
> 	slave->state = BOND_STATE_ACTIVE;
>-	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
>+	slave->dev->priv_flags &= ~IFF_SLAVE_INACTIVE;
> }
>
> static inline void bond_set_master_3ad_flags(struct bonding *bond)
>diff --git a/include/linux/if.h b/include/linux/if.h
>index 3a9f410..84ab2c8 100644
>--- a/include/linux/if.h
>+++ b/include/linux/if.h
>@@ -63,7 +63,7 @@
> #define IFF_MASTER_8023AD	0x8	/* bonding master, 802.3ad. 	*/
> #define IFF_MASTER_ALB	0x10		/* bonding master, balance-alb.	*/
> #define IFF_BONDING	0x20		/* bonding master or slave	*/
>-#define IFF_SLAVE_NEEDARP 0x40		/* need ARPs for validation	*/
>+#define IFF_MASTER_NEEDARP 0x40		/* need ARPs for validation	*/
> #define IFF_ISATAP	0x80		/* ISATAP interface (RFC4214)	*/
> #define IFF_MASTER_ARPMON 0x100		/* bonding master, ARP mon in use */
> #define IFF_WAN_HDLC	0x200		/* WAN HDLC device		*/
>diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>index fa8b476..1ad9b71 100644
>--- a/include/linux/netdevice.h
>+++ b/include/linux/netdevice.h
>@@ -2055,6 +2055,9 @@ static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
> 	}
> }
>
>+#if defined(CONFIG_BONDING) || defined(CONFIG_BONDING_MODULE)
>+extern void (*bond_handle_arp_hook)(struct sk_buff *skb);
>+
> /* On bonding slaves other than the currently active slave, suppress
>  * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
>  * ARP on active-backup slaves with arp_validate enabled.
>@@ -2076,11 +2079,13 @@ static inline int skb_bond_should_drop(struct sk_buff *skb,
> 			skb_bond_set_mac_by_master(skb, master);
> 		}
>
>-		if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
>-			if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
>-			    skb->protocol == __cpu_to_be16(ETH_P_ARP))
>-				return 0;
>+		/* pass ARP frames directly to bonding
>+		   before bridging or other hooks change them */
>+		if ((master->priv_flags & IFF_MASTER_NEEDARP) &&
>+		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
>+			bond_handle_arp_hook(skb);
>
>+		if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
> 			if (master->priv_flags & IFF_MASTER_ALB) {
> 				if (skb->pkt_type != PACKET_BROADCAST &&
> 				    skb->pkt_type != PACKET_MULTICAST)
>@@ -2095,6 +2100,9 @@ static inline int skb_bond_should_drop(struct sk_buff *skb,
> 	}
> 	return 0;
> }
>+#else
>+#define skb_bond_should_drop(a, b) 0
>+#endif
>
> extern struct pernet_operations __net_initdata loopback_net_ops;
>
>diff --git a/net/core/dev.c b/net/core/dev.c
>index f769098..f9821f1 100644
>--- a/net/core/dev.c
>+++ b/net/core/dev.c
>@@ -2314,6 +2314,11 @@ static inline int deliver_skb(struct sk_buff *skb,
> 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
> }
>
>+#if defined(CONFIG_BONDING) || defined(CONFIG_BONDING_MODULE)
>+void (*bond_handle_arp_hook)(struct sk_buff *skb);
>+EXPORT_SYMBOL_GPL(bond_handle_arp_hook);
>+#endif
>+
> #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
>
> #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
>@@ -2507,6 +2512,10 @@ int netif_receive_skb(struct sk_buff *skb)
> 	if (!skb->skb_iif)
> 		skb->skb_iif = skb->dev->ifindex;
>
>+	skb_reset_network_header(skb);
>+	skb_reset_transport_header(skb);
>+	skb->mac_len = skb->network_header - skb->mac_header;
>+
> 	null_or_orig = NULL;
> 	orig_dev = skb->dev;
> 	master = ACCESS_ONCE(orig_dev->master);
>@@ -2519,10 +2528,6 @@ int netif_receive_skb(struct sk_buff *skb)
>
> 	__get_cpu_var(netdev_rx_stat).total++;
>
>-	skb_reset_network_header(skb);
>-	skb_reset_transport_header(skb);
>-	skb->mac_len = skb->network_header - skb->mac_header;
>-
> 	pt_prev = NULL;
>
> 	rcu_read_lock();
>
>
>Thanks,
>
>-- 
>Jiri Bohac <jbohac@suse.cz>
>SUSE Labs, SUSE CZ
>

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] bonding: fix arp_validate on bonds inside a bridge
  2010-04-29 18:57     ` Jay Vosburgh
@ 2010-04-30 15:45       ` Jiri Bohac
  2010-05-03 22:58         ` David Miller
  0 siblings, 1 reply; 10+ messages in thread
From: Jiri Bohac @ 2010-04-30 15:45 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: Jiri Bohac, bonding-devel, netdev

On Thu, Apr 29, 2010 at 11:57:23AM -0700, Jay Vosburgh wrote:
> 	This doesn't apply to the current net-next-2.6 (because
> skb_bond_should_drop was pulled out of line a few weeks ago); can you
> update the patch?

sure, here it goes:

----

bonding with arp_validate does not currently work when the
bonding master is part of a bridge. This is because
bond_arp_rcv() is registered as a packet type handler for ARP,
but before netif_receive_skb() processes the ptype_base hash
table, handle_bridge() is called and changes the skb->dev to
point to the bridge device.

This patch makes bonding_should_drop() call the bonding ARP
handler directly if a IFF_MASTER_NEEDARP flag is set on the
bonding master.  bond_register_arp() now only needs to set the
IFF_MASTER_NEEDARP flag.

We no longer need special ARP handling for inactive slaves, hence
IFF_SLAVE_NEEDARP is not needed.

skb_reset_network_header() and skb_reset_transport_header() need
to be called before the call to bonding_should_drop() because
bond_handle_arp() needs the offsets initialized.

As a side-effect, skb_bond_should_drop is #defined as 0
when CONFIG_BONDING is not set.

Signed-off-by: Jiri Bohac <jbohac@suse.cz>

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 85e813c..b149bfa 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1879,8 +1879,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 	}
 
 	slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
-				   IFF_SLAVE_INACTIVE | IFF_BONDING |
-				   IFF_SLAVE_NEEDARP);
+				   IFF_SLAVE_INACTIVE | IFF_BONDING);
 
 	kfree(slave);
 
@@ -2551,11 +2550,12 @@ static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32
 	}
 }
 
-static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+static void bond_handle_arp(struct sk_buff *skb)
 {
 	struct arphdr *arp;
 	struct slave *slave;
 	struct bonding *bond;
+	struct net_device *dev = skb->dev->master, *orig_dev = skb->dev;
 	unsigned char *arp_ptr;
 	__be32 sip, tip;
 
@@ -2576,9 +2576,8 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
 	bond = netdev_priv(dev);
 	read_lock(&bond->lock);
 
-	pr_debug("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n",
-		 bond->dev->name, skb->dev ? skb->dev->name : "NULL",
-		 orig_dev ? orig_dev->name : "NULL");
+	pr_debug("bond_handle_arp: bond: %s, master: %s, slave: %s\n",
+		bond->dev->name, dev->name, orig_dev->name);
 
 	slave = bond_get_slave_by_dev(bond, orig_dev);
 	if (!slave || !slave_do_arp_validate(bond, slave))
@@ -2623,8 +2622,7 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
 out_unlock:
 	read_unlock(&bond->lock);
 out:
-	dev_kfree_skb(skb);
-	return NET_RX_SUCCESS;
+	return;
 }
 
 /*
@@ -3506,23 +3504,12 @@ static void bond_unregister_lacpdu(struct bonding *bond)
 
 void bond_register_arp(struct bonding *bond)
 {
-	struct packet_type *pt = &bond->arp_mon_pt;
-
-	if (pt->type)
-		return;
-
-	pt->type = htons(ETH_P_ARP);
-	pt->dev = bond->dev;
-	pt->func = bond_arp_rcv;
-	dev_add_pack(pt);
+	bond->dev->priv_flags |= IFF_MASTER_NEEDARP;
 }
 
 void bond_unregister_arp(struct bonding *bond)
 {
-	struct packet_type *pt = &bond->arp_mon_pt;
-
-	dev_remove_pack(pt);
-	pt->type = 0;
+	bond->dev->priv_flags &= ~IFF_MASTER_NEEDARP;
 }
 
 /*---------------------------- Hashing Policies -----------------------------*/
@@ -4967,6 +4954,7 @@ static struct pernet_operations bond_net_ops = {
 	.size = sizeof(struct bond_net),
 };
 
+extern void (*bond_handle_arp_hook)(struct sk_buff *skb);
 static int __init bonding_init(void)
 {
 	int i;
@@ -4999,6 +4987,7 @@ static int __init bonding_init(void)
 	register_netdevice_notifier(&bond_netdev_notifier);
 	register_inetaddr_notifier(&bond_inetaddr_notifier);
 	bond_register_ipv6_notifier();
+	bond_handle_arp_hook = bond_handle_arp;
 out:
 	return res;
 err:
@@ -5019,6 +5008,7 @@ static void __exit bonding_exit(void)
 
 	rtnl_link_unregister(&bond_link_ops);
 	unregister_pernet_subsys(&bond_net_ops);
+	bond_handle_arp_hook = NULL;
 }
 
 module_init(bonding_init);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 2aa3367..64e0108 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -212,7 +212,6 @@ struct bonding {
 	struct   bond_params params;
 	struct   list_head vlan_list;
 	struct   vlan_group *vlgrp;
-	struct   packet_type arp_mon_pt;
 	struct   workqueue_struct *wq;
 	struct   delayed_work mii_work;
 	struct   delayed_work arp_work;
@@ -292,14 +291,12 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave)
 	if (!bond_is_lb(bond))
 		slave->state = BOND_STATE_BACKUP;
 	slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
-	if (slave_do_arp_validate(bond, slave))
-		slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
 }
 
 static inline void bond_set_slave_active_flags(struct slave *slave)
 {
 	slave->state = BOND_STATE_ACTIVE;
-	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
+	slave->dev->priv_flags &= ~IFF_SLAVE_INACTIVE;
 }
 
 static inline void bond_set_master_3ad_flags(struct bonding *bond)
diff --git a/include/linux/if.h b/include/linux/if.h
index 3a9f410..84ab2c8 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -63,7 +63,7 @@
 #define IFF_MASTER_8023AD	0x8	/* bonding master, 802.3ad. 	*/
 #define IFF_MASTER_ALB	0x10		/* bonding master, balance-alb.	*/
 #define IFF_BONDING	0x20		/* bonding master or slave	*/
-#define IFF_SLAVE_NEEDARP 0x40		/* need ARPs for validation	*/
+#define IFF_MASTER_NEEDARP 0x40		/* need ARPs for validation	*/
 #define IFF_ISATAP	0x80		/* ISATAP interface (RFC4214)	*/
 #define IFF_MASTER_ARPMON 0x100		/* bonding master, ARP mon in use */
 #define IFF_WAN_HDLC	0x200		/* WAN HDLC device		*/
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 40d4c20..fa27d16 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2162,6 +2162,7 @@ static inline void netif_set_gso_max_size(struct net_device *dev,
 	dev->gso_max_size = size;
 }
 
+#if defined(CONFIG_BONDING) || defined(CONFIG_BONDING_MODULE)
 extern int __skb_bond_should_drop(struct sk_buff *skb,
 				  struct net_device *master);
 
@@ -2172,6 +2173,9 @@ static inline int skb_bond_should_drop(struct sk_buff *skb,
 		return __skb_bond_should_drop(skb, master);
 	return 0;
 }
+#else
+#define skb_bond_should_drop(a, b) 0
+#endif
 
 extern struct pernet_operations __net_initdata loopback_net_ops;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 100dcbd..2689ff0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2734,6 +2734,10 @@ static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
 	}
 }
 
+#if defined(CONFIG_BONDING) || defined(CONFIG_BONDING_MODULE)
+void (*bond_handle_arp_hook)(struct sk_buff *skb);
+EXPORT_SYMBOL_GPL(bond_handle_arp_hook);
+
 /* On bonding slaves other than the currently active slave, suppress
  * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
  * ARP on active-backup slaves with arp_validate enabled.
@@ -2753,11 +2757,13 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
 		skb_bond_set_mac_by_master(skb, master);
 	}
 
-	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
-		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
-		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
-			return 0;
+	/* pass ARP frames directly to bonding
+	   before bridging or other hooks change them */
+	if ((master->priv_flags & IFF_MASTER_NEEDARP) &&
+	    skb->protocol == __cpu_to_be16(ETH_P_ARP))
+		bond_handle_arp_hook(skb);
 
+	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
 		if (master->priv_flags & IFF_MASTER_ALB) {
 			if (skb->pkt_type != PACKET_BROADCAST &&
 			    skb->pkt_type != PACKET_MULTICAST)
@@ -2772,6 +2778,7 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
 	return 0;
 }
 EXPORT_SYMBOL(__skb_bond_should_drop);
+#endif
 
 static int __netif_receive_skb(struct sk_buff *skb)
 {
@@ -2796,6 +2803,10 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	if (!skb->skb_iif)
 		skb->skb_iif = skb->dev->ifindex;
 
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb->mac_len = skb->network_header - skb->mac_header;
+
 	null_or_orig = NULL;
 	orig_dev = skb->dev;
 	master = ACCESS_ONCE(orig_dev->master);
@@ -2808,10 +2819,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
 
 	__get_cpu_var(netdev_rx_stat).total++;
 
-	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
-
 	pt_prev = NULL;
 
 	rcu_read_lock();
-- 
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] bonding: fix arp_validate on bonds inside a bridge
  2010-04-30 15:45       ` Jiri Bohac
@ 2010-05-03 22:58         ` David Miller
  2010-05-03 23:02           ` Jay Vosburgh
  0 siblings, 1 reply; 10+ messages in thread
From: David Miller @ 2010-05-03 22:58 UTC (permalink / raw)
  To: jbohac; +Cc: fubar, bonding-devel, netdev

From: Jiri Bohac <jbohac@suse.cz>
Date: Fri, 30 Apr 2010 17:45:29 +0200

> On Thu, Apr 29, 2010 at 11:57:23AM -0700, Jay Vosburgh wrote:
>> 	This doesn't apply to the current net-next-2.6 (because
>> skb_bond_should_drop was pulled out of line a few weeks ago); can you
>> update the patch?
> 
> sure, here it goes:

Jay can I get an ACK or similar?

Thanks.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] bonding: fix arp_validate on bonds inside a bridge
  2010-05-03 22:58         ` David Miller
@ 2010-05-03 23:02           ` Jay Vosburgh
  2010-05-04  1:06             ` Jay Vosburgh
  0 siblings, 1 reply; 10+ messages in thread
From: Jay Vosburgh @ 2010-05-03 23:02 UTC (permalink / raw)
  To: David Miller; +Cc: jbohac, bonding-devel, netdev

David Miller <davem@davemloft.net> wrote:

>From: Jiri Bohac <jbohac@suse.cz>
>Date: Fri, 30 Apr 2010 17:45:29 +0200
>
>> On Thu, Apr 29, 2010 at 11:57:23AM -0700, Jay Vosburgh wrote:
>>> 	This doesn't apply to the current net-next-2.6 (because
>>> skb_bond_should_drop was pulled out of line a few weeks ago); can you
>>> update the patch?
>> 
>> sure, here it goes:
>
>Jay can I get an ACK or similar?

	Setting up to test it right now; will report back.

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] bonding: fix arp_validate on bonds inside a bridge
  2010-05-03 23:02           ` Jay Vosburgh
@ 2010-05-04  1:06             ` Jay Vosburgh
  2010-05-04 23:18               ` David Miller
  0 siblings, 1 reply; 10+ messages in thread
From: Jay Vosburgh @ 2010-05-04  1:06 UTC (permalink / raw)
  Cc: David Miller, jbohac, bonding-devel, netdev

Jay Vosburgh <fubar@us.ibm.com> wrote:

>David Miller <davem@davemloft.net> wrote:
>
>From: Jiri Bohac <jbohac@suse.cz>
>>Date: Fri, 30 Apr 2010 17:45:29 +0200
>>
>>> On Thu, Apr 29, 2010 at 11:57:23AM -0700, Jay Vosburgh wrote:
>>>> 	This doesn't apply to the current net-next-2.6 (because
>>>> skb_bond_should_drop was pulled out of line a few weeks ago); can you
>>>> update the patch?
>>> 
>>> sure, here it goes:
>>
>>Jay can I get an ACK or similar?
>
>	Setting up to test it right now; will report back.

	Tested and it looks to work as advertised.  I see only one minor
nit, there's a pr_debug that missed being renamed to the new function
name; here's the whole patch with that fixed.

	-J

From: Jiri Bohac <jbohac@suse.cz>
Date: Fri, 30 Apr 2010 17:45:29 +0200
Subject: [PATCH] bonding: fix arp_validate on bonds inside a bridge

bonding with arp_validate does not currently work when the
bonding master is part of a bridge. This is because
bond_arp_rcv() is registered as a packet type handler for ARP,
but before netif_receive_skb() processes the ptype_base hash
table, handle_bridge() is called and changes the skb->dev to
point to the bridge device.

This patch makes bonding_should_drop() call the bonding ARP
handler directly if a IFF_MASTER_NEEDARP flag is set on the
bonding master.  bond_register_arp() now only needs to set the
IFF_MASTER_NEEDARP flag.

We no longer need special ARP handling for inactive slaves, hence
IFF_SLAVE_NEEDARP is not needed.

skb_reset_network_header() and skb_reset_transport_header() need
to be called before the call to bonding_should_drop() because
bond_handle_arp() needs the offsets initialized.

As a side-effect, skb_bond_should_drop is #defined as 0
when CONFIG_BONDING is not set.

Signed-off-by: Jiri Bohac <jbohac@suse.cz>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
---
 drivers/net/bonding/bond_main.c |   34 ++++++++++++----------------------
 drivers/net/bonding/bonding.h   |    5 +----
 include/linux/if.h              |    2 +-
 include/linux/netdevice.h       |    4 ++++
 net/core/dev.c                  |   23 +++++++++++++++--------
 5 files changed, 33 insertions(+), 35 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 85e813c..b71209f 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1879,8 +1879,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 	}
 
 	slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
-				   IFF_SLAVE_INACTIVE | IFF_BONDING |
-				   IFF_SLAVE_NEEDARP);
+				   IFF_SLAVE_INACTIVE | IFF_BONDING);
 
 	kfree(slave);
 
@@ -2551,11 +2550,12 @@ static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32
 	}
 }
 
-static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+static void bond_handle_arp(struct sk_buff *skb)
 {
 	struct arphdr *arp;
 	struct slave *slave;
 	struct bonding *bond;
+	struct net_device *dev = skb->dev->master, *orig_dev = skb->dev;
 	unsigned char *arp_ptr;
 	__be32 sip, tip;
 
@@ -2576,9 +2576,8 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
 	bond = netdev_priv(dev);
 	read_lock(&bond->lock);
 
-	pr_debug("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n",
-		 bond->dev->name, skb->dev ? skb->dev->name : "NULL",
-		 orig_dev ? orig_dev->name : "NULL");
+	pr_debug("bond_handle_arp: bond: %s, master: %s, slave: %s\n",
+		bond->dev->name, dev->name, orig_dev->name);
 
 	slave = bond_get_slave_by_dev(bond, orig_dev);
 	if (!slave || !slave_do_arp_validate(bond, slave))
@@ -2602,7 +2601,7 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
 	arp_ptr += 4 + dev->addr_len;
 	memcpy(&tip, arp_ptr, 4);
 
-	pr_debug("bond_arp_rcv: %s %s/%d av %d sv %d sip %pI4 tip %pI4\n",
+	pr_debug("bond_handle_arp: %s %s/%d av %d sv %d sip %pI4 tip %pI4\n",
 		 bond->dev->name, slave->dev->name, slave->state,
 		 bond->params.arp_validate, slave_do_arp_validate(bond, slave),
 		 &sip, &tip);
@@ -2623,8 +2622,7 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
 out_unlock:
 	read_unlock(&bond->lock);
 out:
-	dev_kfree_skb(skb);
-	return NET_RX_SUCCESS;
+	return;
 }
 
 /*
@@ -3506,23 +3504,12 @@ static void bond_unregister_lacpdu(struct bonding *bond)
 
 void bond_register_arp(struct bonding *bond)
 {
-	struct packet_type *pt = &bond->arp_mon_pt;
-
-	if (pt->type)
-		return;
-
-	pt->type = htons(ETH_P_ARP);
-	pt->dev = bond->dev;
-	pt->func = bond_arp_rcv;
-	dev_add_pack(pt);
+	bond->dev->priv_flags |= IFF_MASTER_NEEDARP;
 }
 
 void bond_unregister_arp(struct bonding *bond)
 {
-	struct packet_type *pt = &bond->arp_mon_pt;
-
-	dev_remove_pack(pt);
-	pt->type = 0;
+	bond->dev->priv_flags &= ~IFF_MASTER_NEEDARP;
 }
 
 /*---------------------------- Hashing Policies -----------------------------*/
@@ -4967,6 +4954,7 @@ static struct pernet_operations bond_net_ops = {
 	.size = sizeof(struct bond_net),
 };
 
+extern void (*bond_handle_arp_hook)(struct sk_buff *skb);
 static int __init bonding_init(void)
 {
 	int i;
@@ -4999,6 +4987,7 @@ static int __init bonding_init(void)
 	register_netdevice_notifier(&bond_netdev_notifier);
 	register_inetaddr_notifier(&bond_inetaddr_notifier);
 	bond_register_ipv6_notifier();
+	bond_handle_arp_hook = bond_handle_arp;
 out:
 	return res;
 err:
@@ -5019,6 +5008,7 @@ static void __exit bonding_exit(void)
 
 	rtnl_link_unregister(&bond_link_ops);
 	unregister_pernet_subsys(&bond_net_ops);
+	bond_handle_arp_hook = NULL;
 }
 
 module_init(bonding_init);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 2aa3367..64e0108 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -212,7 +212,6 @@ struct bonding {
 	struct   bond_params params;
 	struct   list_head vlan_list;
 	struct   vlan_group *vlgrp;
-	struct   packet_type arp_mon_pt;
 	struct   workqueue_struct *wq;
 	struct   delayed_work mii_work;
 	struct   delayed_work arp_work;
@@ -292,14 +291,12 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave)
 	if (!bond_is_lb(bond))
 		slave->state = BOND_STATE_BACKUP;
 	slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
-	if (slave_do_arp_validate(bond, slave))
-		slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
 }
 
 static inline void bond_set_slave_active_flags(struct slave *slave)
 {
 	slave->state = BOND_STATE_ACTIVE;
-	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
+	slave->dev->priv_flags &= ~IFF_SLAVE_INACTIVE;
 }
 
 static inline void bond_set_master_3ad_flags(struct bonding *bond)
diff --git a/include/linux/if.h b/include/linux/if.h
index 3a9f410..84ab2c8 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -63,7 +63,7 @@
 #define IFF_MASTER_8023AD	0x8	/* bonding master, 802.3ad. 	*/
 #define IFF_MASTER_ALB	0x10		/* bonding master, balance-alb.	*/
 #define IFF_BONDING	0x20		/* bonding master or slave	*/
-#define IFF_SLAVE_NEEDARP 0x40		/* need ARPs for validation	*/
+#define IFF_MASTER_NEEDARP 0x40		/* need ARPs for validation	*/
 #define IFF_ISATAP	0x80		/* ISATAP interface (RFC4214)	*/
 #define IFF_MASTER_ARPMON 0x100		/* bonding master, ARP mon in use */
 #define IFF_WAN_HDLC	0x200		/* WAN HDLC device		*/
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 40d4c20..fa27d16 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2162,6 +2162,7 @@ static inline void netif_set_gso_max_size(struct net_device *dev,
 	dev->gso_max_size = size;
 }
 
+#if defined(CONFIG_BONDING) || defined(CONFIG_BONDING_MODULE)
 extern int __skb_bond_should_drop(struct sk_buff *skb,
 				  struct net_device *master);
 
@@ -2172,6 +2173,9 @@ static inline int skb_bond_should_drop(struct sk_buff *skb,
 		return __skb_bond_should_drop(skb, master);
 	return 0;
 }
+#else
+#define skb_bond_should_drop(a, b) 0
+#endif
 
 extern struct pernet_operations __net_initdata loopback_net_ops;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 100dcbd..2689ff0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2734,6 +2734,10 @@ static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
 	}
 }
 
+#if defined(CONFIG_BONDING) || defined(CONFIG_BONDING_MODULE)
+void (*bond_handle_arp_hook)(struct sk_buff *skb);
+EXPORT_SYMBOL_GPL(bond_handle_arp_hook);
+
 /* On bonding slaves other than the currently active slave, suppress
  * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
  * ARP on active-backup slaves with arp_validate enabled.
@@ -2753,11 +2757,13 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
 		skb_bond_set_mac_by_master(skb, master);
 	}
 
-	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
-		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
-		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
-			return 0;
+	/* pass ARP frames directly to bonding
+	   before bridging or other hooks change them */
+	if ((master->priv_flags & IFF_MASTER_NEEDARP) &&
+	    skb->protocol == __cpu_to_be16(ETH_P_ARP))
+		bond_handle_arp_hook(skb);
 
+	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
 		if (master->priv_flags & IFF_MASTER_ALB) {
 			if (skb->pkt_type != PACKET_BROADCAST &&
 			    skb->pkt_type != PACKET_MULTICAST)
@@ -2772,6 +2778,7 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
 	return 0;
 }
 EXPORT_SYMBOL(__skb_bond_should_drop);
+#endif
 
 static int __netif_receive_skb(struct sk_buff *skb)
 {
@@ -2796,6 +2803,10 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	if (!skb->skb_iif)
 		skb->skb_iif = skb->dev->ifindex;
 
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb->mac_len = skb->network_header - skb->mac_header;
+
 	null_or_orig = NULL;
 	orig_dev = skb->dev;
 	master = ACCESS_ONCE(orig_dev->master);
@@ -2808,10 +2819,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
 
 	__get_cpu_var(netdev_rx_stat).total++;
 
-	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
-
 	pt_prev = NULL;
 
 	rcu_read_lock();
-- 
1.6.0.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] bonding: fix arp_validate on bonds inside a bridge
  2010-05-04  1:06             ` Jay Vosburgh
@ 2010-05-04 23:18               ` David Miller
  2010-05-05 13:33                 ` Jiri Bohac
  0 siblings, 1 reply; 10+ messages in thread
From: David Miller @ 2010-05-04 23:18 UTC (permalink / raw)
  To: fubar; +Cc: jbohac, bonding-devel, netdev

From: Jay Vosburgh <fubar@us.ibm.com>
Date: Mon, 03 May 2010 18:06:22 -0700

> 	Tested and it looks to work as advertised.  I see only one minor
> nit, there's a pr_debug that missed being renamed to the new function
> name; here's the whole patch with that fixed.

I don't think you need the ugly arp hook.

Instead, it's much cleaner to provide a way for packet type taps to
see the packet before bridge et al. decapsulation.  In fact this makes
a lot of sense, wanting to see the device as __netif_receive_skb() saw
it, with no changes whatsoever.

In fact ptype_all runs before bridging, ING, and MACVLAN decap the
thing, so we could have a 'ptype_base_predecap[]' that we run over
right after those.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] bonding: fix arp_validate on bonds inside a bridge
  2010-05-04 23:18               ` David Miller
@ 2010-05-05 13:33                 ` Jiri Bohac
  2010-05-06  7:35                   ` David Miller
  0 siblings, 1 reply; 10+ messages in thread
From: Jiri Bohac @ 2010-05-05 13:33 UTC (permalink / raw)
  To: David Miller; +Cc: fubar, jbohac, bonding-devel, netdev

On Tue, May 04, 2010 at 04:18:15PM -0700, David Miller wrote:
> From: Jay Vosburgh <fubar@us.ibm.com>
> > 	Tested and it looks to work as advertised.  I see only one minor
> > nit, there's a pr_debug that missed being renamed to the new function
> > name; here's the whole patch with that fixed.
> 
> I don't think you need the ugly arp hook.
> 
> Instead, it's much cleaner to provide a way for packet type taps to
> see the packet before bridge et al. decapsulation.  In fact this makes
> a lot of sense, wanting to see the device as __netif_receive_skb() saw
> it, with no changes whatsoever.
> 
> In fact ptype_all runs before bridging, ING, and MACVLAN decap the
> thing, so we could have a 'ptype_base_predecap[]' that we run over
> right after those.

I was considering exactly this, but I thought it would be
rejected because of the overhead for all packets received.

In fact, bonding could register the ARP handler in the ptype_all
list and check itself whether the packets were ARPs. This
would require no changes to __netif_receive_skb() at all, but
would cause an extra fuction call and a condition for _every_
packet once a bond with arp_validate would be up.

Having a ptype_base_predecap[] hashtable would still cause at
least a comparision for _every_ packet, even without bonding
being loaded (!).

The current patch causes an extra comparison only for packets
arriving on boding slaves.

If either of the ptype_all or ptype_base_predecap[] method is
preferred, I'll be happy to re-work the patch. I just thought
performance had bigger priority here.

-- 
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] bonding: fix arp_validate on bonds inside a bridge
  2010-05-05 13:33                 ` Jiri Bohac
@ 2010-05-06  7:35                   ` David Miller
  0 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2010-05-06  7:35 UTC (permalink / raw)
  To: jbohac; +Cc: fubar, bonding-devel, netdev

From: Jiri Bohac <jbohac@suse.cz>
Date: Wed, 5 May 2010 15:33:01 +0200

> If either of the ptype_all or ptype_base_predecap[] method is
> preferred, I'll be happy to re-work the patch. I just thought
> performance had bigger priority here.

Well, let's do some thinking.

If there are any ptype_all[] entries at all, processing of every
packet will be expensive.

Also, the ptype_base_predecap[] is unlikely to have any entries
unless bonding is on in this special mode.  But if it is, we
can accept a little bit of extra overhead.

So we can address all of this without adding undue overhead for people
not using this stuff, by simply having an atomic counter which shows
how many ptype_all[] or ptype_base_predecap[] entries exist.

And at the top level we test that __read_mostly atomic variable before
traversing the ptype_all[] and ptype_base_predecap[] lists.

That should address all of the issues.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2010-05-06  7:35 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-28 12:59 [PATCH] bonding: fix arp_validate on bonds inside a bridge Jiri Bohac
2010-04-28 19:05 ` Jay Vosburgh
     [not found]   ` <20100429163921.GA4228@midget.suse.cz>
2010-04-29 18:57     ` Jay Vosburgh
2010-04-30 15:45       ` Jiri Bohac
2010-05-03 22:58         ` David Miller
2010-05-03 23:02           ` Jay Vosburgh
2010-05-04  1:06             ` Jay Vosburgh
2010-05-04 23:18               ` David Miller
2010-05-05 13:33                 ` Jiri Bohac
2010-05-06  7:35                   ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.