All of lore.kernel.org
 help / color / mirror / Atom feed
* [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-05  8:11 ` Amerigo Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Amerigo Wang @ 2010-05-05  8:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: Matt Mackall, netdev, bridge, Andy Gospodarek, Neil Horman,
	Amerigo Wang, Jeff Moyer, Stephen Hemminger, bonding-devel,
	Jay Vosburgh, David Miller

V5:
Fix coding style problems pointed out by David.

V4:
Use "unlikely" to mark netpoll call path, suggested by Stephen.
Handle NETDEV_GOING_DOWN case.

V3:
Update to latest Linus' tree.
Fix deadlocks when releasing slaves of bonding devices.
Thanks to Andy.

V2:
Fix some bugs of previous version.
Remove ->netpoll_setup and ->netpoll_xmit, they are not necessary.
Don't poll all underlying devices, poll ->real_dev in struct netpoll.
Thanks to David for suggesting above.

------------>

This whole patchset is for adding netpoll support to bridge and bonding
devices. I already tested it for bridge, bonding, bridge over bonding,
and bonding over bridge. It looks fine now.


To make bridge and bonding support netpoll, we need to adjust
some netpoll generic code. This patch does the following things:

1) introduce two new priv_flags for struct net_device:
   IFF_IN_NETPOLL which identifies we are processing a netpoll;
   IFF_DISABLE_NETPOLL is used to disable netpoll support for a device
   at run-time;

2) introduce one new method for netdev_ops:
   ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
     removed.

3) introduce netpoll_poll_dev() which takes a struct net_device * parameter;
   export netpoll_send_skb() and netpoll_poll_dev() which will be used later;

4) store a pointer to struct netpoll in struct netpoll_info, which will also
   be used later.

5) introduce ->real_dev for struct netpoll.

6) introduce a new notifier event, NETDEV_BONDING_DESLAVE, which is used to
   disable netconsole before releasing a slave, to avoid deadlocks.

Cc: David Miller <davem@davemloft.net>
Cc: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: WANG Cong <amwang@redhat.com>

---

Index: linux-2.6/include/linux/if.h
===================================================================
--- linux-2.6.orig/include/linux/if.h
+++ linux-2.6/include/linux/if.h
@@ -71,6 +71,8 @@
 					 * release skb->dst
 					 */
 #define IFF_DONT_BRIDGE 0x800		/* disallow bridging this ether dev */
+#define IFF_IN_NETPOLL	0x1000		/* whether we are processing netpoll */
+#define IFF_DISABLE_NETPOLL	0x2000	/* disable netpoll at run-time */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
Index: linux-2.6/include/linux/netdevice.h
===================================================================
--- linux-2.6.orig/include/linux/netdevice.h
+++ linux-2.6/include/linux/netdevice.h
@@ -667,6 +667,7 @@ struct net_device_ops {
 						        unsigned short vid);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*ndo_poll_controller)(struct net_device *dev);
+	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
 #endif
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
 						  int queue, u8 *mac);
Index: linux-2.6/include/linux/netpoll.h
===================================================================
--- linux-2.6.orig/include/linux/netpoll.h
+++ linux-2.6/include/linux/netpoll.h
@@ -14,6 +14,7 @@
 
 struct netpoll {
 	struct net_device *dev;
+	struct net_device *real_dev;
 	char dev_name[IFNAMSIZ];
 	const char *name;
 	void (*rx_hook)(struct netpoll *, int, char *, int);
@@ -36,8 +37,11 @@ struct netpoll_info {
 	struct sk_buff_head txq;
 
 	struct delayed_work tx_work;
+
+	struct netpoll *netpoll;
 };
 
+void netpoll_poll_dev(struct net_device *dev);
 void netpoll_poll(struct netpoll *np);
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
 void netpoll_print_options(struct netpoll *np);
@@ -47,6 +51,7 @@ int netpoll_trap(void);
 void netpoll_set_trap(int trap);
 void netpoll_cleanup(struct netpoll *np);
 int __netpoll_rx(struct sk_buff *skb);
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
 
 
 #ifdef CONFIG_NETPOLL
Index: linux-2.6/net/core/netpoll.c
===================================================================
--- linux-2.6.orig/net/core/netpoll.c
+++ linux-2.6/net/core/netpoll.c
@@ -179,9 +179,8 @@ static void service_arp_queue(struct net
 	}
 }
 
-void netpoll_poll(struct netpoll *np)
+void netpoll_poll_dev(struct net_device *dev)
 {
-	struct net_device *dev = np->dev;
 	const struct net_device_ops *ops;
 
 	if (!dev || !netif_running(dev))
@@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np)
 	zap_completion_queue();
 }
 
+void netpoll_poll(struct netpoll *np)
+{
+	netpoll_poll_dev(np->dev);
+}
+
 static void refill_skbs(void)
 {
 	struct sk_buff *skb;
@@ -282,7 +286,7 @@ static int netpoll_owner_active(struct n
 	return 0;
 }
 
-static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
 	int status = NETDEV_TX_BUSY;
 	unsigned long tries;
@@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netp
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
 				if (!netif_tx_queue_stopped(txq)) {
+					dev->priv_flags |= IFF_IN_NETPOLL;
 					status = ops->ndo_start_xmit(skb, dev);
+					dev->priv_flags &= ~IFF_IN_NETPOLL;
 					if (status == NETDEV_TX_OK)
 						txq_trans_update(txq);
 				}
@@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np)
 		atomic_inc(&npinfo->refcnt);
 	}
 
-	if (!ndev->netdev_ops->ndo_poll_controller) {
+	npinfo->netpoll = np;
+
+	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
+	    !ndev->netdev_ops->ndo_poll_controller) {
 		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
 		       np->name, np->dev_name);
 		err = -ENOTSUPP;
@@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np)
 			}
 
 			if (atomic_dec_and_test(&npinfo->refcnt)) {
+				const struct net_device_ops *ops;
 				skb_queue_purge(&npinfo->arp_tx);
 				skb_queue_purge(&npinfo->txq);
 				cancel_rearming_delayed_work(&npinfo->tx_work);
@@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np)
 				/* clean after last, unfinished work */
 				__skb_queue_purge(&npinfo->txq);
 				kfree(npinfo);
-				np->dev->npinfo = NULL;
+				ops = np->dev->netdev_ops;
+				if (ops->ndo_netpoll_cleanup)
+					ops->ndo_netpoll_cleanup(np->dev);
+				else
+					np->dev->npinfo = NULL;
 			}
 		}
 
@@ -908,6 +922,7 @@ void netpoll_set_trap(int trap)
 		atomic_dec(&trapped);
 }
 
+EXPORT_SYMBOL(netpoll_send_skb);
 EXPORT_SYMBOL(netpoll_set_trap);
 EXPORT_SYMBOL(netpoll_trap);
 EXPORT_SYMBOL(netpoll_print_options);
@@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options);
 EXPORT_SYMBOL(netpoll_setup);
 EXPORT_SYMBOL(netpoll_cleanup);
 EXPORT_SYMBOL(netpoll_send_udp);
+EXPORT_SYMBOL(netpoll_poll_dev);
 EXPORT_SYMBOL(netpoll_poll);
Index: linux-2.6/drivers/net/netconsole.c
===================================================================
--- linux-2.6.orig/drivers/net/netconsole.c
+++ linux-2.6/drivers/net/netconsole.c
@@ -665,7 +665,8 @@ static int netconsole_netdev_event(struc
 	struct netconsole_target *nt;
 	struct net_device *dev = ptr;
 
-	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER))
+	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER ||
+	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN))
 		goto done;
 
 	spin_lock_irqsave(&target_list_lock, flags);
@@ -677,19 +678,21 @@ static int netconsole_netdev_event(struc
 				strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ);
 				break;
 			case NETDEV_UNREGISTER:
-				if (!nt->enabled)
-					break;
 				netpoll_cleanup(&nt->np);
+				/* Fall through */
+			case NETDEV_GOING_DOWN:
+			case NETDEV_BONDING_DESLAVE:
 				nt->enabled = 0;
-				printk(KERN_INFO "netconsole: network logging stopped"
-					", interface %s unregistered\n",
-					dev->name);
 				break;
 			}
 		}
 		netconsole_target_put(nt);
 	}
 	spin_unlock_irqrestore(&target_list_lock, flags);
+	if (event == NETDEV_UNREGISTER || event == NETDEV_BONDING_DESLAVE)
+		printk(KERN_INFO "netconsole: network logging stopped, "
+			"interface %s %s\n",  dev->name,
+			event == NETDEV_UNREGISTER ? "unregistered" : "released slaves");
 
 done:
 	return NOTIFY_DONE;
Index: linux-2.6/include/linux/notifier.h
===================================================================
--- linux-2.6.orig/include/linux/notifier.h
+++ linux-2.6/include/linux/notifier.h
@@ -203,6 +203,7 @@ static inline int notifier_to_errno(int 
 #define NETDEV_BONDING_NEWTYPE  0x000F
 #define NETDEV_POST_INIT	0x0010
 #define NETDEV_UNREGISTER_BATCH 0x0011
+#define NETDEV_BONDING_DESLAVE  0x0012
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN

^ permalink raw reply	[flat|nested] 73+ messages in thread

* [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-05  8:11 ` Amerigo Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Amerigo Wang @ 2010-05-05  8:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jay Vosburgh, Amerigo Wang, Neil Horman, netdev, Matt Mackall,
	bridge, David Miller, Jeff Moyer, Andy Gospodarek, bonding-devel

V5:
Fix coding style problems pointed out by David.

V4:
Use "unlikely" to mark netpoll call path, suggested by Stephen.
Handle NETDEV_GOING_DOWN case.

V3:
Update to latest Linus' tree.
Fix deadlocks when releasing slaves of bonding devices.
Thanks to Andy.

V2:
Fix some bugs of previous version.
Remove ->netpoll_setup and ->netpoll_xmit, they are not necessary.
Don't poll all underlying devices, poll ->real_dev in struct netpoll.
Thanks to David for suggesting above.

------------>

This whole patchset is for adding netpoll support to bridge and bonding
devices. I already tested it for bridge, bonding, bridge over bonding,
and bonding over bridge. It looks fine now.


To make bridge and bonding support netpoll, we need to adjust
some netpoll generic code. This patch does the following things:

1) introduce two new priv_flags for struct net_device:
   IFF_IN_NETPOLL which identifies we are processing a netpoll;
   IFF_DISABLE_NETPOLL is used to disable netpoll support for a device
   at run-time;

2) introduce one new method for netdev_ops:
   ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
     removed.

3) introduce netpoll_poll_dev() which takes a struct net_device * parameter;
   export netpoll_send_skb() and netpoll_poll_dev() which will be used later;

4) store a pointer to struct netpoll in struct netpoll_info, which will also
   be used later.

5) introduce ->real_dev for struct netpoll.

6) introduce a new notifier event, NETDEV_BONDING_DESLAVE, which is used to
   disable netconsole before releasing a slave, to avoid deadlocks.

Cc: David Miller <davem@davemloft.net>
Cc: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: WANG Cong <amwang@redhat.com>

---

Index: linux-2.6/include/linux/if.h
===================================================================
--- linux-2.6.orig/include/linux/if.h
+++ linux-2.6/include/linux/if.h
@@ -71,6 +71,8 @@
 					 * release skb->dst
 					 */
 #define IFF_DONT_BRIDGE 0x800		/* disallow bridging this ether dev */
+#define IFF_IN_NETPOLL	0x1000		/* whether we are processing netpoll */
+#define IFF_DISABLE_NETPOLL	0x2000	/* disable netpoll at run-time */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
Index: linux-2.6/include/linux/netdevice.h
===================================================================
--- linux-2.6.orig/include/linux/netdevice.h
+++ linux-2.6/include/linux/netdevice.h
@@ -667,6 +667,7 @@ struct net_device_ops {
 						        unsigned short vid);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*ndo_poll_controller)(struct net_device *dev);
+	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
 #endif
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
 						  int queue, u8 *mac);
Index: linux-2.6/include/linux/netpoll.h
===================================================================
--- linux-2.6.orig/include/linux/netpoll.h
+++ linux-2.6/include/linux/netpoll.h
@@ -14,6 +14,7 @@
 
 struct netpoll {
 	struct net_device *dev;
+	struct net_device *real_dev;
 	char dev_name[IFNAMSIZ];
 	const char *name;
 	void (*rx_hook)(struct netpoll *, int, char *, int);
@@ -36,8 +37,11 @@ struct netpoll_info {
 	struct sk_buff_head txq;
 
 	struct delayed_work tx_work;
+
+	struct netpoll *netpoll;
 };
 
+void netpoll_poll_dev(struct net_device *dev);
 void netpoll_poll(struct netpoll *np);
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
 void netpoll_print_options(struct netpoll *np);
@@ -47,6 +51,7 @@ int netpoll_trap(void);
 void netpoll_set_trap(int trap);
 void netpoll_cleanup(struct netpoll *np);
 int __netpoll_rx(struct sk_buff *skb);
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
 
 
 #ifdef CONFIG_NETPOLL
Index: linux-2.6/net/core/netpoll.c
===================================================================
--- linux-2.6.orig/net/core/netpoll.c
+++ linux-2.6/net/core/netpoll.c
@@ -179,9 +179,8 @@ static void service_arp_queue(struct net
 	}
 }
 
-void netpoll_poll(struct netpoll *np)
+void netpoll_poll_dev(struct net_device *dev)
 {
-	struct net_device *dev = np->dev;
 	const struct net_device_ops *ops;
 
 	if (!dev || !netif_running(dev))
@@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np)
 	zap_completion_queue();
 }
 
+void netpoll_poll(struct netpoll *np)
+{
+	netpoll_poll_dev(np->dev);
+}
+
 static void refill_skbs(void)
 {
 	struct sk_buff *skb;
@@ -282,7 +286,7 @@ static int netpoll_owner_active(struct n
 	return 0;
 }
 
-static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
 	int status = NETDEV_TX_BUSY;
 	unsigned long tries;
@@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netp
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
 				if (!netif_tx_queue_stopped(txq)) {
+					dev->priv_flags |= IFF_IN_NETPOLL;
 					status = ops->ndo_start_xmit(skb, dev);
+					dev->priv_flags &= ~IFF_IN_NETPOLL;
 					if (status == NETDEV_TX_OK)
 						txq_trans_update(txq);
 				}
@@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np)
 		atomic_inc(&npinfo->refcnt);
 	}
 
-	if (!ndev->netdev_ops->ndo_poll_controller) {
+	npinfo->netpoll = np;
+
+	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
+	    !ndev->netdev_ops->ndo_poll_controller) {
 		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
 		       np->name, np->dev_name);
 		err = -ENOTSUPP;
@@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np)
 			}
 
 			if (atomic_dec_and_test(&npinfo->refcnt)) {
+				const struct net_device_ops *ops;
 				skb_queue_purge(&npinfo->arp_tx);
 				skb_queue_purge(&npinfo->txq);
 				cancel_rearming_delayed_work(&npinfo->tx_work);
@@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np)
 				/* clean after last, unfinished work */
 				__skb_queue_purge(&npinfo->txq);
 				kfree(npinfo);
-				np->dev->npinfo = NULL;
+				ops = np->dev->netdev_ops;
+				if (ops->ndo_netpoll_cleanup)
+					ops->ndo_netpoll_cleanup(np->dev);
+				else
+					np->dev->npinfo = NULL;
 			}
 		}
 
@@ -908,6 +922,7 @@ void netpoll_set_trap(int trap)
 		atomic_dec(&trapped);
 }
 
+EXPORT_SYMBOL(netpoll_send_skb);
 EXPORT_SYMBOL(netpoll_set_trap);
 EXPORT_SYMBOL(netpoll_trap);
 EXPORT_SYMBOL(netpoll_print_options);
@@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options);
 EXPORT_SYMBOL(netpoll_setup);
 EXPORT_SYMBOL(netpoll_cleanup);
 EXPORT_SYMBOL(netpoll_send_udp);
+EXPORT_SYMBOL(netpoll_poll_dev);
 EXPORT_SYMBOL(netpoll_poll);
Index: linux-2.6/drivers/net/netconsole.c
===================================================================
--- linux-2.6.orig/drivers/net/netconsole.c
+++ linux-2.6/drivers/net/netconsole.c
@@ -665,7 +665,8 @@ static int netconsole_netdev_event(struc
 	struct netconsole_target *nt;
 	struct net_device *dev = ptr;
 
-	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER))
+	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER ||
+	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN))
 		goto done;
 
 	spin_lock_irqsave(&target_list_lock, flags);
@@ -677,19 +678,21 @@ static int netconsole_netdev_event(struc
 				strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ);
 				break;
 			case NETDEV_UNREGISTER:
-				if (!nt->enabled)
-					break;
 				netpoll_cleanup(&nt->np);
+				/* Fall through */
+			case NETDEV_GOING_DOWN:
+			case NETDEV_BONDING_DESLAVE:
 				nt->enabled = 0;
-				printk(KERN_INFO "netconsole: network logging stopped"
-					", interface %s unregistered\n",
-					dev->name);
 				break;
 			}
 		}
 		netconsole_target_put(nt);
 	}
 	spin_unlock_irqrestore(&target_list_lock, flags);
+	if (event == NETDEV_UNREGISTER || event == NETDEV_BONDING_DESLAVE)
+		printk(KERN_INFO "netconsole: network logging stopped, "
+			"interface %s %s\n",  dev->name,
+			event == NETDEV_UNREGISTER ? "unregistered" : "released slaves");
 
 done:
 	return NOTIFY_DONE;
Index: linux-2.6/include/linux/notifier.h
===================================================================
--- linux-2.6.orig/include/linux/notifier.h
+++ linux-2.6/include/linux/notifier.h
@@ -203,6 +203,7 @@ static inline int notifier_to_errno(int 
 #define NETDEV_BONDING_NEWTYPE  0x000F
 #define NETDEV_POST_INIT	0x0010
 #define NETDEV_UNREGISTER_BATCH 0x0011
+#define NETDEV_BONDING_DESLAVE  0x0012
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN

^ permalink raw reply	[flat|nested] 73+ messages in thread

* [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-05  8:11 ` Amerigo Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Amerigo Wang @ 2010-05-05  8:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jay Vosburgh, Amerigo Wang, Neil Horman, netdev, Matt Mackall,
	bridge, David Miller, Jeff Moyer, Andy Gospodarek, bonding-devel

V5:
Fix coding style problems pointed out by David.

V4:
Use "unlikely" to mark netpoll call path, suggested by Stephen.
Handle NETDEV_GOING_DOWN case.

V3:
Update to latest Linus' tree.
Fix deadlocks when releasing slaves of bonding devices.
Thanks to Andy.

V2:
Fix some bugs of previous version.
Remove ->netpoll_setup and ->netpoll_xmit, they are not necessary.
Don't poll all underlying devices, poll ->real_dev in struct netpoll.
Thanks to David for suggesting above.

------------>

This whole patchset is for adding netpoll support to bridge and bonding
devices. I already tested it for bridge, bonding, bridge over bonding,
and bonding over bridge. It looks fine now.


To make bridge and bonding support netpoll, we need to adjust
some netpoll generic code. This patch does the following things:

1) introduce two new priv_flags for struct net_device:
   IFF_IN_NETPOLL which identifies we are processing a netpoll;
   IFF_DISABLE_NETPOLL is used to disable netpoll support for a device
   at run-time;

2) introduce one new method for netdev_ops:
   ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
     removed.

3) introduce netpoll_poll_dev() which takes a struct net_device * parameter;
   export netpoll_send_skb() and netpoll_poll_dev() which will be used later;

4) store a pointer to struct netpoll in struct netpoll_info, which will also
   be used later.

5) introduce ->real_dev for struct netpoll.

6) introduce a new notifier event, NETDEV_BONDING_DESLAVE, which is used to
   disable netconsole before releasing a slave, to avoid deadlocks.

Cc: David Miller <davem@davemloft.net>
Cc: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: WANG Cong <amwang@redhat.com>

---

Index: linux-2.6/include/linux/if.h
===================================================================
--- linux-2.6.orig/include/linux/if.h
+++ linux-2.6/include/linux/if.h
@@ -71,6 +71,8 @@
 					 * release skb->dst
 					 */
 #define IFF_DONT_BRIDGE 0x800		/* disallow bridging this ether dev */
+#define IFF_IN_NETPOLL	0x1000		/* whether we are processing netpoll */
+#define IFF_DISABLE_NETPOLL	0x2000	/* disable netpoll at run-time */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
Index: linux-2.6/include/linux/netdevice.h
===================================================================
--- linux-2.6.orig/include/linux/netdevice.h
+++ linux-2.6/include/linux/netdevice.h
@@ -667,6 +667,7 @@ struct net_device_ops {
 						        unsigned short vid);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*ndo_poll_controller)(struct net_device *dev);
+	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
 #endif
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
 						  int queue, u8 *mac);
Index: linux-2.6/include/linux/netpoll.h
===================================================================
--- linux-2.6.orig/include/linux/netpoll.h
+++ linux-2.6/include/linux/netpoll.h
@@ -14,6 +14,7 @@
 
 struct netpoll {
 	struct net_device *dev;
+	struct net_device *real_dev;
 	char dev_name[IFNAMSIZ];
 	const char *name;
 	void (*rx_hook)(struct netpoll *, int, char *, int);
@@ -36,8 +37,11 @@ struct netpoll_info {
 	struct sk_buff_head txq;
 
 	struct delayed_work tx_work;
+
+	struct netpoll *netpoll;
 };
 
+void netpoll_poll_dev(struct net_device *dev);
 void netpoll_poll(struct netpoll *np);
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
 void netpoll_print_options(struct netpoll *np);
@@ -47,6 +51,7 @@ int netpoll_trap(void);
 void netpoll_set_trap(int trap);
 void netpoll_cleanup(struct netpoll *np);
 int __netpoll_rx(struct sk_buff *skb);
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
 
 
 #ifdef CONFIG_NETPOLL
Index: linux-2.6/net/core/netpoll.c
===================================================================
--- linux-2.6.orig/net/core/netpoll.c
+++ linux-2.6/net/core/netpoll.c
@@ -179,9 +179,8 @@ static void service_arp_queue(struct net
 	}
 }
 
-void netpoll_poll(struct netpoll *np)
+void netpoll_poll_dev(struct net_device *dev)
 {
-	struct net_device *dev = np->dev;
 	const struct net_device_ops *ops;
 
 	if (!dev || !netif_running(dev))
@@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np)
 	zap_completion_queue();
 }
 
+void netpoll_poll(struct netpoll *np)
+{
+	netpoll_poll_dev(np->dev);
+}
+
 static void refill_skbs(void)
 {
 	struct sk_buff *skb;
@@ -282,7 +286,7 @@ static int netpoll_owner_active(struct n
 	return 0;
 }
 
-static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
 	int status = NETDEV_TX_BUSY;
 	unsigned long tries;
@@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netp
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
 				if (!netif_tx_queue_stopped(txq)) {
+					dev->priv_flags |= IFF_IN_NETPOLL;
 					status = ops->ndo_start_xmit(skb, dev);
+					dev->priv_flags &= ~IFF_IN_NETPOLL;
 					if (status == NETDEV_TX_OK)
 						txq_trans_update(txq);
 				}
@@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np)
 		atomic_inc(&npinfo->refcnt);
 	}
 
-	if (!ndev->netdev_ops->ndo_poll_controller) {
+	npinfo->netpoll = np;
+
+	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
+	    !ndev->netdev_ops->ndo_poll_controller) {
 		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
 		       np->name, np->dev_name);
 		err = -ENOTSUPP;
@@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np)
 			}
 
 			if (atomic_dec_and_test(&npinfo->refcnt)) {
+				const struct net_device_ops *ops;
 				skb_queue_purge(&npinfo->arp_tx);
 				skb_queue_purge(&npinfo->txq);
 				cancel_rearming_delayed_work(&npinfo->tx_work);
@@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np)
 				/* clean after last, unfinished work */
 				__skb_queue_purge(&npinfo->txq);
 				kfree(npinfo);
-				np->dev->npinfo = NULL;
+				ops = np->dev->netdev_ops;
+				if (ops->ndo_netpoll_cleanup)
+					ops->ndo_netpoll_cleanup(np->dev);
+				else
+					np->dev->npinfo = NULL;
 			}
 		}
 
@@ -908,6 +922,7 @@ void netpoll_set_trap(int trap)
 		atomic_dec(&trapped);
 }
 
+EXPORT_SYMBOL(netpoll_send_skb);
 EXPORT_SYMBOL(netpoll_set_trap);
 EXPORT_SYMBOL(netpoll_trap);
 EXPORT_SYMBOL(netpoll_print_options);
@@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options);
 EXPORT_SYMBOL(netpoll_setup);
 EXPORT_SYMBOL(netpoll_cleanup);
 EXPORT_SYMBOL(netpoll_send_udp);
+EXPORT_SYMBOL(netpoll_poll_dev);
 EXPORT_SYMBOL(netpoll_poll);
Index: linux-2.6/drivers/net/netconsole.c
===================================================================
--- linux-2.6.orig/drivers/net/netconsole.c
+++ linux-2.6/drivers/net/netconsole.c
@@ -665,7 +665,8 @@ static int netconsole_netdev_event(struc
 	struct netconsole_target *nt;
 	struct net_device *dev = ptr;
 
-	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER))
+	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER ||
+	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN))
 		goto done;
 
 	spin_lock_irqsave(&target_list_lock, flags);
@@ -677,19 +678,21 @@ static int netconsole_netdev_event(struc
 				strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ);
 				break;
 			case NETDEV_UNREGISTER:
-				if (!nt->enabled)
-					break;
 				netpoll_cleanup(&nt->np);
+				/* Fall through */
+			case NETDEV_GOING_DOWN:
+			case NETDEV_BONDING_DESLAVE:
 				nt->enabled = 0;
-				printk(KERN_INFO "netconsole: network logging stopped"
-					", interface %s unregistered\n",
-					dev->name);
 				break;
 			}
 		}
 		netconsole_target_put(nt);
 	}
 	spin_unlock_irqrestore(&target_list_lock, flags);
+	if (event == NETDEV_UNREGISTER || event == NETDEV_BONDING_DESLAVE)
+		printk(KERN_INFO "netconsole: network logging stopped, "
+			"interface %s %s\n",  dev->name,
+			event == NETDEV_UNREGISTER ? "unregistered" : "released slaves");
 
 done:
 	return NOTIFY_DONE;
Index: linux-2.6/include/linux/notifier.h
===================================================================
--- linux-2.6.orig/include/linux/notifier.h
+++ linux-2.6/include/linux/notifier.h
@@ -203,6 +203,7 @@ static inline int notifier_to_errno(int 
 #define NETDEV_BONDING_NEWTYPE  0x000F
 #define NETDEV_POST_INIT	0x0010
 #define NETDEV_UNREGISTER_BATCH 0x0011
+#define NETDEV_BONDING_DESLAVE  0x0012
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN

^ permalink raw reply	[flat|nested] 73+ messages in thread

* [v5 Patch 2/3] bridge: make bridge support netpoll
  2010-05-05  8:11 ` Amerigo Wang
  (?)
@ 2010-05-05  8:11   ` Amerigo Wang
  -1 siblings, 0 replies; 73+ messages in thread
From: Amerigo Wang @ 2010-05-05  8:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: Stephen Hemminger, netdev, bridge, Andy Gospodarek, Neil Horman,
	Amerigo Wang, Jeff Moyer, Matt Mackall, bonding-devel,
	Jay Vosburgh, David Miller


Based on the previous patch, make bridge support netpoll by:

1) implement the 2 methods to support netpoll for bridge;

2) modify netpoll while forwarding packets via the bridge;

3) disable netpoll support of bridge when a netpoll-incapable device
   is added to bridge;

4) enable netpoll support when all underlying devices support netpoll.

Cc: David Miller <davem@davemloft.net>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Stephen Hemminger <shemminger@linux-foundation.org>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: WANG Cong <amwang@redhat.com>

---

Index: linux-2.6/net/bridge/br_device.c
===================================================================
--- linux-2.6.orig/net/bridge/br_device.c
+++ linux-2.6/net/bridge/br_device.c
@@ -13,8 +13,10 @@
 
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
+#include <linux/list.h>
 
 #include <asm/uaccess.h>
 #include "br_private.h"
@@ -162,6 +164,59 @@ static int br_set_tx_csum(struct net_dev
 	return 0;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+bool br_devices_support_netpoll(struct net_bridge *br)
+{
+	struct net_bridge_port *p;
+	bool ret = true;
+	int count = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&br->lock, flags);
+	list_for_each_entry(p, &br->port_list, list) {
+		count++;
+		if ((p->dev->priv_flags & IFF_DISABLE_NETPOLL) ||
+		    !p->dev->netdev_ops->ndo_poll_controller)
+			ret = false;
+	}
+	spin_unlock_irqrestore(&br->lock, flags);
+	return count != 0 && ret;
+}
+
+static void br_poll_controller(struct net_device *br_dev)
+{
+	struct netpoll *np = br_dev->npinfo->netpoll;
+
+	if (np->real_dev != br_dev)
+		netpoll_poll_dev(np->real_dev);
+}
+
+void br_netpoll_cleanup(struct net_device *br_dev)
+{
+	struct net_bridge *br = netdev_priv(br_dev);
+	struct net_bridge_port *p, *n;
+	const struct net_device_ops *ops;
+
+	br->dev->npinfo = NULL;
+	list_for_each_entry_safe(p, n, &br->port_list, list) {
+		if (p->dev) {
+			ops = p->dev->netdev_ops;
+			if (ops->ndo_netpoll_cleanup)
+				ops->ndo_netpoll_cleanup(p->dev);
+			else
+				p->dev->npinfo = NULL;
+		}
+	}
+}
+
+#else
+
+void br_netpoll_cleanup(struct net_device *br_dev)
+{
+}
+
+#endif
+
 static const struct ethtool_ops br_ethtool_ops = {
 	.get_drvinfo    = br_getinfo,
 	.get_link	= ethtool_op_get_link,
@@ -184,6 +239,10 @@ static const struct net_device_ops br_ne
 	.ndo_set_multicast_list	 = br_dev_set_multicast_list,
 	.ndo_change_mtu		 = br_change_mtu,
 	.ndo_do_ioctl		 = br_dev_ioctl,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_netpoll_cleanup	 = br_netpoll_cleanup,
+	.ndo_poll_controller	 = br_poll_controller,
+#endif
 };
 
 void br_dev_setup(struct net_device *dev)
Index: linux-2.6/net/bridge/br_forward.c
===================================================================
--- linux-2.6.orig/net/bridge/br_forward.c
+++ linux-2.6/net/bridge/br_forward.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/skbuff.h>
 #include <linux/if_vlan.h>
 #include <linux/netfilter_bridge.h>
@@ -50,7 +51,13 @@ int br_dev_queue_push_xmit(struct sk_buf
 		else {
 			skb_push(skb, ETH_HLEN);
 
-			dev_queue_xmit(skb);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+			if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) {
+				netpoll_send_skb(skb->dev->npinfo->netpoll, skb);
+				skb->dev->priv_flags &= ~IFF_IN_NETPOLL;
+			} else
+#endif
+				dev_queue_xmit(skb);
 		}
 	}
 
@@ -66,9 +73,23 @@ int br_forward_finish(struct sk_buff *sk
 
 static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 {
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	struct net_bridge *br = to->br;
+	if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) {
+		struct netpoll *np;
+		to->dev->npinfo = skb->dev->npinfo;
+		np = skb->dev->npinfo->netpoll;
+		np->real_dev = np->dev = to->dev;
+		to->dev->priv_flags |= IFF_IN_NETPOLL;
+	}
+#endif
 	skb->dev = to->dev;
 	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
 			br_forward_finish);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (skb->dev->npinfo)
+		skb->dev->npinfo->netpoll->dev = br->dev;
+#endif
 }
 
 static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
Index: linux-2.6/net/bridge/br_if.c
===================================================================
--- linux-2.6.orig/net/bridge/br_if.c
+++ linux-2.6/net/bridge/br_if.c
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/ethtool.h>
 #include <linux/if_arp.h>
 #include <linux/module.h>
@@ -153,6 +154,14 @@ static void del_nbp(struct net_bridge_po
 	kobject_uevent(&p->kobj, KOBJ_REMOVE);
 	kobject_del(&p->kobj);
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (br_devices_support_netpoll(br))
+		br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+	if (dev->netdev_ops->ndo_netpoll_cleanup)
+		dev->netdev_ops->ndo_netpoll_cleanup(dev);
+	else
+		dev->npinfo = NULL;
+#endif
 	call_rcu(&p->rcu, destroy_nbp_rcu);
 }
 
@@ -165,6 +174,8 @@ static void del_br(struct net_bridge *br
 		del_nbp(p);
 	}
 
+	br_netpoll_cleanup(br->dev);
+
 	del_timer_sync(&br->gc_timer);
 
 	br_sysfs_delbr(br->dev);
@@ -438,6 +449,20 @@ int br_add_if(struct net_bridge *br, str
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (br_devices_support_netpoll(br)) {
+		br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+		if (br->dev->npinfo)
+			dev->npinfo = br->dev->npinfo;
+	} else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) {
+		br->dev->priv_flags |= IFF_DISABLE_NETPOLL;
+		printk(KERN_INFO "New device %s does not support netpoll\n",
+			dev->name);
+		printk(KERN_INFO "Disabling netpoll for %s\n",
+			br->dev->name);
+	}
+#endif
+
 	return 0;
 err2:
 	br_fdb_delete_by_port(br, p, 1);
Index: linux-2.6/net/bridge/br_private.h
===================================================================
--- linux-2.6.orig/net/bridge/br_private.h
+++ linux-2.6/net/bridge/br_private.h
@@ -233,6 +233,8 @@ static inline int br_is_root_bridge(cons
 extern void br_dev_setup(struct net_device *dev);
 extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
 			       struct net_device *dev);
+extern bool br_devices_support_netpoll(struct net_bridge *br);
+extern void br_netpoll_cleanup(struct net_device *br_dev);
 
 /* br_fdb.c */
 extern int br_fdb_init(void);

^ permalink raw reply	[flat|nested] 73+ messages in thread

* [v5 Patch 2/3] bridge: make bridge support netpoll
@ 2010-05-05  8:11   ` Amerigo Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Amerigo Wang @ 2010-05-05  8:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jay Vosburgh, Amerigo Wang, Neil Horman, netdev, Matt Mackall,
	bridge, David Miller, Jeff Moyer, Andy Gospodarek, bonding-devel


Based on the previous patch, make bridge support netpoll by:

1) implement the 2 methods to support netpoll for bridge;

2) modify netpoll while forwarding packets via bridge;

3) disable netpoll support of bridge when a device that does not
   support netpoll is added to bridge;

4) enable netpoll support when all underlying devices support netpoll.

Cc: David Miller <davem@davemloft.net>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Stephen Hemminger <shemminger@linux-foundation.org>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: WANG Cong <amwang@redhat.com>

---

Index: linux-2.6/net/bridge/br_device.c
===================================================================
--- linux-2.6.orig/net/bridge/br_device.c
+++ linux-2.6/net/bridge/br_device.c
@@ -13,8 +13,10 @@
 
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
+#include <linux/list.h>
 
 #include <asm/uaccess.h>
 #include "br_private.h"
@@ -162,6 +164,59 @@ static int br_set_tx_csum(struct net_dev
 	return 0;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+bool br_devices_support_netpoll(struct net_bridge *br)
+{
+	struct net_bridge_port *p;
+	bool ret = true;
+	int count = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&br->lock, flags);
+	list_for_each_entry(p, &br->port_list, list) {
+		count++;
+		if ((p->dev->priv_flags & IFF_DISABLE_NETPOLL) ||
+		    !p->dev->netdev_ops->ndo_poll_controller)
+			ret = false;
+	}
+	spin_unlock_irqrestore(&br->lock, flags);
+	return count != 0 && ret;
+}
+
+static void br_poll_controller(struct net_device *br_dev)
+{
+	struct netpoll *np = br_dev->npinfo->netpoll;
+
+	if (np->real_dev != br_dev)
+		netpoll_poll_dev(np->real_dev);
+}
+
+void br_netpoll_cleanup(struct net_device *br_dev)
+{
+	struct net_bridge *br = netdev_priv(br_dev);
+	struct net_bridge_port *p, *n;
+	const struct net_device_ops *ops;
+
+	br->dev->npinfo = NULL;
+	list_for_each_entry_safe(p, n, &br->port_list, list) {
+		if (p->dev) {
+			ops = p->dev->netdev_ops;
+			if (ops->ndo_netpoll_cleanup)
+				ops->ndo_netpoll_cleanup(p->dev);
+			else
+				p->dev->npinfo = NULL;
+		}
+	}
+}
+
+#else
+
+void br_netpoll_cleanup(struct net_device *br_dev)
+{
+}
+
+#endif
+
 static const struct ethtool_ops br_ethtool_ops = {
 	.get_drvinfo    = br_getinfo,
 	.get_link	= ethtool_op_get_link,
@@ -184,6 +239,10 @@ static const struct net_device_ops br_ne
 	.ndo_set_multicast_list	 = br_dev_set_multicast_list,
 	.ndo_change_mtu		 = br_change_mtu,
 	.ndo_do_ioctl		 = br_dev_ioctl,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_netpoll_cleanup	 = br_netpoll_cleanup,
+	.ndo_poll_controller	 = br_poll_controller,
+#endif
 };
 
 void br_dev_setup(struct net_device *dev)
Index: linux-2.6/net/bridge/br_forward.c
===================================================================
--- linux-2.6.orig/net/bridge/br_forward.c
+++ linux-2.6/net/bridge/br_forward.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/skbuff.h>
 #include <linux/if_vlan.h>
 #include <linux/netfilter_bridge.h>
@@ -50,7 +51,13 @@ int br_dev_queue_push_xmit(struct sk_buf
 		else {
 			skb_push(skb, ETH_HLEN);
 
-			dev_queue_xmit(skb);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+			if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) {
+				netpoll_send_skb(skb->dev->npinfo->netpoll, skb);
+				skb->dev->priv_flags &= ~IFF_IN_NETPOLL;
+			} else
+#endif
+				dev_queue_xmit(skb);
 		}
 	}
 
@@ -66,9 +73,23 @@ int br_forward_finish(struct sk_buff *sk
 
 static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 {
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	struct net_bridge *br = to->br;
+	if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) {
+		struct netpoll *np;
+		to->dev->npinfo = skb->dev->npinfo;
+		np = skb->dev->npinfo->netpoll;
+		np->real_dev = np->dev = to->dev;
+		to->dev->priv_flags |= IFF_IN_NETPOLL;
+	}
+#endif
 	skb->dev = to->dev;
 	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
 			br_forward_finish);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (skb->dev->npinfo)
+		skb->dev->npinfo->netpoll->dev = br->dev;
+#endif
 }
 
 static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
Index: linux-2.6/net/bridge/br_if.c
===================================================================
--- linux-2.6.orig/net/bridge/br_if.c
+++ linux-2.6/net/bridge/br_if.c
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/ethtool.h>
 #include <linux/if_arp.h>
 #include <linux/module.h>
@@ -153,6 +154,14 @@ static void del_nbp(struct net_bridge_po
 	kobject_uevent(&p->kobj, KOBJ_REMOVE);
 	kobject_del(&p->kobj);
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (br_devices_support_netpoll(br))
+		br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+	if (dev->netdev_ops->ndo_netpoll_cleanup)
+		dev->netdev_ops->ndo_netpoll_cleanup(dev);
+	else
+		dev->npinfo = NULL;
+#endif
 	call_rcu(&p->rcu, destroy_nbp_rcu);
 }
 
@@ -165,6 +174,8 @@ static void del_br(struct net_bridge *br
 		del_nbp(p);
 	}
 
+	br_netpoll_cleanup(br->dev);
+
 	del_timer_sync(&br->gc_timer);
 
 	br_sysfs_delbr(br->dev);
@@ -438,6 +449,20 @@ int br_add_if(struct net_bridge *br, str
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (br_devices_support_netpoll(br)) {
+		br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+		if (br->dev->npinfo)
+			dev->npinfo = br->dev->npinfo;
+	} else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) {
+		br->dev->priv_flags |= IFF_DISABLE_NETPOLL;
+		printk(KERN_INFO "New device %s does not support netpoll\n",
+			dev->name);
+		printk(KERN_INFO "Disabling netpoll for %s\n",
+			br->dev->name);
+	}
+#endif
+
 	return 0;
 err2:
 	br_fdb_delete_by_port(br, p, 1);
Index: linux-2.6/net/bridge/br_private.h
===================================================================
--- linux-2.6.orig/net/bridge/br_private.h
+++ linux-2.6/net/bridge/br_private.h
@@ -233,6 +233,8 @@ static inline int br_is_root_bridge(cons
 extern void br_dev_setup(struct net_device *dev);
 extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
 			       struct net_device *dev);
+extern bool br_devices_support_netpoll(struct net_bridge *br);
+extern void br_netpoll_cleanup(struct net_device *br_dev);
 
 /* br_fdb.c */
 extern int br_fdb_init(void);

^ permalink raw reply	[flat|nested] 73+ messages in thread

* [Bridge] [v5 Patch 2/3] bridge: make bridge support netpoll
@ 2010-05-05  8:11   ` Amerigo Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Amerigo Wang @ 2010-05-05  8:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jay Vosburgh, Amerigo Wang, Neil Horman, netdev, Matt Mackall,
	bridge, David Miller, Jeff Moyer, Andy Gospodarek, bonding-devel


Based on the previous patch, make bridge support netpoll by:

1) implement the 2 methods to support netpoll for bridge;

2) modify netpoll while forwarding packets via bridge;

3) disable netpoll support of bridge when a device that does not
   support netpoll is added to bridge;

4) enable netpoll support when all underlying devices support netpoll.

Cc: David Miller <davem@davemloft.net>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Stephen Hemminger <shemminger@linux-foundation.org>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: WANG Cong <amwang@redhat.com>

---

Index: linux-2.6/net/bridge/br_device.c
===================================================================
--- linux-2.6.orig/net/bridge/br_device.c
+++ linux-2.6/net/bridge/br_device.c
@@ -13,8 +13,10 @@
 
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
+#include <linux/list.h>
 
 #include <asm/uaccess.h>
 #include "br_private.h"
@@ -162,6 +164,59 @@ static int br_set_tx_csum(struct net_dev
 	return 0;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+bool br_devices_support_netpoll(struct net_bridge *br)
+{
+	struct net_bridge_port *p;
+	bool ret = true;
+	int count = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&br->lock, flags);
+	list_for_each_entry(p, &br->port_list, list) {
+		count++;
+		if ((p->dev->priv_flags & IFF_DISABLE_NETPOLL) ||
+		    !p->dev->netdev_ops->ndo_poll_controller)
+			ret = false;
+	}
+	spin_unlock_irqrestore(&br->lock, flags);
+	return count != 0 && ret;
+}
+
+static void br_poll_controller(struct net_device *br_dev)
+{
+	struct netpoll *np = br_dev->npinfo->netpoll;
+
+	if (np->real_dev != br_dev)
+		netpoll_poll_dev(np->real_dev);
+}
+
+void br_netpoll_cleanup(struct net_device *br_dev)
+{
+	struct net_bridge *br = netdev_priv(br_dev);
+	struct net_bridge_port *p, *n;
+	const struct net_device_ops *ops;
+
+	br->dev->npinfo = NULL;
+	list_for_each_entry_safe(p, n, &br->port_list, list) {
+		if (p->dev) {
+			ops = p->dev->netdev_ops;
+			if (ops->ndo_netpoll_cleanup)
+				ops->ndo_netpoll_cleanup(p->dev);
+			else
+				p->dev->npinfo = NULL;
+		}
+	}
+}
+
+#else
+
+void br_netpoll_cleanup(struct net_device *br_dev)
+{
+}
+
+#endif
+
 static const struct ethtool_ops br_ethtool_ops = {
 	.get_drvinfo    = br_getinfo,
 	.get_link	= ethtool_op_get_link,
@@ -184,6 +239,10 @@ static const struct net_device_ops br_ne
 	.ndo_set_multicast_list	 = br_dev_set_multicast_list,
 	.ndo_change_mtu		 = br_change_mtu,
 	.ndo_do_ioctl		 = br_dev_ioctl,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_netpoll_cleanup	 = br_netpoll_cleanup,
+	.ndo_poll_controller	 = br_poll_controller,
+#endif
 };
 
 void br_dev_setup(struct net_device *dev)
Index: linux-2.6/net/bridge/br_forward.c
===================================================================
--- linux-2.6.orig/net/bridge/br_forward.c
+++ linux-2.6/net/bridge/br_forward.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/skbuff.h>
 #include <linux/if_vlan.h>
 #include <linux/netfilter_bridge.h>
@@ -50,7 +51,13 @@ int br_dev_queue_push_xmit(struct sk_buf
 		else {
 			skb_push(skb, ETH_HLEN);
 
-			dev_queue_xmit(skb);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+			if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) {
+				netpoll_send_skb(skb->dev->npinfo->netpoll, skb);
+				skb->dev->priv_flags &= ~IFF_IN_NETPOLL;
+			} else
+#endif
+				dev_queue_xmit(skb);
 		}
 	}
 
@@ -66,9 +73,23 @@ int br_forward_finish(struct sk_buff *sk
 
 static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 {
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	struct net_bridge *br = to->br;
+	if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) {
+		struct netpoll *np;
+		to->dev->npinfo = skb->dev->npinfo;
+		np = skb->dev->npinfo->netpoll;
+		np->real_dev = np->dev = to->dev;
+		to->dev->priv_flags |= IFF_IN_NETPOLL;
+	}
+#endif
 	skb->dev = to->dev;
 	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
 			br_forward_finish);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (skb->dev->npinfo)
+		skb->dev->npinfo->netpoll->dev = br->dev;
+#endif
 }
 
 static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
Index: linux-2.6/net/bridge/br_if.c
===================================================================
--- linux-2.6.orig/net/bridge/br_if.c
+++ linux-2.6/net/bridge/br_if.c
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/ethtool.h>
 #include <linux/if_arp.h>
 #include <linux/module.h>
@@ -153,6 +154,14 @@ static void del_nbp(struct net_bridge_po
 	kobject_uevent(&p->kobj, KOBJ_REMOVE);
 	kobject_del(&p->kobj);
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (br_devices_support_netpoll(br))
+		br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+	if (dev->netdev_ops->ndo_netpoll_cleanup)
+		dev->netdev_ops->ndo_netpoll_cleanup(dev);
+	else
+		dev->npinfo = NULL;
+#endif
 	call_rcu(&p->rcu, destroy_nbp_rcu);
 }
 
@@ -165,6 +174,8 @@ static void del_br(struct net_bridge *br
 		del_nbp(p);
 	}
 
+	br_netpoll_cleanup(br->dev);
+
 	del_timer_sync(&br->gc_timer);
 
 	br_sysfs_delbr(br->dev);
@@ -438,6 +449,20 @@ int br_add_if(struct net_bridge *br, str
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (br_devices_support_netpoll(br)) {
+		br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+		if (br->dev->npinfo)
+			dev->npinfo = br->dev->npinfo;
+	} else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) {
+		br->dev->priv_flags |= IFF_DISABLE_NETPOLL;
+		printk(KERN_INFO "New device %s does not support netpoll\n",
+			dev->name);
+		printk(KERN_INFO "Disabling netpoll for %s\n",
+			br->dev->name);
+	}
+#endif
+
 	return 0;
 err2:
 	br_fdb_delete_by_port(br, p, 1);
Index: linux-2.6/net/bridge/br_private.h
===================================================================
--- linux-2.6.orig/net/bridge/br_private.h
+++ linux-2.6/net/bridge/br_private.h
@@ -233,6 +233,8 @@ static inline int br_is_root_bridge(cons
 extern void br_dev_setup(struct net_device *dev);
 extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
 			       struct net_device *dev);
+extern bool br_devices_support_netpoll(struct net_bridge *br);
+extern void br_netpoll_cleanup(struct net_device *br_dev);
 
 /* br_fdb.c */
 extern int br_fdb_init(void);

^ permalink raw reply	[flat|nested] 73+ messages in thread

* [v5 Patch 3/3] bonding: make bonding support netpoll
  2010-05-05  8:11 ` Amerigo Wang
  (?)
@ 2010-05-05  8:11   ` Amerigo Wang
  -1 siblings, 0 replies; 73+ messages in thread
From: Amerigo Wang @ 2010-05-05  8:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: Matt Mackall, netdev, bridge, Andy Gospodarek, Neil Horman,
	Amerigo Wang, Jeff Moyer, Stephen Hemminger, bonding-devel,
	Jay Vosburgh, David Miller


Based on Andy's work, but I modified a lot.

Similar to the patch for bridge, this patch does:

1) implement the 2 methods to support netpoll for bonding;

2) modify netpoll while forwarding packets via bonding;

3) disable netpoll support of bonding when a device that does not
   support netpoll is added to bonding;

4) enable netpoll support when all underlying devices support netpoll.

Cc: Andy Gospodarek <gospo@redhat.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Jay Vosburgh <fubar@us.ibm.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: WANG Cong <amwang@redhat.com>

---

Index: linux-2.6/drivers/net/bonding/bond_main.c
===================================================================
--- linux-2.6.orig/drivers/net/bonding/bond_main.c
+++ linux-2.6/drivers/net/bonding/bond_main.c
@@ -59,6 +59,7 @@
 #include <linux/uaccess.h>
 #include <linux/errno.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
 #include <linux/etherdevice.h>
@@ -430,7 +431,18 @@ int bond_dev_queue_xmit(struct bonding *
 	}
 
 	skb->priority = 1;
-	dev_queue_xmit(skb);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (unlikely(bond->dev->priv_flags & IFF_IN_NETPOLL)) {
+		struct netpoll *np = bond->dev->npinfo->netpoll;
+		slave_dev->npinfo = bond->dev->npinfo;
+		np->real_dev = np->dev = skb->dev;
+		slave_dev->priv_flags |= IFF_IN_NETPOLL;
+		netpoll_send_skb(np, skb);
+		slave_dev->priv_flags &= ~IFF_IN_NETPOLL;
+		np->dev = bond->dev;
+	} else
+#endif
+		dev_queue_xmit(skb);
 
 	return 0;
 }
@@ -1329,6 +1341,61 @@ static void bond_detach_slave(struct bon
 	bond->slave_cnt--;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/*
+ * You must hold read lock on bond->lock before calling this.
+ */
+static bool slaves_support_netpoll(struct net_device *bond_dev)
+{
+	struct bonding *bond = netdev_priv(bond_dev);
+	struct slave *slave;
+	int i = 0;
+	bool ret = true;
+
+	bond_for_each_slave(bond, slave, i) {
+		if ((slave->dev->priv_flags & IFF_DISABLE_NETPOLL) ||
+		    !slave->dev->netdev_ops->ndo_poll_controller)
+			ret = false;
+	}
+	return i != 0 && ret;
+}
+
+static void bond_poll_controller(struct net_device *bond_dev)
+{
+	struct net_device *dev = bond_dev->npinfo->netpoll->real_dev;
+	if (dev != bond_dev)
+		netpoll_poll_dev(dev);
+}
+
+static void bond_netpoll_cleanup(struct net_device *bond_dev)
+{
+	struct bonding *bond = netdev_priv(bond_dev);
+	struct slave *slave;
+	const struct net_device_ops *ops;
+	int i;
+
+	read_lock(&bond->lock);
+	bond_dev->npinfo = NULL;
+	bond_for_each_slave(bond, slave, i) {
+		if (slave->dev) {
+			ops = slave->dev->netdev_ops;
+			if (ops->ndo_netpoll_cleanup)
+				ops->ndo_netpoll_cleanup(slave->dev);
+			else
+				slave->dev->npinfo = NULL;
+		}
+	}
+	read_unlock(&bond->lock);
+}
+
+#else
+
+static void bond_netpoll_cleanup(struct net_device *bond_dev)
+{
+}
+
+#endif
+
 /*---------------------------------- IOCTL ----------------------------------*/
 
 static int bond_sethwaddr(struct net_device *bond_dev,
@@ -1735,6 +1802,18 @@ int bond_enslave(struct net_device *bond
 
 	bond_set_carrier(bond);
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (slaves_support_netpoll(bond_dev)) {
+		bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+		if (bond_dev->npinfo)
+			slave_dev->npinfo = bond_dev->npinfo;
+	} else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) {
+		bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
+		pr_info("New slave device %s does not support netpoll\n",
+			slave_dev->name);
+		pr_info("Disabling netpoll support for %s\n", bond_dev->name);
+	}
+#endif
 	read_unlock(&bond->lock);
 
 	res = bond_create_slave_symlinks(bond_dev, slave_dev);
@@ -1801,6 +1880,7 @@ int bond_release(struct net_device *bond
 		return -EINVAL;
 	}
 
+	netdev_bonding_change(bond_dev, NETDEV_BONDING_DESLAVE);
 	write_lock_bh(&bond->lock);
 
 	slave = bond_get_slave_by_dev(bond, slave_dev);
@@ -1929,6 +2009,17 @@ int bond_release(struct net_device *bond
 
 	netdev_set_master(slave_dev, NULL);
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	read_lock_bh(&bond->lock);
+	if (slaves_support_netpoll(bond_dev))
+		bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+	read_unlock_bh(&bond->lock);
+	if (slave_dev->netdev_ops->ndo_netpoll_cleanup)
+		slave_dev->netdev_ops->ndo_netpoll_cleanup(slave_dev);
+	else
+		slave_dev->npinfo = NULL;
+#endif
+
 	/* close slave before restoring its mac address */
 	dev_close(slave_dev);
 
@@ -4448,6 +4539,10 @@ static const struct net_device_ops bond_
 	.ndo_vlan_rx_register	= bond_vlan_rx_register,
 	.ndo_vlan_rx_add_vid 	= bond_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= bond_vlan_rx_kill_vid,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_netpoll_cleanup	= bond_netpoll_cleanup,
+	.ndo_poll_controller	= bond_poll_controller,
+#endif
 };
 
 static void bond_destructor(struct net_device *bond_dev)
@@ -4541,6 +4636,8 @@ static void bond_uninit(struct net_devic
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 
+	bond_netpoll_cleanup(bond_dev);
+
 	/* Release the bonded slaves */
 	bond_release_all(bond_dev);
 

^ permalink raw reply	[flat|nested] 73+ messages in thread

* [v5 Patch 3/3] bonding: make bonding support netpoll
@ 2010-05-05  8:11   ` Amerigo Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Amerigo Wang @ 2010-05-05  8:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jay Vosburgh, Amerigo Wang, Neil Horman, netdev, Matt Mackall,
	bridge, David Miller, Jeff Moyer, Andy Gospodarek, bonding-devel


Based on Andy's work, but I modified a lot.

Similar to the patch for bridge, this patch does:

1) implement the 2 methods to support netpoll for bonding;

2) modify netpoll while forwarding packets via bonding;

3) disable netpoll support of bonding when a device that does not
   support netpoll is added to bonding;

4) enable netpoll support when all underlying devices support netpoll.

Cc: Andy Gospodarek <gospo@redhat.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Jay Vosburgh <fubar@us.ibm.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: WANG Cong <amwang@redhat.com>

---

Index: linux-2.6/drivers/net/bonding/bond_main.c
===================================================================
--- linux-2.6.orig/drivers/net/bonding/bond_main.c
+++ linux-2.6/drivers/net/bonding/bond_main.c
@@ -59,6 +59,7 @@
 #include <linux/uaccess.h>
 #include <linux/errno.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
 #include <linux/etherdevice.h>
@@ -430,7 +431,18 @@ int bond_dev_queue_xmit(struct bonding *
 	}
 
 	skb->priority = 1;
-	dev_queue_xmit(skb);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (unlikely(bond->dev->priv_flags & IFF_IN_NETPOLL)) {
+		struct netpoll *np = bond->dev->npinfo->netpoll;
+		slave_dev->npinfo = bond->dev->npinfo;
+		np->real_dev = np->dev = skb->dev;
+		slave_dev->priv_flags |= IFF_IN_NETPOLL;
+		netpoll_send_skb(np, skb);
+		slave_dev->priv_flags &= ~IFF_IN_NETPOLL;
+		np->dev = bond->dev;
+	} else
+#endif
+		dev_queue_xmit(skb);
 
 	return 0;
 }
@@ -1329,6 +1341,61 @@ static void bond_detach_slave(struct bon
 	bond->slave_cnt--;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/*
+ * You must hold read lock on bond->lock before calling this.
+ */
+static bool slaves_support_netpoll(struct net_device *bond_dev)
+{
+	struct bonding *bond = netdev_priv(bond_dev);
+	struct slave *slave;
+	int i = 0;
+	bool ret = true;
+
+	bond_for_each_slave(bond, slave, i) {
+		if ((slave->dev->priv_flags & IFF_DISABLE_NETPOLL) ||
+		    !slave->dev->netdev_ops->ndo_poll_controller)
+			ret = false;
+	}
+	return i != 0 && ret;
+}
+
+static void bond_poll_controller(struct net_device *bond_dev)
+{
+	struct net_device *dev = bond_dev->npinfo->netpoll->real_dev;
+	if (dev != bond_dev)
+		netpoll_poll_dev(dev);
+}
+
+static void bond_netpoll_cleanup(struct net_device *bond_dev)
+{
+	struct bonding *bond = netdev_priv(bond_dev);
+	struct slave *slave;
+	const struct net_device_ops *ops;
+	int i;
+
+	read_lock(&bond->lock);
+	bond_dev->npinfo = NULL;
+	bond_for_each_slave(bond, slave, i) {
+		if (slave->dev) {
+			ops = slave->dev->netdev_ops;
+			if (ops->ndo_netpoll_cleanup)
+				ops->ndo_netpoll_cleanup(slave->dev);
+			else
+				slave->dev->npinfo = NULL;
+		}
+	}
+	read_unlock(&bond->lock);
+}
+
+#else
+
+static void bond_netpoll_cleanup(struct net_device *bond_dev)
+{
+}
+
+#endif
+
 /*---------------------------------- IOCTL ----------------------------------*/
 
 static int bond_sethwaddr(struct net_device *bond_dev,
@@ -1735,6 +1802,18 @@ int bond_enslave(struct net_device *bond
 
 	bond_set_carrier(bond);
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (slaves_support_netpoll(bond_dev)) {
+		bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+		if (bond_dev->npinfo)
+			slave_dev->npinfo = bond_dev->npinfo;
+	} else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) {
+		bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
+		pr_info("New slave device %s does not support netpoll\n",
+			slave_dev->name);
+		pr_info("Disabling netpoll support for %s\n", bond_dev->name);
+	}
+#endif
 	read_unlock(&bond->lock);
 
 	res = bond_create_slave_symlinks(bond_dev, slave_dev);
@@ -1801,6 +1880,7 @@ int bond_release(struct net_device *bond
 		return -EINVAL;
 	}
 
+	netdev_bonding_change(bond_dev, NETDEV_BONDING_DESLAVE);
 	write_lock_bh(&bond->lock);
 
 	slave = bond_get_slave_by_dev(bond, slave_dev);
@@ -1929,6 +2009,17 @@ int bond_release(struct net_device *bond
 
 	netdev_set_master(slave_dev, NULL);
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	read_lock_bh(&bond->lock);
+	if (slaves_support_netpoll(bond_dev))
+		bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+	read_unlock_bh(&bond->lock);
+	if (slave_dev->netdev_ops->ndo_netpoll_cleanup)
+		slave_dev->netdev_ops->ndo_netpoll_cleanup(slave_dev);
+	else
+		slave_dev->npinfo = NULL;
+#endif
+
 	/* close slave before restoring its mac address */
 	dev_close(slave_dev);
 
@@ -4448,6 +4539,10 @@ static const struct net_device_ops bond_
 	.ndo_vlan_rx_register	= bond_vlan_rx_register,
 	.ndo_vlan_rx_add_vid 	= bond_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= bond_vlan_rx_kill_vid,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_netpoll_cleanup	= bond_netpoll_cleanup,
+	.ndo_poll_controller	= bond_poll_controller,
+#endif
 };
 
 static void bond_destructor(struct net_device *bond_dev)
@@ -4541,6 +4636,8 @@ static void bond_uninit(struct net_devic
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 
+	bond_netpoll_cleanup(bond_dev);
+
 	/* Release the bonded slaves */
 	bond_release_all(bond_dev);

^ permalink raw reply	[flat|nested] 73+ messages in thread

* [Bridge] [v5 Patch 3/3] bonding: make bonding support netpoll
@ 2010-05-05  8:11   ` Amerigo Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Amerigo Wang @ 2010-05-05  8:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jay Vosburgh, Amerigo Wang, Neil Horman, netdev, Matt Mackall,
	bridge, David Miller, Jeff Moyer, Andy Gospodarek, bonding-devel


Based on Andy's work, but I modified a lot.

Similar to the patch for bridge, this patch does:

1) implement the 2 methods to support netpoll for bonding;

2) modify netpoll while forwarding packets via bonding;

3) disable netpoll support of bonding when a device that does not
   support netpoll is added to bonding;

4) enable netpoll support when all underlying devices support netpoll.

Cc: Andy Gospodarek <gospo@redhat.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Jay Vosburgh <fubar@us.ibm.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: WANG Cong <amwang@redhat.com>

---

Index: linux-2.6/drivers/net/bonding/bond_main.c
===================================================================
--- linux-2.6.orig/drivers/net/bonding/bond_main.c
+++ linux-2.6/drivers/net/bonding/bond_main.c
@@ -59,6 +59,7 @@
 #include <linux/uaccess.h>
 #include <linux/errno.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
 #include <linux/etherdevice.h>
@@ -430,7 +431,18 @@ int bond_dev_queue_xmit(struct bonding *
 	}
 
 	skb->priority = 1;
-	dev_queue_xmit(skb);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (unlikely(bond->dev->priv_flags & IFF_IN_NETPOLL)) {
+		struct netpoll *np = bond->dev->npinfo->netpoll;
+		slave_dev->npinfo = bond->dev->npinfo;
+		np->real_dev = np->dev = skb->dev;
+		slave_dev->priv_flags |= IFF_IN_NETPOLL;
+		netpoll_send_skb(np, skb);
+		slave_dev->priv_flags &= ~IFF_IN_NETPOLL;
+		np->dev = bond->dev;
+	} else
+#endif
+		dev_queue_xmit(skb);
 
 	return 0;
 }
@@ -1329,6 +1341,61 @@ static void bond_detach_slave(struct bon
 	bond->slave_cnt--;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/*
+ * You must hold read lock on bond->lock before calling this.
+ */
+static bool slaves_support_netpoll(struct net_device *bond_dev)
+{
+	struct bonding *bond = netdev_priv(bond_dev);
+	struct slave *slave;
+	int i = 0;
+	bool ret = true;
+
+	bond_for_each_slave(bond, slave, i) {
+		if ((slave->dev->priv_flags & IFF_DISABLE_NETPOLL) ||
+		    !slave->dev->netdev_ops->ndo_poll_controller)
+			ret = false;
+	}
+	return i != 0 && ret;
+}
+
+static void bond_poll_controller(struct net_device *bond_dev)
+{
+	struct net_device *dev = bond_dev->npinfo->netpoll->real_dev;
+	if (dev != bond_dev)
+		netpoll_poll_dev(dev);
+}
+
+static void bond_netpoll_cleanup(struct net_device *bond_dev)
+{
+	struct bonding *bond = netdev_priv(bond_dev);
+	struct slave *slave;
+	const struct net_device_ops *ops;
+	int i;
+
+	read_lock(&bond->lock);
+	bond_dev->npinfo = NULL;
+	bond_for_each_slave(bond, slave, i) {
+		if (slave->dev) {
+			ops = slave->dev->netdev_ops;
+			if (ops->ndo_netpoll_cleanup)
+				ops->ndo_netpoll_cleanup(slave->dev);
+			else
+				slave->dev->npinfo = NULL;
+		}
+	}
+	read_unlock(&bond->lock);
+}
+
+#else
+
+static void bond_netpoll_cleanup(struct net_device *bond_dev)
+{
+}
+
+#endif
+
 /*---------------------------------- IOCTL ----------------------------------*/
 
 static int bond_sethwaddr(struct net_device *bond_dev,
@@ -1735,6 +1802,18 @@ int bond_enslave(struct net_device *bond
 
 	bond_set_carrier(bond);
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (slaves_support_netpoll(bond_dev)) {
+		bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+		if (bond_dev->npinfo)
+			slave_dev->npinfo = bond_dev->npinfo;
+	} else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) {
+		bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
+		pr_info("New slave device %s does not support netpoll\n",
+			slave_dev->name);
+		pr_info("Disabling netpoll support for %s\n", bond_dev->name);
+	}
+#endif
 	read_unlock(&bond->lock);
 
 	res = bond_create_slave_symlinks(bond_dev, slave_dev);
@@ -1801,6 +1880,7 @@ int bond_release(struct net_device *bond
 		return -EINVAL;
 	}
 
+	netdev_bonding_change(bond_dev, NETDEV_BONDING_DESLAVE);
 	write_lock_bh(&bond->lock);
 
 	slave = bond_get_slave_by_dev(bond, slave_dev);
@@ -1929,6 +2009,17 @@ int bond_release(struct net_device *bond
 
 	netdev_set_master(slave_dev, NULL);
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	read_lock_bh(&bond->lock);
+	if (slaves_support_netpoll(bond_dev))
+		bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+	read_unlock_bh(&bond->lock);
+	if (slave_dev->netdev_ops->ndo_netpoll_cleanup)
+		slave_dev->netdev_ops->ndo_netpoll_cleanup(slave_dev);
+	else
+		slave_dev->npinfo = NULL;
+#endif
+
 	/* close slave before restoring its mac address */
 	dev_close(slave_dev);
 
@@ -4448,6 +4539,10 @@ static const struct net_device_ops bond_
 	.ndo_vlan_rx_register	= bond_vlan_rx_register,
 	.ndo_vlan_rx_add_vid 	= bond_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= bond_vlan_rx_kill_vid,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_netpoll_cleanup	= bond_netpoll_cleanup,
+	.ndo_poll_controller	= bond_poll_controller,
+#endif
 };
 
 static void bond_destructor(struct net_device *bond_dev)
@@ -4541,6 +4636,8 @@ static void bond_uninit(struct net_devic
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 
+	bond_netpoll_cleanup(bond_dev);
+
 	/* Release the bonded slaves */
 	bond_release_all(bond_dev);
 

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-05  8:11 ` Amerigo Wang
@ 2010-05-06  2:05   ` Matt Mackall
  -1 siblings, 0 replies; 73+ messages in thread
From: Matt Mackall @ 2010-05-06  2:05 UTC (permalink / raw)
  To: Amerigo Wang
  Cc: linux-kernel, netdev, bridge, Andy Gospodarek, Neil Horman,
	Jeff Moyer, Stephen Hemminger, bonding-devel, Jay Vosburgh,
	David Miller

On Wed, 2010-05-05 at 04:11 -0400, Amerigo Wang wrote:
> V5:
> Fix coding style problems pointed by David.

Aside from my concern about the policy of disabling netpoll on
bridges/bonds with only partial netpoll support, I don't have any
remaining issues with this. But I'll leave it to other folks to ack the
underlying driver bits for this series.

-- 
Mathematics is the supreme nostalgia of our time.



^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-06  2:05   ` Matt Mackall
  0 siblings, 0 replies; 73+ messages in thread
From: Matt Mackall @ 2010-05-06  2:05 UTC (permalink / raw)
  To: Amerigo Wang
  Cc: Jay Vosburgh, Neil Horman, netdev, bridge, linux-kernel,
	David Miller, Jeff Moyer, Andy Gospodarek, bonding-devel

On Wed, 2010-05-05 at 04:11 -0400, Amerigo Wang wrote:
> V5:
> Fix coding style problems pointed by David.

Aside from my concern about the policy of disabling netpoll on
bridges/bonds with only partial netpoll support, I don't have any
remaining issues with this. But I'll leave it to other folks to ack the
underlying driver bits for this series.

-- 
Mathematics is the supreme nostalgia of our time.



^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-06  2:05   ` [Bridge] " Matt Mackall
@ 2010-05-06  7:44     ` David Miller
  -1 siblings, 0 replies; 73+ messages in thread
From: David Miller @ 2010-05-06  7:44 UTC (permalink / raw)
  To: mpm
  Cc: amwang, linux-kernel, netdev, bridge, gospo, nhorman, jmoyer,
	shemminger, bonding-devel, fubar

From: Matt Mackall <mpm@selenic.com>
Date: Wed, 05 May 2010 21:05:30 -0500

> On Wed, 2010-05-05 at 04:11 -0400, Amerigo Wang wrote:
>> V5:
>> Fix coding style problems pointed by David.
> 
> Aside from my concern about the policy of disabling netpoll on
> bridges/bonds with only partial netpoll support, I don't have any
> remaining issues with this. But I'll leave it to other folks to ack the
> underlying driver bits for this series.

Yes the partial support handling is a thorny issue.

But this patch set makes things better than they were before, because
support over such devices didn't work at all previously.

So I'll toss these patches into net-next-2.6, thanks everyone!

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-06  7:44     ` David Miller
  0 siblings, 0 replies; 73+ messages in thread
From: David Miller @ 2010-05-06  7:44 UTC (permalink / raw)
  To: mpm
  Cc: fubar, amwang, nhorman, netdev, bridge, linux-kernel, jmoyer,
	gospo, bonding-devel

From: Matt Mackall <mpm@selenic.com>
Date: Wed, 05 May 2010 21:05:30 -0500

> On Wed, 2010-05-05 at 04:11 -0400, Amerigo Wang wrote:
>> V5:
>> Fix coding style problems pointed by David.
> 
> Aside from my concern about the policy of disabling netpoll on
> bridges/bonds with only partial netpoll support, I don't have any
> remaining issues with this. But I'll leave it to other folks to ack the
> underlying driver bits for this series.

Yes the partial support handling is a thorny issue.

But this patch set makes things better than they were before, because
support over such devices didn't work at all previously.

So I'll toss these patches into net-next-2.6, thanks everyone!

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-06  7:44     ` [Bridge] " David Miller
@ 2010-05-07  3:24       ` Cong Wang
  -1 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-05-07  3:24 UTC (permalink / raw)
  To: David Miller
  Cc: mpm, linux-kernel, netdev, bridge, gospo, nhorman, jmoyer,
	shemminger, bonding-devel, fubar

On 05/06/10 15:44, David Miller wrote:
> From: Matt Mackall<mpm@selenic.com>
> Date: Wed, 05 May 2010 21:05:30 -0500
>
>> On Wed, 2010-05-05 at 04:11 -0400, Amerigo Wang wrote:
>>> V5:
>>> Fix coding style problems pointed by David.
>>
>> Aside from my concern about the policy of disabling netpoll on
>> bridges/bonds with only partial netpoll support, I don't have any
>> remaining issues with this. But I'll leave it to other folks to ack the
>> underlying driver bits for this series.
>
> Yes the partial support handling is a thorny issue.
>
> But this patch set makes things better than they were before, because
> support over such devices didn't work at all previously.
>
> So I'll toss these patches into net-next-2.6, thanks everyone!

Thank you, David.


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-07  3:24       ` Cong Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-05-07  3:24 UTC (permalink / raw)
  To: David Miller
  Cc: fubar, nhorman, netdev, mpm, bridge, linux-kernel, jmoyer, gospo,
	bonding-devel

On 05/06/10 15:44, David Miller wrote:
> From: Matt Mackall<mpm@selenic.com>
> Date: Wed, 05 May 2010 21:05:30 -0500
>
>> On Wed, 2010-05-05 at 04:11 -0400, Amerigo Wang wrote:
>>> V5:
>>> Fix coding style problems pointed by David.
>>
>> Aside from my concern about the policy of disabling netpoll on
>> bridges/bonds with only partial netpoll support, I don't have any
>> remaining issues with this. But I'll leave it to other folks to ack the
>> underlying driver bits for this series.
>
> Yes the partial support handling is a thorny issue.
>
> But this patch set makes things better than they were before, because
> support over such devices didn't work at all previously.
>
> So I'll toss these patches into net-next-2.6, thanks everyone!

Thank you, David.


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-05  8:11 ` Amerigo Wang
@ 2010-05-27 18:05   ` Flavio Leitner
  -1 siblings, 0 replies; 73+ messages in thread
From: Flavio Leitner @ 2010-05-27 18:05 UTC (permalink / raw)
  To: Amerigo Wang
  Cc: linux-kernel, Matt Mackall, netdev, bridge, Andy Gospodarek,
	Neil Horman, Jeff Moyer, Stephen Hemminger, bonding-devel,
	Jay Vosburgh, David Miller


Hi guys!

I finally could test this to see if an old problem reported on bugzilla[1] was
fixed now, but unfortunately it is still there.

The ticket is private I guess, but basically the problem happens when the
bonding driver tries to print something after it has taken the write_lock
(monitor functions, enslave/de-enslave): the printk() passes through netpoll
and then back into bonding, which, no matter what mode you use, tries to
read_lock() the same lock again. The result is a deadlock and the entire
system hangs.

I managed to get a fresh backtrace with mode 1; see below:

 
[   93.167079] Call Trace:
[   93.167079]  [<ffffffff81034cf9>] warn_slowpath_common+0x77/0x8f
[   93.167079]  [<ffffffff81034d5e>] warn_slowpath_fmt+0x3c/0x3e
[   93.167079]  [<ffffffff81366aef>] ? _raw_read_trylock+0x11/0x4b
[   93.167079]  [<ffffffffa02a2c42>] ? bond_start_xmit+0x12b/0x401 [bonding]
-> read_lock fails
[   93.167079]  [<ffffffffa02a2c9f>] bond_start_xmit+0x188/0x401 [bonding]
[   93.167079]  [<ffffffff81055b37>] ? trace_hardirqs_off+0xd/0xf
[   93.167079]  [<ffffffff812dfdb9>] netpoll_send_skb+0xbd/0x1f3
[   93.167079]  [<ffffffff812e00ed>] netpoll_send_udp+0x1fe/0x20d
[   93.167079]  [<ffffffffa02c017c>] write_msg+0x89/0xcd [netconsole]
[   93.167079]  [<ffffffff81034e65>] __call_console_drivers+0x67/0x79
[   93.167079]  [<ffffffff81034ed0>] _call_console_drivers+0x59/0x5d
[   93.167079]  [<ffffffff810352d3>] release_console_sem+0x121/0x1d7
[   93.167079]  [<ffffffff8103590a>] vprintk+0x35d/0x393
[   93.167079]  [<ffffffff8103f947>] ? add_timer+0x17/0x19
[   93.167079]  [<ffffffff81046ddf>] ? queue_delayed_work_on+0xa2/0xa9
[   93.167079]  [<ffffffff81363bb8>] printk+0x3c/0x44
[   93.167079]  [<ffffffffa02a3b17>] bond_select_active_slave+0x105/0x109 [bonding] 
-> write_locked
[   93.167079]  [<ffffffffa02a4798>] bond_mii_monitor+0x479/0x4ed [bonding]
[   93.167079]  [<ffffffff81046009>] worker_thread+0x1ef/0x2e2

In this case, the message should be 
    "bonding: bond0: making interface eth0 the new active one"

I did the following patch to discard the packet if it was IN_NETPOLL
and the read_lock() fails, so I could go ahead testing it:

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 5e12462..a3b8bad 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4258,8 +4258,19 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
 	struct bonding *bond = netdev_priv(bond_dev);
 	int res = 1;
 
-	read_lock(&bond->lock);
-	read_lock(&bond->curr_slave_lock);
+	if (read_trylock(&bond->lock) == 0 && 
+		(bond_dev->flags & IFF_IN_NETPOLL)) {
+			dev_kfree_skb(skb);
+			return NETDEV_TX_OK;
+	}
+
+	if (read_trylock(&bond->curr_slave_lock) == 0 && 
+		(bond_dev->flags & IFF_IN_NETPOLL)) {
+			read_unlock(&bond->lock);
+			dev_kfree_skb(skb);
+			return NETDEV_TX_OK;
+	}
+			
 
 	if (!BOND_IS_OK(bond))
 		goto out;


and I found another problem.  The function netpoll_send_skb() checks
whether the npinfo's queue length is zero and, if it's not, queues
the packet to make sure it stays in order and then schedules the thread
to run. Later, the thread wakes up and runs queue_process(), which disables
interrupts before calling ndo_start_xmit().  However, dev_queue_xmit()
uses rcu_*_bh() and, before returning, calls local_bh_enable() while
interrupts are still disabled, spitting this:

------------[ cut here ]------------
WARNING: at kernel/softirq.c:143 local_bh_enable+0x3c/0x86()
Hardware name: Precision WorkStation 490
Modules linked in: netconsole bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6]
Pid: 17, comm: events/2 Not tainted 2.6.34-04700-gd938a70 #21
Call Trace:
 [<ffffffff810381d6>] warn_slowpath_common+0x77/0x8f
 [<ffffffff810381fd>] warn_slowpath_null+0xf/0x11
 [<ffffffff8103d691>] local_bh_enable+0x3c/0x86
 [<ffffffff812e4d85>] dev_queue_xmit+0x462/0x493
 [<ffffffffa018805f>] bond_dev_queue_xmit+0x1bd/0x1e3 [bonding]
 [<ffffffffa01881dd>] bond_start_xmit+0x158/0x37b [bonding]
-> interrupts disabled
 [<ffffffff812f3fca>] queue_process+0x9d/0xf9
 [<ffffffff8104d022>] worker_thread+0x19d/0x224
 [<ffffffff812f3f2d>] ? queue_process+0x0/0xf9
 [<ffffffff81050819>] ? autoremove_wake_function+0x0/0x34
 [<ffffffff8104ce85>] ? worker_thread+0x0/0x224
 [<ffffffff8105040b>] kthread+0x7a/0x82
 [<ffffffff810036d4>] kernel_thread_helper+0x4/0x10
 [<ffffffff81050391>] ? kthread+0x0/0x82
 [<ffffffff810036d0>] ? kernel_thread_helper+0x0/0x10
---[ end trace 74e3904503fdb632 ]---

kernel/softirq.c:
141 static inline void _local_bh_enable_ip(unsigned long ip)
142 {
143         WARN_ON_ONCE(in_irq() || irqs_disabled());
144 #ifdef CONFIG_TRACE_IRQFLAGS
145         local_irq_disable();
146 #endif
147         /*
148          * Are softirqs going to be turned on now:
149          */


The git is updated up to:
  d938a70 be2net: increase POST timeout for EEH recovery

Two slave interfaces, bonding mode 1, netconsole over bond0.

[1] https://bugzilla.redhat.com/show_bug.cgi?id=248374#c5


regards,
fbl


On Wed, May 05, 2010 at 04:11:15AM -0400, Amerigo Wang wrote:
> V5:
> Fix coding style problems pointed by David.
> 
> V4:
> Use "unlikely" to mark netpoll call path, suggested by Stephen.
> Handle NETDEV_GOING_DOWN case.
> 
> V3:
> Update to latest Linus' tree.
> Fix deadlocks when releasing slaves of bonding devices.
> Thanks to Andy.
> 
> V2:
> Fix some bugs of previous version.
> Remove ->netpoll_setup and ->netpoll_xmit, they are not necessary.
> Don't poll all underlying devices, poll ->real_dev in struct netpoll.
> Thanks to David for suggesting above.
> 
> ------------>
> 
> This whole patchset is for adding netpoll support to bridge and bonding
> devices. I already tested it for bridge, bonding, bridge over bonding,
> and bonding over bridge. It looks fine now.
> 
> 
> To make bridge and bonding support netpoll, we need to adjust
> some netpoll generic code. This patch does the following things:
> 
> 1) introduce two new priv_flags for struct net_device:
>    IFF_IN_NETPOLL which identifies we are processing a netpoll;
>    IFF_DISABLE_NETPOLL is used to disable netpoll support for a device
>    at run-time;
> 
> 2) introduce one new method for netdev_ops:
>    ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
>      removed.
> 
> 3) introduce netpoll_poll_dev() which takes a struct net_device * parameter;
>    export netpoll_send_skb() and netpoll_poll_dev() which will be used later;
> 
> 4) hide a pointer to struct netpoll in struct netpoll_info, ditto.
> 
> 5) introduce ->real_dev for struct netpoll.
> 
> 6) introduce a new status NETDEV_BONDING_DESLAVE, which is used to disable
>    netconsole before releasing a slave, to avoid deadlocks.
> 
> Cc: David Miller <davem@davemloft.net>
> Cc: Neil Horman <nhorman@tuxdriver.com>
> Signed-off-by: WANG Cong <amwang@redhat.com>
> 
> ---
> 
> Index: linux-2.6/include/linux/if.h
> ===================================================================
> --- linux-2.6.orig/include/linux/if.h
> +++ linux-2.6/include/linux/if.h
> @@ -71,6 +71,8 @@
>  					 * release skb->dst
>  					 */
>  #define IFF_DONT_BRIDGE 0x800		/* disallow bridging this ether dev */
> +#define IFF_IN_NETPOLL	0x1000		/* whether we are processing netpoll */
> +#define IFF_DISABLE_NETPOLL	0x2000	/* disable netpoll at run-time */
>  
>  #define IF_GET_IFACE	0x0001		/* for querying only */
>  #define IF_GET_PROTO	0x0002
> Index: linux-2.6/include/linux/netdevice.h
> ===================================================================
> --- linux-2.6.orig/include/linux/netdevice.h
> +++ linux-2.6/include/linux/netdevice.h
> @@ -667,6 +667,7 @@ struct net_device_ops {
>  						        unsigned short vid);
>  #ifdef CONFIG_NET_POLL_CONTROLLER
>  	void                    (*ndo_poll_controller)(struct net_device *dev);
> +	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
>  #endif
>  	int			(*ndo_set_vf_mac)(struct net_device *dev,
>  						  int queue, u8 *mac);
> Index: linux-2.6/include/linux/netpoll.h
> ===================================================================
> --- linux-2.6.orig/include/linux/netpoll.h
> +++ linux-2.6/include/linux/netpoll.h
> @@ -14,6 +14,7 @@
>  
>  struct netpoll {
>  	struct net_device *dev;
> +	struct net_device *real_dev;
>  	char dev_name[IFNAMSIZ];
>  	const char *name;
>  	void (*rx_hook)(struct netpoll *, int, char *, int);
> @@ -36,8 +37,11 @@ struct netpoll_info {
>  	struct sk_buff_head txq;
>  
>  	struct delayed_work tx_work;
> +
> +	struct netpoll *netpoll;
>  };
>  
> +void netpoll_poll_dev(struct net_device *dev);
>  void netpoll_poll(struct netpoll *np);
>  void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
>  void netpoll_print_options(struct netpoll *np);
> @@ -47,6 +51,7 @@ int netpoll_trap(void);
>  void netpoll_set_trap(int trap);
>  void netpoll_cleanup(struct netpoll *np);
>  int __netpoll_rx(struct sk_buff *skb);
> +void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
>  
>  
>  #ifdef CONFIG_NETPOLL
> Index: linux-2.6/net/core/netpoll.c
> ===================================================================
> --- linux-2.6.orig/net/core/netpoll.c
> +++ linux-2.6/net/core/netpoll.c
> @@ -179,9 +179,8 @@ static void service_arp_queue(struct net
>  	}
>  }
>  
> -void netpoll_poll(struct netpoll *np)
> +void netpoll_poll_dev(struct net_device *dev)
>  {
> -	struct net_device *dev = np->dev;
>  	const struct net_device_ops *ops;
>  
>  	if (!dev || !netif_running(dev))
> @@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np)
>  	zap_completion_queue();
>  }
>  
> +void netpoll_poll(struct netpoll *np)
> +{
> +	netpoll_poll_dev(np->dev);
> +}
> +
>  static void refill_skbs(void)
>  {
>  	struct sk_buff *skb;
> @@ -282,7 +286,7 @@ static int netpoll_owner_active(struct n
>  	return 0;
>  }
>  
> -static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
> +void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
>  {
>  	int status = NETDEV_TX_BUSY;
>  	unsigned long tries;
> @@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netp
>  		     tries > 0; --tries) {
>  			if (__netif_tx_trylock(txq)) {
>  				if (!netif_tx_queue_stopped(txq)) {
> +					dev->priv_flags |= IFF_IN_NETPOLL;
>  					status = ops->ndo_start_xmit(skb, dev);
> +					dev->priv_flags &= ~IFF_IN_NETPOLL;
>  					if (status == NETDEV_TX_OK)
>  						txq_trans_update(txq);
>  				}
> @@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np)
>  		atomic_inc(&npinfo->refcnt);
>  	}
>  
> -	if (!ndev->netdev_ops->ndo_poll_controller) {
> +	npinfo->netpoll = np;
> +
> +	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
> +	    !ndev->netdev_ops->ndo_poll_controller) {
>  		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
>  		       np->name, np->dev_name);
>  		err = -ENOTSUPP;
> @@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np)
>  			}
>  
>  			if (atomic_dec_and_test(&npinfo->refcnt)) {
> +				const struct net_device_ops *ops;
>  				skb_queue_purge(&npinfo->arp_tx);
>  				skb_queue_purge(&npinfo->txq);
>  				cancel_rearming_delayed_work(&npinfo->tx_work);
> @@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np)
>  				/* clean after last, unfinished work */
>  				__skb_queue_purge(&npinfo->txq);
>  				kfree(npinfo);
> -				np->dev->npinfo = NULL;
> +				ops = np->dev->netdev_ops;
> +				if (ops->ndo_netpoll_cleanup)
> +					ops->ndo_netpoll_cleanup(np->dev);
> +				else
> +					np->dev->npinfo = NULL;
>  			}
>  		}
>  
> @@ -908,6 +922,7 @@ void netpoll_set_trap(int trap)
>  		atomic_dec(&trapped);
>  }
>  
> +EXPORT_SYMBOL(netpoll_send_skb);
>  EXPORT_SYMBOL(netpoll_set_trap);
>  EXPORT_SYMBOL(netpoll_trap);
>  EXPORT_SYMBOL(netpoll_print_options);
> @@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options);
>  EXPORT_SYMBOL(netpoll_setup);
>  EXPORT_SYMBOL(netpoll_cleanup);
>  EXPORT_SYMBOL(netpoll_send_udp);
> +EXPORT_SYMBOL(netpoll_poll_dev);
>  EXPORT_SYMBOL(netpoll_poll);
> Index: linux-2.6/drivers/net/netconsole.c
> ===================================================================
> --- linux-2.6.orig/drivers/net/netconsole.c
> +++ linux-2.6/drivers/net/netconsole.c
> @@ -665,7 +665,8 @@ static int netconsole_netdev_event(struc
>  	struct netconsole_target *nt;
>  	struct net_device *dev = ptr;
>  
> -	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER))
> +	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER ||
> +	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN))
>  		goto done;
>  
>  	spin_lock_irqsave(&target_list_lock, flags);
> @@ -677,19 +678,21 @@ static int netconsole_netdev_event(struc
>  				strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ);
>  				break;
>  			case NETDEV_UNREGISTER:
> -				if (!nt->enabled)
> -					break;
>  				netpoll_cleanup(&nt->np);
> +				/* Fall through */
> +			case NETDEV_GOING_DOWN:
> +			case NETDEV_BONDING_DESLAVE:
>  				nt->enabled = 0;
> -				printk(KERN_INFO "netconsole: network logging stopped"
> -					", interface %s unregistered\n",
> -					dev->name);
>  				break;
>  			}
>  		}
>  		netconsole_target_put(nt);
>  	}
>  	spin_unlock_irqrestore(&target_list_lock, flags);
> +	if (event == NETDEV_UNREGISTER || event == NETDEV_BONDING_DESLAVE)
> +		printk(KERN_INFO "netconsole: network logging stopped, "
> +			"interface %s %s\n",  dev->name,
> +			event == NETDEV_UNREGISTER ? "unregistered" : "released slaves");
>  
>  done:
>  	return NOTIFY_DONE;
> Index: linux-2.6/include/linux/notifier.h
> ===================================================================
> --- linux-2.6.orig/include/linux/notifier.h
> +++ linux-2.6/include/linux/notifier.h
> @@ -203,6 +203,7 @@ static inline int notifier_to_errno(int 
>  #define NETDEV_BONDING_NEWTYPE  0x000F
>  #define NETDEV_POST_INIT	0x0010
>  #define NETDEV_UNREGISTER_BATCH 0x0011
> +#define NETDEV_BONDING_DESLAVE  0x0012
>  
>  #define SYS_DOWN	0x0001	/* Notify of system down */
>  #define SYS_RESTART	SYS_DOWN
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Flavio

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-27 18:05   ` Flavio Leitner
  0 siblings, 0 replies; 73+ messages in thread
From: Flavio Leitner @ 2010-05-27 18:05 UTC (permalink / raw)
  To: Amerigo Wang
  Cc: Jay Vosburgh, Neil Horman, netdev, Matt Mackall, bridge,
	linux-kernel, David Miller, Jeff Moyer, Andy Gospodarek,
	bonding-devel


Hi guys!

I finally could test this to see if an old problem reported on bugzilla[1] was
fixed now, but unfortunately it is still there.

The ticket is private I guess, but basically the problem happens when the
bonding driver tries to print something after it has taken the write_lock
(monitor functions, enslave/de-enslave): the printk() passes through netpoll
and then back into bonding, which, no matter what mode you use, tries to
read_lock() the same lock again. The result is a deadlock and the entire
system hangs.

I managed to get a fresh backtrace with mode 1; see below:

 
[   93.167079] Call Trace:
[   93.167079]  [<ffffffff81034cf9>] warn_slowpath_common+0x77/0x8f
[   93.167079]  [<ffffffff81034d5e>] warn_slowpath_fmt+0x3c/0x3e
[   93.167079]  [<ffffffff81366aef>] ? _raw_read_trylock+0x11/0x4b
[   93.167079]  [<ffffffffa02a2c42>] ? bond_start_xmit+0x12b/0x401 [bonding]
-> read_lock fails
[   93.167079]  [<ffffffffa02a2c9f>] bond_start_xmit+0x188/0x401 [bonding]
[   93.167079]  [<ffffffff81055b37>] ? trace_hardirqs_off+0xd/0xf
[   93.167079]  [<ffffffff812dfdb9>] netpoll_send_skb+0xbd/0x1f3
[   93.167079]  [<ffffffff812e00ed>] netpoll_send_udp+0x1fe/0x20d
[   93.167079]  [<ffffffffa02c017c>] write_msg+0x89/0xcd [netconsole]
[   93.167079]  [<ffffffff81034e65>] __call_console_drivers+0x67/0x79
[   93.167079]  [<ffffffff81034ed0>] _call_console_drivers+0x59/0x5d
[   93.167079]  [<ffffffff810352d3>] release_console_sem+0x121/0x1d7
[   93.167079]  [<ffffffff8103590a>] vprintk+0x35d/0x393
[   93.167079]  [<ffffffff8103f947>] ? add_timer+0x17/0x19
[   93.167079]  [<ffffffff81046ddf>] ? queue_delayed_work_on+0xa2/0xa9
[   93.167079]  [<ffffffff81363bb8>] printk+0x3c/0x44
[   93.167079]  [<ffffffffa02a3b17>] bond_select_active_slave+0x105/0x109 [bonding] 
-> write_locked
[   93.167079]  [<ffffffffa02a4798>] bond_mii_monitor+0x479/0x4ed [bonding]
[   93.167079]  [<ffffffff81046009>] worker_thread+0x1ef/0x2e2

In this case, the message should be 
    "bonding: bond0: making interface eth0 the new active one"

I did the following patch to discard the packet if it was IN_NETPOLL
and the read_lock() fails, so I could go ahead testing it:

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 5e12462..a3b8bad 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4258,8 +4258,19 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
 	struct bonding *bond = netdev_priv(bond_dev);
 	int res = 1;
 
-	read_lock(&bond->lock);
-	read_lock(&bond->curr_slave_lock);
+	if (read_trylock(&bond->lock) == 0 && 
+		(bond_dev->flags & IFF_IN_NETPOLL)) {
+			dev_kfree_skb(skb);
+			return NETDEV_TX_OK;
+	}
+
+	if (read_trylock(&bond->curr_slave_lock) == 0 && 
+		(bond_dev->flags & IFF_IN_NETPOLL)) {
+			read_unlock(&bond->lock);
+			dev_kfree_skb(skb);
+			return NETDEV_TX_OK;
+	}
+			
 
 	if (!BOND_IS_OK(bond))
 		goto out;


and I found another problem.  The function netpoll_send_skb() checks
whether the npinfo's queue length is zero and, if it's not, queues
the packet to make sure it stays in order and then schedules the thread
to run. Later, the thread wakes up and runs queue_process(), which disables
interrupts before calling ndo_start_xmit().  However, dev_queue_xmit()
uses rcu_*_bh() and, before returning, calls local_bh_enable() while
interrupts are still disabled, spitting this:

------------[ cut here ]------------
WARNING: at kernel/softirq.c:143 local_bh_enable+0x3c/0x86()
Hardware name: Precision WorkStation 490
Modules linked in: netconsole bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6]
Pid: 17, comm: events/2 Not tainted 2.6.34-04700-gd938a70 #21
Call Trace:
 [<ffffffff810381d6>] warn_slowpath_common+0x77/0x8f
 [<ffffffff810381fd>] warn_slowpath_null+0xf/0x11
 [<ffffffff8103d691>] local_bh_enable+0x3c/0x86
 [<ffffffff812e4d85>] dev_queue_xmit+0x462/0x493
 [<ffffffffa018805f>] bond_dev_queue_xmit+0x1bd/0x1e3 [bonding]
 [<ffffffffa01881dd>] bond_start_xmit+0x158/0x37b [bonding]
-> interrupts disabled
 [<ffffffff812f3fca>] queue_process+0x9d/0xf9
 [<ffffffff8104d022>] worker_thread+0x19d/0x224
 [<ffffffff812f3f2d>] ? queue_process+0x0/0xf9
 [<ffffffff81050819>] ? autoremove_wake_function+0x0/0x34
 [<ffffffff8104ce85>] ? worker_thread+0x0/0x224
 [<ffffffff8105040b>] kthread+0x7a/0x82
 [<ffffffff810036d4>] kernel_thread_helper+0x4/0x10
 [<ffffffff81050391>] ? kthread+0x0/0x82
 [<ffffffff810036d0>] ? kernel_thread_helper+0x0/0x10
---[ end trace 74e3904503fdb632 ]---

kernel/softirq.c:
141 static inline void _local_bh_enable_ip(unsigned long ip)
142 {
143         WARN_ON_ONCE(in_irq() || irqs_disabled());
144 #ifdef CONFIG_TRACE_IRQFLAGS
145         local_irq_disable();
146 #endif
147         /*
148          * Are softirqs going to be turned on now:
149          */


The git is updated up to:
  d938a70 be2net: increase POST timeout for EEH recovery

Two slave interfaces, bonding mode 1, netconsole over bond0.

[1] https://bugzilla.redhat.com/show_bug.cgi?id=248374#c5


regards,
fbl


On Wed, May 05, 2010 at 04:11:15AM -0400, Amerigo Wang wrote:
> V5:
> Fix coding style problems pointed by David.
> 
> V4:
> Use "unlikely" to mark netpoll call path, suggested by Stephen.
> Handle NETDEV_GOING_DOWN case.
> 
> V3:
> Update to latest Linus' tree.
> Fix deadlocks when releasing slaves of bonding devices.
> Thanks to Andy.
> 
> V2:
> Fix some bugs of previous version.
> Remove ->netpoll_setup and ->netpoll_xmit, they are not necessary.
> Don't poll all underlying devices, poll ->real_dev in struct netpoll.
> Thanks to David for suggesting above.
> 
> ------------>
> 
> This whole patchset is for adding netpoll support to bridge and bonding
> devices. I already tested it for bridge, bonding, bridge over bonding,
> and bonding over bridge. It looks fine now.
> 
> 
> To make bridge and bonding support netpoll, we need to adjust
> some netpoll generic code. This patch does the following things:
> 
> 1) introduce two new priv_flags for struct net_device:
>    IFF_IN_NETPOLL which identifies we are processing a netpoll;
>    IFF_DISABLE_NETPOLL is used to disable netpoll support for a device
>    at run-time;
> 
> 2) introduce one new method for netdev_ops:
>    ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
>      removed.
> 
> 3) introduce netpoll_poll_dev() which takes a struct net_device * parameter;
>    export netpoll_send_skb() and netpoll_poll_dev() which will be used later;
> 
> 4) hide a pointer to struct netpoll in struct netpoll_info, ditto.
> 
> 5) introduce ->real_dev for struct netpoll.
> 
> 6) introduce a new status NETDEV_BONDING_DESLAVE, which is used to disable
>    netconsole before releasing a slave, to avoid deadlocks.
> 
> Cc: David Miller <davem@davemloft.net>
> Cc: Neil Horman <nhorman@tuxdriver.com>
> Signed-off-by: WANG Cong <amwang@redhat.com>
> 
> ---
> 
> Index: linux-2.6/include/linux/if.h
> ===================================================================
> --- linux-2.6.orig/include/linux/if.h
> +++ linux-2.6/include/linux/if.h
> @@ -71,6 +71,8 @@
>  					 * release skb->dst
>  					 */
>  #define IFF_DONT_BRIDGE 0x800		/* disallow bridging this ether dev */
> +#define IFF_IN_NETPOLL	0x1000		/* whether we are processing netpoll */
> +#define IFF_DISABLE_NETPOLL	0x2000	/* disable netpoll at run-time */
>  
>  #define IF_GET_IFACE	0x0001		/* for querying only */
>  #define IF_GET_PROTO	0x0002
> Index: linux-2.6/include/linux/netdevice.h
> ===================================================================
> --- linux-2.6.orig/include/linux/netdevice.h
> +++ linux-2.6/include/linux/netdevice.h
> @@ -667,6 +667,7 @@ struct net_device_ops {
>  						        unsigned short vid);
>  #ifdef CONFIG_NET_POLL_CONTROLLER
>  	void                    (*ndo_poll_controller)(struct net_device *dev);
> +	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
>  #endif
>  	int			(*ndo_set_vf_mac)(struct net_device *dev,
>  						  int queue, u8 *mac);
> Index: linux-2.6/include/linux/netpoll.h
> ===================================================================
> --- linux-2.6.orig/include/linux/netpoll.h
> +++ linux-2.6/include/linux/netpoll.h
> @@ -14,6 +14,7 @@
>  
>  struct netpoll {
>  	struct net_device *dev;
> +	struct net_device *real_dev;
>  	char dev_name[IFNAMSIZ];
>  	const char *name;
>  	void (*rx_hook)(struct netpoll *, int, char *, int);
> @@ -36,8 +37,11 @@ struct netpoll_info {
>  	struct sk_buff_head txq;
>  
>  	struct delayed_work tx_work;
> +
> +	struct netpoll *netpoll;
>  };
>  
> +void netpoll_poll_dev(struct net_device *dev);
>  void netpoll_poll(struct netpoll *np);
>  void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
>  void netpoll_print_options(struct netpoll *np);
> @@ -47,6 +51,7 @@ int netpoll_trap(void);
>  void netpoll_set_trap(int trap);
>  void netpoll_cleanup(struct netpoll *np);
>  int __netpoll_rx(struct sk_buff *skb);
> +void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
>  
>  
>  #ifdef CONFIG_NETPOLL
> Index: linux-2.6/net/core/netpoll.c
> ===================================================================
> --- linux-2.6.orig/net/core/netpoll.c
> +++ linux-2.6/net/core/netpoll.c
> @@ -179,9 +179,8 @@ static void service_arp_queue(struct net
>  	}
>  }
>  
> -void netpoll_poll(struct netpoll *np)
> +void netpoll_poll_dev(struct net_device *dev)
>  {
> -	struct net_device *dev = np->dev;
>  	const struct net_device_ops *ops;
>  
>  	if (!dev || !netif_running(dev))
> @@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np)
>  	zap_completion_queue();
>  }
>  
> +void netpoll_poll(struct netpoll *np)
> +{
> +	netpoll_poll_dev(np->dev);
> +}
> +
>  static void refill_skbs(void)
>  {
>  	struct sk_buff *skb;
> @@ -282,7 +286,7 @@ static int netpoll_owner_active(struct n
>  	return 0;
>  }
>  
> -static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
> +void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
>  {
>  	int status = NETDEV_TX_BUSY;
>  	unsigned long tries;
> @@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netp
>  		     tries > 0; --tries) {
>  			if (__netif_tx_trylock(txq)) {
>  				if (!netif_tx_queue_stopped(txq)) {
> +					dev->priv_flags |= IFF_IN_NETPOLL;
>  					status = ops->ndo_start_xmit(skb, dev);
> +					dev->priv_flags &= ~IFF_IN_NETPOLL;
>  					if (status == NETDEV_TX_OK)
>  						txq_trans_update(txq);
>  				}
> @@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np)
>  		atomic_inc(&npinfo->refcnt);
>  	}
>  
> -	if (!ndev->netdev_ops->ndo_poll_controller) {
> +	npinfo->netpoll = np;
> +
> +	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
> +	    !ndev->netdev_ops->ndo_poll_controller) {
>  		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
>  		       np->name, np->dev_name);
>  		err = -ENOTSUPP;
> @@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np)
>  			}
>  
>  			if (atomic_dec_and_test(&npinfo->refcnt)) {
> +				const struct net_device_ops *ops;
>  				skb_queue_purge(&npinfo->arp_tx);
>  				skb_queue_purge(&npinfo->txq);
>  				cancel_rearming_delayed_work(&npinfo->tx_work);
> @@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np)
>  				/* clean after last, unfinished work */
>  				__skb_queue_purge(&npinfo->txq);
>  				kfree(npinfo);
> -				np->dev->npinfo = NULL;
> +				ops = np->dev->netdev_ops;
> +				if (ops->ndo_netpoll_cleanup)
> +					ops->ndo_netpoll_cleanup(np->dev);
> +				else
> +					np->dev->npinfo = NULL;
>  			}
>  		}
>  
> @@ -908,6 +922,7 @@ void netpoll_set_trap(int trap)
>  		atomic_dec(&trapped);
>  }
>  
> +EXPORT_SYMBOL(netpoll_send_skb);
>  EXPORT_SYMBOL(netpoll_set_trap);
>  EXPORT_SYMBOL(netpoll_trap);
>  EXPORT_SYMBOL(netpoll_print_options);
> @@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options);
>  EXPORT_SYMBOL(netpoll_setup);
>  EXPORT_SYMBOL(netpoll_cleanup);
>  EXPORT_SYMBOL(netpoll_send_udp);
> +EXPORT_SYMBOL(netpoll_poll_dev);
>  EXPORT_SYMBOL(netpoll_poll);
> Index: linux-2.6/drivers/net/netconsole.c
> ===================================================================
> --- linux-2.6.orig/drivers/net/netconsole.c
> +++ linux-2.6/drivers/net/netconsole.c
> @@ -665,7 +665,8 @@ static int netconsole_netdev_event(struc
>  	struct netconsole_target *nt;
>  	struct net_device *dev = ptr;
>  
> -	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER))
> +	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER ||
> +	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN))
>  		goto done;
>  
>  	spin_lock_irqsave(&target_list_lock, flags);
> @@ -677,19 +678,21 @@ static int netconsole_netdev_event(struc
>  				strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ);
>  				break;
>  			case NETDEV_UNREGISTER:
> -				if (!nt->enabled)
> -					break;
>  				netpoll_cleanup(&nt->np);
> +				/* Fall through */
> +			case NETDEV_GOING_DOWN:
> +			case NETDEV_BONDING_DESLAVE:
>  				nt->enabled = 0;
> -				printk(KERN_INFO "netconsole: network logging stopped"
> -					", interface %s unregistered\n",
> -					dev->name);
>  				break;
>  			}
>  		}
>  		netconsole_target_put(nt);
>  	}
>  	spin_unlock_irqrestore(&target_list_lock, flags);
> +	if (event == NETDEV_UNREGISTER || event == NETDEV_BONDING_DESLAVE)
> +		printk(KERN_INFO "netconsole: network logging stopped, "
> +			"interface %s %s\n",  dev->name,
> +			event == NETDEV_UNREGISTER ? "unregistered" : "released slaves");
>  
>  done:
>  	return NOTIFY_DONE;
> Index: linux-2.6/include/linux/notifier.h
> ===================================================================
> --- linux-2.6.orig/include/linux/notifier.h
> +++ linux-2.6/include/linux/notifier.h
> @@ -203,6 +203,7 @@ static inline int notifier_to_errno(int 
>  #define NETDEV_BONDING_NEWTYPE  0x000F
>  #define NETDEV_POST_INIT	0x0010
>  #define NETDEV_UNREGISTER_BATCH 0x0011
> +#define NETDEV_BONDING_DESLAVE  0x0012
>  
>  #define SYS_DOWN	0x0001	/* Notify of system down */
>  #define SYS_RESTART	SYS_DOWN
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Flavio

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-27 18:05   ` [Bridge] " Flavio Leitner
@ 2010-05-27 20:35     ` David Miller
  -1 siblings, 0 replies; 73+ messages in thread
From: David Miller @ 2010-05-27 20:35 UTC (permalink / raw)
  To: fbl
  Cc: amwang, linux-kernel, mpm, netdev, bridge, gospo, nhorman,
	jmoyer, shemminger, bonding-devel, fubar

From: Flavio Leitner <fbl@sysclose.org>
Date: Thu, 27 May 2010 15:05:45 -0300

> I did the following patch to discard the packet if it was IN_NETPOLL
> and the read_lock() fails, so I could go ahead testing it:

This is disgusting, let's just disallow console output from such
locations.  Defer them to a workqueue if their output is so critical.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-27 20:35     ` David Miller
  0 siblings, 0 replies; 73+ messages in thread
From: David Miller @ 2010-05-27 20:35 UTC (permalink / raw)
  To: fbl
  Cc: fubar, amwang, nhorman, netdev, mpm, bridge, linux-kernel,
	jmoyer, gospo, bonding-devel

From: Flavio Leitner <fbl@sysclose.org>
Date: Thu, 27 May 2010 15:05:45 -0300

> I did the following patch to discard the packet if it was IN_NETPOLL
> and the read_lock() fails, so I could go ahead testing it:

This is disgusting, let's just disallow console output from such
locations.  Defer them to a workqueue if their output is so critical.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-27 20:35     ` [Bridge] " David Miller
@ 2010-05-27 21:25       ` Flavio Leitner
  -1 siblings, 0 replies; 73+ messages in thread
From: Flavio Leitner @ 2010-05-27 21:25 UTC (permalink / raw)
  To: David Miller
  Cc: amwang, linux-kernel, mpm, netdev, bridge, gospo, nhorman,
	jmoyer, shemminger, bonding-devel, fubar

On Thu, May 27, 2010 at 01:35:59PM -0700, David Miller wrote:
> From: Flavio Leitner <fbl@sysclose.org>
> Date: Thu, 27 May 2010 15:05:45 -0300
> 
> > I did the following patch to discard the packet if it was IN_NETPOLL
> > and the read_lock() fails, so I could go ahead testing it:
> 
> This is disgusting, let's just disallow console output from such
> locations.  Defer them to a workqueue if their output is so critical.

I did that patch just to see the backtrace in the serial console
and to keep testing it. It's not a solution at all.

Just to be clear, the second problem isn't related to that patch
and the console message is already in a workqueue.

-- 
Flavio

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-27 21:25       ` Flavio Leitner
  0 siblings, 0 replies; 73+ messages in thread
From: Flavio Leitner @ 2010-05-27 21:25 UTC (permalink / raw)
  To: David Miller
  Cc: fubar, amwang, nhorman, netdev, mpm, bridge, linux-kernel,
	jmoyer, gospo, bonding-devel

On Thu, May 27, 2010 at 01:35:59PM -0700, David Miller wrote:
> From: Flavio Leitner <fbl@sysclose.org>
> Date: Thu, 27 May 2010 15:05:45 -0300
> 
> > I did the following patch to discard the packet if it was IN_NETPOLL
> > and the read_lock() fails, so I could go ahead testing it:
> 
> This is disgusting, let's just disallow console output from such
> locations.  Defer them to a workqueue if their output is so critical.

I did that patch just to see the backtrace in the serial console
and to keep testing it. It's not a solution at all.

Just to be clear, the second problem isn't related to that patch
and the console message is already in a workqueue.

-- 
Flavio

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-27 18:05   ` [Bridge] " Flavio Leitner
@ 2010-05-28  2:47     ` Cong Wang
  -1 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-05-28  2:47 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: linux-kernel, Matt Mackall, netdev, bridge, Andy Gospodarek,
	Neil Horman, Jeff Moyer, Stephen Hemminger, bonding-devel,
	Jay Vosburgh, David Miller

On 05/28/10 02:05, Flavio Leitner wrote:
>
> Hi guys!
>
> I finally could test this to see if an old problem reported on bugzilla[1] was
> fixed now, but unfortunately it is still there.
>
> The ticket is private I guess, but basically the problem happens when bonding
> driver tries to print something after it had taken the write_lock (monitor
> functions, enslave/de-enslave), so the printk() will pass through netpoll, then
> on bonding again which no matter what mode you use, it will try to read_lock()
> the lock again. The result is a deadlock and the entire system hangs.


This is true, I already fixed some similar issues.

>
> I manage to get a fresh backtrace with mode 1, see below:
>
>
> [   93.167079] Call Trace:
> [   93.167079]  [<ffffffff81034cf9>] warn_slowpath_common+0x77/0x8f
> [   93.167079]  [<ffffffff81034d5e>] warn_slowpath_fmt+0x3c/0x3e
> [   93.167079]  [<ffffffff81366aef>] ? _raw_read_trylock+0x11/0x4b
> [   93.167079]  [<ffffffffa02a2c42>] ? bond_start_xmit+0x12b/0x401 [bonding]
> ->  read_lock fails
> [   93.167079]  [<ffffffffa02a2c9f>] bond_start_xmit+0x188/0x401 [bonding]
> [   93.167079]  [<ffffffff81055b37>] ? trace_hardirqs_off+0xd/0xf
> [   93.167079]  [<ffffffff812dfdb9>] netpoll_send_skb+0xbd/0x1f3
> [   93.167079]  [<ffffffff812e00ed>] netpoll_send_udp+0x1fe/0x20d
> [   93.167079]  [<ffffffffa02c017c>] write_msg+0x89/0xcd [netconsole]
> [   93.167079]  [<ffffffff81034e65>] __call_console_drivers+0x67/0x79
> [   93.167079]  [<ffffffff81034ed0>] _call_console_drivers+0x59/0x5d
> [   93.167079]  [<ffffffff810352d3>] release_console_sem+0x121/0x1d7
> [   93.167079]  [<ffffffff8103590a>] vprintk+0x35d/0x393
> [   93.167079]  [<ffffffff8103f947>] ? add_timer+0x17/0x19
> [   93.167079]  [<ffffffff81046ddf>] ? queue_delayed_work_on+0xa2/0xa9
> [   93.167079]  [<ffffffff81363bb8>] printk+0x3c/0x44
> [   93.167079]  [<ffffffffa02a3b17>] bond_select_active_slave+0x105/0x109 [bonding]
> ->  write_locked
> [   93.167079]  [<ffffffffa02a4798>] bond_mii_monitor+0x479/0x4ed [bonding]
> [   93.167079]  [<ffffffff81046009>] worker_thread+0x1ef/0x2e2
>
> In this case, the message should be
>      "bonding: bond0: making interface eth0 the new active one"


Hmm, you triggered a warning here. Let me check the source code
and try to reproduce it.

>
> I did the following patch to discard the packet if it was IN_NETPOLL
> and the read_lock() fails, so I could go ahead testing it:
>
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 5e12462..a3b8bad 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -4258,8 +4258,19 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
>   	struct bonding *bond = netdev_priv(bond_dev);
>   	int res = 1;
>
> -	read_lock(&bond->lock);
> -	read_lock(&bond->curr_slave_lock);
> +	if (read_trylock(&bond->lock) == 0&&
> +		(bond_dev->flags&  IFF_IN_NETPOLL)) {
> +			dev_kfree_skb(skb);
> +			return NETDEV_TX_OK;
> +	}
> +
> +	if (read_trylock(&bond->curr_slave_lock) == 0&&
> +		(bond_dev->flags&  IFF_IN_NETPOLL)) {
> +			read_unlock(&bond->lock);
> +			dev_kfree_skb(skb);
> +			return NETDEV_TX_OK;
> +	}
> +			
>
>   	if (!BOND_IS_OK(bond))
>   		goto out;
>


This looks like a workaround, not a fix. :)

>
> and I found another problem.  The function netpoll_send_skb() checks
> if the npinfo's queue length is zero and if it's not, it will queue
> the packet to make sure it's in order and then schedule the thread
> to run. Later, the thread wakes up running queue_process() which disables
> interrupts before calling ndo_start_xmit().  However, dev_queue_xmit()
> uses rcu_*_bh() and before return, it will enable the interrupts again,
> spitting this:
>
> ------------[ cut here ]------------
> WARNING: at kernel/softirq.c:143 local_bh_enable+0x3c/0x86()
> Hardware name: Precision WorkStation 490
> Modules linked in: netconsole bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6]
> Pid: 17, comm: events/2 Not tainted 2.6.34-04700-gd938a70 #21
> Call Trace:
>   [<ffffffff810381d6>] warn_slowpath_common+0x77/0x8f
>   [<ffffffff810381fd>] warn_slowpath_null+0xf/0x11
>   [<ffffffff8103d691>] local_bh_enable+0x3c/0x86
>   [<ffffffff812e4d85>] dev_queue_xmit+0x462/0x493
>   [<ffffffffa018805f>] bond_dev_queue_xmit+0x1bd/0x1e3 [bonding]
>   [<ffffffffa01881dd>] bond_start_xmit+0x158/0x37b [bonding]
> ->  interrupts disabled
>   [<ffffffff812f3fca>] queue_process+0x9d/0xf9
>   [<ffffffff8104d022>] worker_thread+0x19d/0x224
>   [<ffffffff812f3f2d>] ? queue_process+0x0/0xf9
>   [<ffffffff81050819>] ? autoremove_wake_function+0x0/0x34
>   [<ffffffff8104ce85>] ? worker_thread+0x0/0x224
>   [<ffffffff8105040b>] kthread+0x7a/0x82
>   [<ffffffff810036d4>] kernel_thread_helper+0x4/0x10
>   [<ffffffff81050391>] ? kthread+0x0/0x82
>   [<ffffffff810036d0>] ? kernel_thread_helper+0x0/0x10
> ---[ end trace 74e3904503fdb632 ]---
>
> kernel/softirq.c:
> 141 static inline void _local_bh_enable_ip(unsigned long ip)
> 142 {
> 143         WARN_ON_ONCE(in_irq() || irqs_disabled());
> 144 #ifdef CONFIG_TRACE_IRQFLAGS
> 145         local_irq_disable();
> 146 #endif
> 147         /*
> 148          * Are softirqs going to be turned on now:
> 149          */
>
>

I am wondering if this was caused by the previous issue.


> The git is updated up to:
>    d938a70 be2net: increase POST timeout for EEH recovery
>
> Two slave interfaces, bonding mode 1, netconsole over bond0.
>
> [1] https://bugzilla.redhat.com/show_bug.cgi?id=248374#c5

How did you reproduce this?
I will check that BZ to see if I can find how to reproduce this.

Thanks.


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-28  2:47     ` Cong Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-05-28  2:47 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: Jay Vosburgh, Neil Horman, netdev, Matt Mackall, bridge,
	linux-kernel, David Miller, Jeff Moyer, Andy Gospodarek,
	bonding-devel

On 05/28/10 02:05, Flavio Leitner wrote:
>
> Hi guys!
>
> I finally could test this to see if an old problem reported on bugzilla[1] was
> fixed now, but unfortunately it is still there.
>
> The ticket is private I guess, but basically the problem happens when bonding
> driver tries to print something after it had taken the write_lock (monitor
> functions, enslave/de-enslave), so the printk() will pass through netpoll, then
> on bonding again which no matter what mode you use, it will try to read_lock()
> the lock again. The result is a deadlock and the entire system hangs.


This is true, I already fixed some similar issues.

>
> I manage to get a fresh backtrace with mode 1, see below:
>
>
> [   93.167079] Call Trace:
> [   93.167079]  [<ffffffff81034cf9>] warn_slowpath_common+0x77/0x8f
> [   93.167079]  [<ffffffff81034d5e>] warn_slowpath_fmt+0x3c/0x3e
> [   93.167079]  [<ffffffff81366aef>] ? _raw_read_trylock+0x11/0x4b
> [   93.167079]  [<ffffffffa02a2c42>] ? bond_start_xmit+0x12b/0x401 [bonding]
> ->  read_lock fails
> [   93.167079]  [<ffffffffa02a2c9f>] bond_start_xmit+0x188/0x401 [bonding]
> [   93.167079]  [<ffffffff81055b37>] ? trace_hardirqs_off+0xd/0xf
> [   93.167079]  [<ffffffff812dfdb9>] netpoll_send_skb+0xbd/0x1f3
> [   93.167079]  [<ffffffff812e00ed>] netpoll_send_udp+0x1fe/0x20d
> [   93.167079]  [<ffffffffa02c017c>] write_msg+0x89/0xcd [netconsole]
> [   93.167079]  [<ffffffff81034e65>] __call_console_drivers+0x67/0x79
> [   93.167079]  [<ffffffff81034ed0>] _call_console_drivers+0x59/0x5d
> [   93.167079]  [<ffffffff810352d3>] release_console_sem+0x121/0x1d7
> [   93.167079]  [<ffffffff8103590a>] vprintk+0x35d/0x393
> [   93.167079]  [<ffffffff8103f947>] ? add_timer+0x17/0x19
> [   93.167079]  [<ffffffff81046ddf>] ? queue_delayed_work_on+0xa2/0xa9
> [   93.167079]  [<ffffffff81363bb8>] printk+0x3c/0x44
> [   93.167079]  [<ffffffffa02a3b17>] bond_select_active_slave+0x105/0x109 [bonding]
> ->  write_locked
> [   93.167079]  [<ffffffffa02a4798>] bond_mii_monitor+0x479/0x4ed [bonding]
> [   93.167079]  [<ffffffff81046009>] worker_thread+0x1ef/0x2e2
>
> In this case, the message should be
>      "bonding: bond0: making interface eth0 the new active one"


Hmm, you triggered a warning here. Let me check the source code
and try to reproduce it.

>
> I did the following patch to discard the packet if it was IN_NETPOLL
> and the read_lock() fails, so I could go ahead testing it:
>
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 5e12462..a3b8bad 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -4258,8 +4258,19 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
>   	struct bonding *bond = netdev_priv(bond_dev);
>   	int res = 1;
>
> -	read_lock(&bond->lock);
> -	read_lock(&bond->curr_slave_lock);
> +	if (read_trylock(&bond->lock) == 0&&
> +		(bond_dev->flags&  IFF_IN_NETPOLL)) {
> +			dev_kfree_skb(skb);
> +			return NETDEV_TX_OK;
> +	}
> +
> +	if (read_trylock(&bond->curr_slave_lock) == 0&&
> +		(bond_dev->flags&  IFF_IN_NETPOLL)) {
> +			read_unlock(&bond->lock);
> +			dev_kfree_skb(skb);
> +			return NETDEV_TX_OK;
> +	}
> +			
>
>   	if (!BOND_IS_OK(bond))
>   		goto out;
>


This looks like a workaround, not a fix. :)

>
> and I found another problem.  The function netpoll_send_skb() checks
> if the npinfo's queue length is zero and if it's not, it will queue
> the packet to make sure it's in order and then schedule the thread
> to run. Later, the thread wakes up running queue_process() which disables
> interrupts before calling ndo_start_xmit().  However, dev_queue_xmit()
> uses rcu_*_bh() and before return, it will enable the interrupts again,
> spitting this:
>
> ------------[ cut here ]------------
> WARNING: at kernel/softirq.c:143 local_bh_enable+0x3c/0x86()
> Hardware name: Precision WorkStation 490
> Modules linked in: netconsole bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6]
> Pid: 17, comm: events/2 Not tainted 2.6.34-04700-gd938a70 #21
> Call Trace:
>   [<ffffffff810381d6>] warn_slowpath_common+0x77/0x8f
>   [<ffffffff810381fd>] warn_slowpath_null+0xf/0x11
>   [<ffffffff8103d691>] local_bh_enable+0x3c/0x86
>   [<ffffffff812e4d85>] dev_queue_xmit+0x462/0x493
>   [<ffffffffa018805f>] bond_dev_queue_xmit+0x1bd/0x1e3 [bonding]
>   [<ffffffffa01881dd>] bond_start_xmit+0x158/0x37b [bonding]
> ->  interrupts disabled
>   [<ffffffff812f3fca>] queue_process+0x9d/0xf9
>   [<ffffffff8104d022>] worker_thread+0x19d/0x224
>   [<ffffffff812f3f2d>] ? queue_process+0x0/0xf9
>   [<ffffffff81050819>] ? autoremove_wake_function+0x0/0x34
>   [<ffffffff8104ce85>] ? worker_thread+0x0/0x224
>   [<ffffffff8105040b>] kthread+0x7a/0x82
>   [<ffffffff810036d4>] kernel_thread_helper+0x4/0x10
>   [<ffffffff81050391>] ? kthread+0x0/0x82
>   [<ffffffff810036d0>] ? kernel_thread_helper+0x0/0x10
> ---[ end trace 74e3904503fdb632 ]---
>
> kernel/softirq.c:
> 141 static inline void _local_bh_enable_ip(unsigned long ip)
> 142 {
> 143         WARN_ON_ONCE(in_irq() || irqs_disabled());
> 144 #ifdef CONFIG_TRACE_IRQFLAGS
> 145         local_irq_disable();
> 146 #endif
> 147         /*
> 148          * Are softirqs going to be turned on now:
> 149          */
>
>

I am wondering if this was caused by the previous issue.


> The git is updated up to:
>    d938a70 be2net: increase POST timeout for EEH recovery
>
> Two slave interfaces, bonding mode 1, netconsole over bond0.
>
> [1] https://bugzilla.redhat.com/show_bug.cgi?id=248374#c5

How did you reproduce this?
I will check that BZ to see if I can find how to reproduce this.

Thanks.


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-27 18:05   ` [Bridge] " Flavio Leitner
@ 2010-05-28  8:16     ` Cong Wang
  -1 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-05-28  8:16 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: linux-kernel, Matt Mackall, netdev, bridge, Andy Gospodarek,
	Neil Horman, Jeff Moyer, Stephen Hemminger, bonding-devel,
	Jay Vosburgh, David Miller

[-- Attachment #1: Type: text/plain, Size: 805 bytes --]

On 05/28/10 02:05, Flavio Leitner wrote:
>
> Hi guys!
>
> I finally could test this to see if an old problem reported on bugzilla[1] was
> fixed now, but unfortunately it is still there.
>
> The ticket is private I guess, but basically the problem happens when bonding
> driver tries to print something after it had taken the write_lock (monitor
> functions, enslave/de-enslave), so the printk() will pass through netpoll, then
> on bonding again which no matter what mode you use, it will try to read_lock()
> the lock again. The result is a deadlock and the entire system hangs.
>

Does the attached patch fix this hang?

Thanks!

----------------------->

We should notify netconsole that bond is changing its slaves
when we use active-backup mode.

Signed-off-by: WANG Cong <amwang@redhat.com>

----


[-- Attachment #2: drivers-net-bonding-fix-activebackup-deadlock.diff --]
[-- Type: text/x-patch, Size: 898 bytes --]

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 5e12462..9494c02 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1199,6 +1199,7 @@ void bond_select_active_slave(struct bonding *bond)
 
 	best_slave = bond_find_best_slave(bond);
 	if (best_slave != bond->curr_active_slave) {
+		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
 		bond_change_active_slave(bond, best_slave);
 		rv = bond_set_carrier(bond);
 		if (!rv)
@@ -2154,6 +2155,7 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi
 	    (old_active) &&
 	    (new_active->link == BOND_LINK_UP) &&
 	    IS_UP(new_active->dev)) {
+		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
 		write_lock_bh(&bond->curr_slave_lock);
 		bond_change_active_slave(bond, new_active);
 		write_unlock_bh(&bond->curr_slave_lock);

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-28  8:16     ` Cong Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-05-28  8:16 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: Jay Vosburgh, Neil Horman, netdev, Matt Mackall, bridge,
	linux-kernel, David Miller, Jeff Moyer, Andy Gospodarek,
	bonding-devel

[-- Attachment #1: Type: text/plain, Size: 805 bytes --]

On 05/28/10 02:05, Flavio Leitner wrote:
>
> Hi guys!
>
> I finally could test this to see if an old problem reported on bugzilla[1] was
> fixed now, but unfortunately it is still there.
>
> The ticket is private I guess, but basically the problem happens when bonding
> driver tries to print something after it had taken the write_lock (monitor
> functions, enslave/de-enslave), so the printk() will pass through netpoll, then
> on bonding again which no matter what mode you use, it will try to read_lock()
> the lock again. The result is a deadlock and the entire system hangs.
>

Does the attached patch fix this hang?

Thanks!

----------------------->

We should notify netconsole that bond is changing its slaves
when we use active-backup mode.

Signed-off-by: WANG Cong <amwang@redhat.com>

----


[-- Attachment #2: drivers-net-bonding-fix-activebackup-deadlock.diff --]
[-- Type: text/x-patch, Size: 898 bytes --]

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 5e12462..9494c02 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1199,6 +1199,7 @@ void bond_select_active_slave(struct bonding *bond)
 
 	best_slave = bond_find_best_slave(bond);
 	if (best_slave != bond->curr_active_slave) {
+		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
 		bond_change_active_slave(bond, best_slave);
 		rv = bond_set_carrier(bond);
 		if (!rv)
@@ -2154,6 +2155,7 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi
 	    (old_active) &&
 	    (new_active->link == BOND_LINK_UP) &&
 	    IS_UP(new_active->dev)) {
+		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
 		write_lock_bh(&bond->curr_slave_lock);
 		bond_change_active_slave(bond, new_active);
 		write_unlock_bh(&bond->curr_slave_lock);

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-28  2:47     ` [Bridge] " Cong Wang
@ 2010-05-28 19:40       ` Flavio Leitner
  -1 siblings, 0 replies; 73+ messages in thread
From: Flavio Leitner @ 2010-05-28 19:40 UTC (permalink / raw)
  To: Cong Wang
  Cc: linux-kernel, Matt Mackall, netdev, bridge, Andy Gospodarek,
	Neil Horman, Jeff Moyer, Stephen Hemminger, bonding-devel,
	Jay Vosburgh, David Miller

On Fri, May 28, 2010 at 10:47:01AM +0800, Cong Wang wrote:
> On 05/28/10 02:05, Flavio Leitner wrote:
> >
> >Hi guys!
> >
> >I finally could test this to see if an old problem reported on bugzilla[1] was
> >fixed now, but unfortunately it is still there.
> >
> >The ticket is private I guess, but basically the problem happens when bonding
> >driver tries to print something after it had taken the write_lock (monitor
> >functions, enslave/de-enslave), so the printk() will pass through netpoll, then
> >on bonding again which no matter what mode you use, it will try to read_lock()
> >the lock again. The result is a deadlock and the entire system hangs.
> 
> 
> This is true, I already fixed some similar issues.
> 
> >
> >I manage to get a fresh backtrace with mode 1, see below:
> >
> >
> >[   93.167079] Call Trace:
> >[   93.167079]  [<ffffffff81034cf9>] warn_slowpath_common+0x77/0x8f
> >[   93.167079]  [<ffffffff81034d5e>] warn_slowpath_fmt+0x3c/0x3e
> >[   93.167079]  [<ffffffff81366aef>] ? _raw_read_trylock+0x11/0x4b
> >[   93.167079]  [<ffffffffa02a2c42>] ? bond_start_xmit+0x12b/0x401 [bonding]
> >->  read_lock fails
> >[   93.167079]  [<ffffffffa02a2c9f>] bond_start_xmit+0x188/0x401 [bonding]
> >[   93.167079]  [<ffffffff81055b37>] ? trace_hardirqs_off+0xd/0xf
> >[   93.167079]  [<ffffffff812dfdb9>] netpoll_send_skb+0xbd/0x1f3
> >[   93.167079]  [<ffffffff812e00ed>] netpoll_send_udp+0x1fe/0x20d
> >[   93.167079]  [<ffffffffa02c017c>] write_msg+0x89/0xcd [netconsole]
> >[   93.167079]  [<ffffffff81034e65>] __call_console_drivers+0x67/0x79
> >[   93.167079]  [<ffffffff81034ed0>] _call_console_drivers+0x59/0x5d
> >[   93.167079]  [<ffffffff810352d3>] release_console_sem+0x121/0x1d7
> >[   93.167079]  [<ffffffff8103590a>] vprintk+0x35d/0x393
> >[   93.167079]  [<ffffffff8103f947>] ? add_timer+0x17/0x19
> >[   93.167079]  [<ffffffff81046ddf>] ? queue_delayed_work_on+0xa2/0xa9
> >[   93.167079]  [<ffffffff81363bb8>] printk+0x3c/0x44
> >[   93.167079]  [<ffffffffa02a3b17>] bond_select_active_slave+0x105/0x109 [bonding]
> >->  write_locked
> >[   93.167079]  [<ffffffffa02a4798>] bond_mii_monitor+0x479/0x4ed [bonding]
> >[   93.167079]  [<ffffffff81046009>] worker_thread+0x1ef/0x2e2
> >
> >In this case, the message should be
> >     "bonding: bond0: making interface eth0 the new active one"
> 
> 
> Hmm, you triggered a warning here, let me check the source code
> and try to reproduce it here.

Okay, just pull the cable from one slave or both slaves.



> >I did the following patch to discard the packet if it was IN_NETPOLL
> >and the read_lock() fails, so I could go ahead testing it:
> >
> >diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> >index 5e12462..a3b8bad 100644
> >--- a/drivers/net/bonding/bond_main.c
> >+++ b/drivers/net/bonding/bond_main.c
> >@@ -4258,8 +4258,19 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
> >  	struct bonding *bond = netdev_priv(bond_dev);
> >  	int res = 1;
> >
> >-	read_lock(&bond->lock);
> >-	read_lock(&bond->curr_slave_lock);
> >+	if (read_trylock(&bond->lock) == 0&&
> >+		(bond_dev->flags&  IFF_IN_NETPOLL)) {
> >+			dev_kfree_skb(skb);
> >+			return NETDEV_TX_OK;
> >+	}
> >+
> >+	if (read_trylock(&bond->curr_slave_lock) == 0&&
> >+		(bond_dev->flags&  IFF_IN_NETPOLL)) {
> >+			read_unlock(&bond->lock);
> >+			dev_kfree_skb(skb);
> >+			return NETDEV_TX_OK;
> >+	}
> >+			
> >
> >  	if (!BOND_IS_OK(bond))
> >  		goto out;
> >
> 
> 
> This looks like a workaround, not a fix. :)

No, it's a debugging patch; without it I couldn't see anything on the
serial console when it deadlocks, or keep testing. :)


> >and I found another problem.  The function netpoll_send_skb() checks
> >if the npinfo's queue length is zero and if it's not, it will queue
> >the packet to make sure it's in order and then schedule the thread
> >to run. Later, the thread wakes up running queue_process() which disables
> >interrupts before calling ndo_start_xmit().  However, dev_queue_xmit()
> >uses rcu_*_bh() and before return, it will enable the interrupts again,
> >spitting this:
> >
> >------------[ cut here ]------------
> >WARNING: at kernel/softirq.c:143 local_bh_enable+0x3c/0x86()
> >Hardware name: Precision WorkStation 490
> >Modules linked in: netconsole bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6]
> >Pid: 17, comm: events/2 Not tainted 2.6.34-04700-gd938a70 #21
> >Call Trace:
> >  [<ffffffff810381d6>] warn_slowpath_common+0x77/0x8f
> >  [<ffffffff810381fd>] warn_slowpath_null+0xf/0x11
> >  [<ffffffff8103d691>] local_bh_enable+0x3c/0x86
> >  [<ffffffff812e4d85>] dev_queue_xmit+0x462/0x493
> >  [<ffffffffa018805f>] bond_dev_queue_xmit+0x1bd/0x1e3 [bonding]
> >  [<ffffffffa01881dd>] bond_start_xmit+0x158/0x37b [bonding]
> >->  interrupts disabled
> >  [<ffffffff812f3fca>] queue_process+0x9d/0xf9
> >  [<ffffffff8104d022>] worker_thread+0x19d/0x224
> >  [<ffffffff812f3f2d>] ? queue_process+0x0/0xf9
> >  [<ffffffff81050819>] ? autoremove_wake_function+0x0/0x34
> >  [<ffffffff8104ce85>] ? worker_thread+0x0/0x224
> >  [<ffffffff8105040b>] kthread+0x7a/0x82
> >  [<ffffffff810036d4>] kernel_thread_helper+0x4/0x10
> >  [<ffffffff81050391>] ? kthread+0x0/0x82
> >  [<ffffffff810036d0>] ? kernel_thread_helper+0x0/0x10
> >---[ end trace 74e3904503fdb632 ]---
> >
> >kernel/softirq.c:
> >141 static inline void _local_bh_enable_ip(unsigned long ip)
> >142 {
> >143         WARN_ON_ONCE(in_irq() || irqs_disabled());
> >144 #ifdef CONFIG_TRACE_IRQFLAGS
> >145         local_irq_disable();
> >146 #endif
> >147         /*
> >148          * Are softirqs going to be turned on now:
> >149          */
> >
> >
> 
> I am wondering if this was caused by the previous issue.

Yeah, the first problem can help to trigger this.
The spinlock target_list_lock in write_msg() seems to serialize
this, but I'm not sure yet whether the queue will always be empty by
the time another call to write_msg() is made, because
netpoll_send_skb() will queue the packet if it fails to send.


> >The git is updated up to:
> >   d938a70 be2net: increase POST timeout for EEH recovery
> >
> >Two slave interfaces, bonding mode 1, netconsole over bond0.
> >
> >[1] https://bugzilla.redhat.com/show_bug.cgi?id=248374#c5
> 
> How did you reproduce this?
> I will check that BZ to see if I can find how to reproduce this.
> 
> Thanks.
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Flavio

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-28 19:40       ` Flavio Leitner
  0 siblings, 0 replies; 73+ messages in thread
From: Flavio Leitner @ 2010-05-28 19:40 UTC (permalink / raw)
  To: Cong Wang
  Cc: Jay Vosburgh, Neil Horman, netdev, Matt Mackall, bridge,
	linux-kernel, David Miller, Jeff Moyer, Andy Gospodarek,
	bonding-devel

On Fri, May 28, 2010 at 10:47:01AM +0800, Cong Wang wrote:
> On 05/28/10 02:05, Flavio Leitner wrote:
> >
> >Hi guys!
> >
> >I finally could test this to see if an old problem reported on bugzilla[1] was
> >fixed now, but unfortunately it is still there.
> >
> >The ticket is private I guess, but basically the problem happens when bonding
> >driver tries to print something after it had taken the write_lock (monitor
> >functions, enslave/de-enslave), so the printk() will pass through netpoll, then
> >on bonding again which no matter what mode you use, it will try to read_lock()
> >the lock again. The result is a deadlock and the entire system hangs.
> 
> 
> This is true, I already fixed some similar issues.
> 
> >
> >I manage to get a fresh backtrace with mode 1, see below:
> >
> >
> >[   93.167079] Call Trace:
> >[   93.167079]  [<ffffffff81034cf9>] warn_slowpath_common+0x77/0x8f
> >[   93.167079]  [<ffffffff81034d5e>] warn_slowpath_fmt+0x3c/0x3e
> >[   93.167079]  [<ffffffff81366aef>] ? _raw_read_trylock+0x11/0x4b
> >[   93.167079]  [<ffffffffa02a2c42>] ? bond_start_xmit+0x12b/0x401 [bonding]
> >->  read_lock fails
> >[   93.167079]  [<ffffffffa02a2c9f>] bond_start_xmit+0x188/0x401 [bonding]
> >[   93.167079]  [<ffffffff81055b37>] ? trace_hardirqs_off+0xd/0xf
> >[   93.167079]  [<ffffffff812dfdb9>] netpoll_send_skb+0xbd/0x1f3
> >[   93.167079]  [<ffffffff812e00ed>] netpoll_send_udp+0x1fe/0x20d
> >[   93.167079]  [<ffffffffa02c017c>] write_msg+0x89/0xcd [netconsole]
> >[   93.167079]  [<ffffffff81034e65>] __call_console_drivers+0x67/0x79
> >[   93.167079]  [<ffffffff81034ed0>] _call_console_drivers+0x59/0x5d
> >[   93.167079]  [<ffffffff810352d3>] release_console_sem+0x121/0x1d7
> >[   93.167079]  [<ffffffff8103590a>] vprintk+0x35d/0x393
> >[   93.167079]  [<ffffffff8103f947>] ? add_timer+0x17/0x19
> >[   93.167079]  [<ffffffff81046ddf>] ? queue_delayed_work_on+0xa2/0xa9
> >[   93.167079]  [<ffffffff81363bb8>] printk+0x3c/0x44
> >[   93.167079]  [<ffffffffa02a3b17>] bond_select_active_slave+0x105/0x109 [bonding]
> >->  write_locked
> >[   93.167079]  [<ffffffffa02a4798>] bond_mii_monitor+0x479/0x4ed [bonding]
> >[   93.167079]  [<ffffffff81046009>] worker_thread+0x1ef/0x2e2
> >
> >In this case, the message should be
> >     "bonding: bond0: making interface eth0 the new active one"
> 
> 
> Hmm, you triggered a warning here, let me check the source code
> and try to reproduce it here.

Okay, just pull the cable from one slave or both slaves.



> >I did the following patch to discard the packet if it was IN_NETPOLL
> >and the read_lock() fails, so I could go ahead testing it:
> >
> >diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> >index 5e12462..a3b8bad 100644
> >--- a/drivers/net/bonding/bond_main.c
> >+++ b/drivers/net/bonding/bond_main.c
> >@@ -4258,8 +4258,19 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
> >  	struct bonding *bond = netdev_priv(bond_dev);
> >  	int res = 1;
> >
> >-	read_lock(&bond->lock);
> >-	read_lock(&bond->curr_slave_lock);
> >+	if (read_trylock(&bond->lock) == 0&&
> >+		(bond_dev->flags&  IFF_IN_NETPOLL)) {
> >+			dev_kfree_skb(skb);
> >+			return NETDEV_TX_OK;
> >+	}
> >+
> >+	if (read_trylock(&bond->curr_slave_lock) == 0&&
> >+		(bond_dev->flags&  IFF_IN_NETPOLL)) {
> >+			read_unlock(&bond->lock);
> >+			dev_kfree_skb(skb);
> >+			return NETDEV_TX_OK;
> >+	}
> >+			
> >
> >  	if (!BOND_IS_OK(bond))
> >  		goto out;
> >
> 
> 
> This looks like a workaround, not a fix. :)

No, it's a debugging patch; without it I couldn't see anything on the
serial console when it deadlocks, or keep testing. :)


> >and I found another problem.  The function netpoll_send_skb() checks
> >if the npinfo's queue length is zero and if it's not, it will queue
> >the packet to make sure it's in order and then schedule the thread
> >to run. Later, the thread wakes up running queue_process() which disables
> >interrupts before calling ndo_start_xmit().  However, dev_queue_xmit()
> >uses rcu_*_bh() and before return, it will enable the interrupts again,
> >spitting this:
> >
> >------------[ cut here ]------------
> >WARNING: at kernel/softirq.c:143 local_bh_enable+0x3c/0x86()
> >Hardware name: Precision WorkStation 490
> >Modules linked in: netconsole bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6]
> >Pid: 17, comm: events/2 Not tainted 2.6.34-04700-gd938a70 #21
> >Call Trace:
> >  [<ffffffff810381d6>] warn_slowpath_common+0x77/0x8f
> >  [<ffffffff810381fd>] warn_slowpath_null+0xf/0x11
> >  [<ffffffff8103d691>] local_bh_enable+0x3c/0x86
> >  [<ffffffff812e4d85>] dev_queue_xmit+0x462/0x493
> >  [<ffffffffa018805f>] bond_dev_queue_xmit+0x1bd/0x1e3 [bonding]
> >  [<ffffffffa01881dd>] bond_start_xmit+0x158/0x37b [bonding]
> >->  interrupts disabled
> >  [<ffffffff812f3fca>] queue_process+0x9d/0xf9
> >  [<ffffffff8104d022>] worker_thread+0x19d/0x224
> >  [<ffffffff812f3f2d>] ? queue_process+0x0/0xf9
> >  [<ffffffff81050819>] ? autoremove_wake_function+0x0/0x34
> >  [<ffffffff8104ce85>] ? worker_thread+0x0/0x224
> >  [<ffffffff8105040b>] kthread+0x7a/0x82
> >  [<ffffffff810036d4>] kernel_thread_helper+0x4/0x10
> >  [<ffffffff81050391>] ? kthread+0x0/0x82
> >  [<ffffffff810036d0>] ? kernel_thread_helper+0x0/0x10
> >---[ end trace 74e3904503fdb632 ]---
> >
> >kernel/softirq.c:
> >141 static inline void _local_bh_enable_ip(unsigned long ip)
> >142 {
> >143         WARN_ON_ONCE(in_irq() || irqs_disabled());
> >144 #ifdef CONFIG_TRACE_IRQFLAGS
> >145         local_irq_disable();
> >146 #endif
> >147         /*
> >148          * Are softirqs going to be turned on now:
> >149          */
> >
> >
> 
> I am wondering if this was caused by the previous issue.

Yeah, the first problem can help to trigger this.
The spinlock target_list_lock in write_msg() seems to serialize
this, but I'm not sure yet whether the queue will always be empty by
the time another call to write_msg() is made, because
netpoll_send_skb() will queue the packet if it fails to send.


> >The git is updated up to:
> >   d938a70 be2net: increase POST timeout for EEH recovery
> >
> >Two slave interfaces, bonding mode 1, netconsole over bond0.
> >
> >[1] https://bugzilla.redhat.com/show_bug.cgi?id=248374#c5
> 
> How did you reproduce this?
> I will check that BZ to see if I can find how to reproduce this.
> 
> Thanks.
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Flavio

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-28  8:16     ` [Bridge] " Cong Wang
@ 2010-05-28 20:42       ` Flavio Leitner
  -1 siblings, 0 replies; 73+ messages in thread
From: Flavio Leitner @ 2010-05-28 20:42 UTC (permalink / raw)
  To: Cong Wang
  Cc: linux-kernel, Matt Mackall, netdev, bridge, Andy Gospodarek,
	Neil Horman, Jeff Moyer, Stephen Hemminger, bonding-devel,
	Jay Vosburgh, David Miller

On Fri, May 28, 2010 at 04:16:34PM +0800, Cong Wang wrote:
> On 05/28/10 02:05, Flavio Leitner wrote:
> >
> >Hi guys!
> >
> >I finally could test this to see if an old problem reported on bugzilla[1] was
> >fixed now, but unfortunately it is still there.
> >
> >The ticket is private I guess, but basically the problem happens when bonding
> >driver tries to print something after it had taken the write_lock (monitor
> >functions, enslave/de-enslave), so the printk() will pass through netpoll, then
> >on bonding again which no matter what mode you use, it will try to read_lock()
> >the lock again. The result is a deadlock and the entire system hangs.
> >
> 
> Does the attached patch fix this hang?

I got another issue now:

[   89.523062] bonding: bond0: enslaving eth0 as a backup interface with a down link.
[   89.580746] bonding: bond0: enslaving eth2 as a backup interface with a down link.
[   91.198527] e1000: eth2 NIC Link is Up 100 Mbps Half Duplex, Flow Control: None
[   91.238245] bonding: bond0: link status definitely up for interface eth2.

[   91.245381] BUG: scheduling while atomic: bond0/2716/0x10000100
[   91.251565] 5 locks held by bond0/2716:
[   91.255663]  #0:  ((bond_dev->name)){+.+.+.}, at: [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2
[   91.265179]  #1:  ((&(&bond->mii_work)->work)){+.+.+.}, at: [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2
[   91.275554]  #2:  (rtnl_mutex){+.+.+.}, at: [<ffffffff812daf38>] rtnl_lock+0x12/0x14
[   91.284018]  #3:  (&bond->lock){++.+.+}, at: [<ffffffffa029e06a>] bond_mii_monitor+0x2a2/0x4ed [bonding]
[   91.294230]  #4:  (&bond->curr_slave_lock){+...+.}, at: [<ffffffffa029e239>] bond_mii_monitor+0x471/0x4ed [bonding]
[   91.305387] Modules linked in: bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables x_tables ipv6 dm_mirror dm_region_hash dm_log dm_multipath uinput snd_hda_codec_idt snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm ppdev parport_pc parport rtc_cmos snd_timer tg3 snd ide_cd_mod i5000_edac i2c_i801 libphy rtc_core rtc_lib edac_core pcspkr e1000 dcdbas uhci_hcd tulip shpchp i2c_core cdrom serio_raw soundcore sg snd_page_alloc raid0 sd_mod button [last unloaded: mperf]
[   91.357735] Pid: 2716, comm: bond0 Not tainted 2.6.34-04700-gd938a70-dirty #36
[   91.371112] Call Trace:
[   91.373825]  [<ffffffff81056002>] ? __debug_show_held_locks+0x22/0x24
[   91.380530]  [<ffffffff8102e4a2>] __schedule_bug+0x6d/0x72
[   91.386284]  [<ffffffff81363f6e>] schedule+0xc9/0x791
[   91.391600]  [<ffffffff81032540>] __cond_resched+0x25/0x30
[   91.397350]  [<ffffffff81364757>] _cond_resched+0x27/0x32
[   91.403013]  [<ffffffff810ab243>] kmem_cache_alloc+0x2b/0xac
[   91.408936]  [<ffffffff812c61fd>] skb_clone+0x42/0x5d
[   91.414253]  [<ffffffff812ec696>] netlink_broadcast+0x192/0x369
[   91.420436]  [<ffffffff812ecdc3>] nlmsg_notify+0x43/0x89
[   91.426012]  [<ffffffff812dabc7>] rtnl_notify+0x2b/0x2d
[   91.431501]  [<ffffffff812dacbc>] rtmsg_ifinfo+0xf3/0x118
[   91.437165]  [<ffffffff812dad0c>] rtnetlink_event+0x2b/0x2f
[   91.443003]  [<ffffffff81369fe4>] notifier_call_chain+0x32/0x5e
[   91.449188]  [<ffffffff8104d618>] raw_notifier_call_chain+0xf/0x11
[   91.455634]  [<ffffffff812cfc73>] call_netdevice_notifiers+0x45/0x4a
[   91.462253]  [<ffffffff812d04f7>] netdev_bonding_change+0x12/0x14
[   91.468614]  [<ffffffffa029d589>] bond_select_active_slave+0xe8/0x123 [bonding]
[   91.476408]  [<ffffffffa029e241>] bond_mii_monitor+0x479/0x4ed [bonding]
[   91.483375]  [<ffffffff81046009>] worker_thread+0x1ef/0x2e2
[   91.489212]  [<ffffffff81045fb4>] ? worker_thread+0x19a/0x2e2
[   91.495227]  [<ffffffffa029ddc8>] ? bond_mii_monitor+0x0/0x4ed [bonding]
[   91.502192]  [<ffffffff81049c71>] ? autoremove_wake_function+0x0/0x34
[   91.508897]  [<ffffffff81045e1a>] ? worker_thread+0x0/0x2e2
[   91.514734]  [<ffffffff810498bb>] kthread+0x7a/0x82
[   91.519878]  [<ffffffff81003714>] kernel_thread_helper+0x4/0x10
[   91.526060]  [<ffffffff81366ffc>] ? restore_args+0x0/0x30
[   91.531723]  [<ffffffff81049841>] ? kthread+0x0/0x82
[   91.536953]  [<ffffffff81003710>] ? kernel_thread_helper+0x0/0x10
[   91.543343] bonding: bond0: making interface eth2 the new active one.
[   91.550554] bonding: bond0: first active interface up!
[   91.556859] ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready


No other patch applied. Just started netconsole over bonding, so no need
to pull the cable from slaves. Reproduced twice: once I got the
backtrace above, and the other time the system hung completely
after the "BUG: scheduling" message.

fbl


> 
> Thanks!
> 
> ----------------------->
> 
> We should notify netconsole that bond is changing its slaves
> when we use active-backup mode.
> 
> Signed-off-by: WANG Cong <amwang@redhat.com>
> 
> ----
> 

> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 5e12462..9494c02 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -1199,6 +1199,7 @@ void bond_select_active_slave(struct bonding *bond)
>  
>  	best_slave = bond_find_best_slave(bond);
>  	if (best_slave != bond->curr_active_slave) {
> +		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>  		bond_change_active_slave(bond, best_slave);
>  		rv = bond_set_carrier(bond);
>  		if (!rv)
> @@ -2154,6 +2155,7 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi
>  	    (old_active) &&
>  	    (new_active->link == BOND_LINK_UP) &&
>  	    IS_UP(new_active->dev)) {
> +		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>  		write_lock_bh(&bond->curr_slave_lock);
>  		bond_change_active_slave(bond, new_active);
>  		write_unlock_bh(&bond->curr_slave_lock);


-- 
Flavio

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-28 20:42       ` Flavio Leitner
  0 siblings, 0 replies; 73+ messages in thread
From: Flavio Leitner @ 2010-05-28 20:42 UTC (permalink / raw)
  To: Cong Wang
  Cc: Jay Vosburgh, Neil Horman, netdev, Matt Mackall, bridge,
	linux-kernel, David Miller, Jeff Moyer, Andy Gospodarek,
	bonding-devel

On Fri, May 28, 2010 at 04:16:34PM +0800, Cong Wang wrote:
> On 05/28/10 02:05, Flavio Leitner wrote:
> >
> >Hi guys!
> >
> >I finally could test this to see if an old problem reported on bugzilla[1] was
> >fixed now, but unfortunately it is still there.
> >
> >The ticket is private I guess, but basically the problem happens when bonding
> >driver tries to print something after it had taken the write_lock (monitor
> >functions, enslave/de-enslave), so the printk() will pass through netpoll, then
> >on bonding again which no matter what mode you use, it will try to read_lock()
> >the lock again. The result is a deadlock and the entire system hangs.
> >
> 
> Does the attached patch fix this hang?

I got another issue now:

[   89.523062] bonding: bond0: enslaving eth0 as a backup interface with a down link.
[   89.580746] bonding: bond0: enslaving eth2 as a backup interface with a down link.
[   91.198527] e1000: eth2 NIC Link is Up 100 Mbps Half Duplex, Flow Control: None
[   91.238245] bonding: bond0: link status definitely up for interface eth2.

[   91.245381] BUG: scheduling while atomic: bond0/2716/0x10000100
[   91.251565] 5 locks held by bond0/2716:
[   91.255663]  #0:  ((bond_dev->name)){+.+.+.}, at: [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2
[   91.265179]  #1:  ((&(&bond->mii_work)->work)){+.+.+.}, at: [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2
[   91.275554]  #2:  (rtnl_mutex){+.+.+.}, at: [<ffffffff812daf38>] rtnl_lock+0x12/0x14
[   91.284018]  #3:  (&bond->lock){++.+.+}, at: [<ffffffffa029e06a>] bond_mii_monitor+0x2a2/0x4ed [bonding]
[   91.294230]  #4:  (&bond->curr_slave_lock){+...+.}, at: [<ffffffffa029e239>] bond_mii_monitor+0x471/0x4ed [bonding]
[   91.305387] Modules linked in: bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables x_tables ipv6 dm_mirror dm_region_hash dm_log dm_multipath uinput snd_hda_codec_idt snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm ppdev parport_pc parport rtc_cmos snd_timer tg3 snd ide_cd_mod i5000_edac i2c_i801 libphy rtc_core rtc_lib edac_core pcspkr e1000 dcdbas uhci_hcd tulip shpchp i2c_core cdrom serio_raw soundcore sg snd_page_alloc raid0 sd_mod button [last unloaded: mperf]
[   91.357735] Pid: 2716, comm: bond0 Not tainted 2.6.34-04700-gd938a70-dirty #36
[   91.371112] Call Trace:
[   91.373825]  [<ffffffff81056002>] ? __debug_show_held_locks+0x22/0x24
[   91.380530]  [<ffffffff8102e4a2>] __schedule_bug+0x6d/0x72
[   91.386284]  [<ffffffff81363f6e>] schedule+0xc9/0x791
[   91.391600]  [<ffffffff81032540>] __cond_resched+0x25/0x30
[   91.397350]  [<ffffffff81364757>] _cond_resched+0x27/0x32
[   91.403013]  [<ffffffff810ab243>] kmem_cache_alloc+0x2b/0xac
[   91.408936]  [<ffffffff812c61fd>] skb_clone+0x42/0x5d
[   91.414253]  [<ffffffff812ec696>] netlink_broadcast+0x192/0x369
[   91.420436]  [<ffffffff812ecdc3>] nlmsg_notify+0x43/0x89
[   91.426012]  [<ffffffff812dabc7>] rtnl_notify+0x2b/0x2d
[   91.431501]  [<ffffffff812dacbc>] rtmsg_ifinfo+0xf3/0x118
[   91.437165]  [<ffffffff812dad0c>] rtnetlink_event+0x2b/0x2f
[   91.443003]  [<ffffffff81369fe4>] notifier_call_chain+0x32/0x5e
[   91.449188]  [<ffffffff8104d618>] raw_notifier_call_chain+0xf/0x11
[   91.455634]  [<ffffffff812cfc73>] call_netdevice_notifiers+0x45/0x4a
[   91.462253]  [<ffffffff812d04f7>] netdev_bonding_change+0x12/0x14
[   91.468614]  [<ffffffffa029d589>] bond_select_active_slave+0xe8/0x123 [bonding]
[   91.476408]  [<ffffffffa029e241>] bond_mii_monitor+0x479/0x4ed [bonding]
[   91.483375]  [<ffffffff81046009>] worker_thread+0x1ef/0x2e2
[   91.489212]  [<ffffffff81045fb4>] ? worker_thread+0x19a/0x2e2
[   91.495227]  [<ffffffffa029ddc8>] ? bond_mii_monitor+0x0/0x4ed [bonding]
[   91.502192]  [<ffffffff81049c71>] ? autoremove_wake_function+0x0/0x34
[   91.508897]  [<ffffffff81045e1a>] ? worker_thread+0x0/0x2e2
[   91.514734]  [<ffffffff810498bb>] kthread+0x7a/0x82
[   91.519878]  [<ffffffff81003714>] kernel_thread_helper+0x4/0x10
[   91.526060]  [<ffffffff81366ffc>] ? restore_args+0x0/0x30
[   91.531723]  [<ffffffff81049841>] ? kthread+0x0/0x82
[   91.536953]  [<ffffffff81003710>] ? kernel_thread_helper+0x0/0x10
[   91.543343] bonding: bond0: making interface eth2 the new active one.
[   91.550554] bonding: bond0: first active interface up!
[   91.556859] ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready


No other patch applied. Just started netconsole over bonding, so no need
to pull the cable from slaves. Reproduced twice: once I got the
backtrace above, and the other time the system hung completely
after the "BUG: scheduling" message.

fbl


> 
> Thanks!
> 
> ----------------------->
> 
> We should notify netconsole that bond is changing its slaves
> when we use active-backup mode.
> 
> Signed-off-by: WANG Cong <amwang@redhat.com>
> 
> ----
> 

> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 5e12462..9494c02 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -1199,6 +1199,7 @@ void bond_select_active_slave(struct bonding *bond)
>  
>  	best_slave = bond_find_best_slave(bond);
>  	if (best_slave != bond->curr_active_slave) {
> +		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>  		bond_change_active_slave(bond, best_slave);
>  		rv = bond_set_carrier(bond);
>  		if (!rv)
> @@ -2154,6 +2155,7 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi
>  	    (old_active) &&
>  	    (new_active->link == BOND_LINK_UP) &&
>  	    IS_UP(new_active->dev)) {
> +		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>  		write_lock_bh(&bond->curr_slave_lock);
>  		bond_change_active_slave(bond, new_active);
>  		write_unlock_bh(&bond->curr_slave_lock);


-- 
Flavio

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-28 20:42       ` [Bridge] " Flavio Leitner
@ 2010-05-28 21:03         ` Jay Vosburgh
  -1 siblings, 0 replies; 73+ messages in thread
From: Jay Vosburgh @ 2010-05-28 21:03 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: Cong Wang, linux-kernel, Matt Mackall, netdev, bridge,
	Andy Gospodarek, Neil Horman, Jeff Moyer, Stephen Hemminger,
	bonding-devel, David Miller

Flavio Leitner <fbl@sysclose.org> wrote:

>On Fri, May 28, 2010 at 04:16:34PM +0800, Cong Wang wrote:
>> On 05/28/10 02:05, Flavio Leitner wrote:
>> >
>> >Hi guys!
>> >
>> >I finally could test this to see if an old problem reported on bugzilla[1] was
>> >fixed now, but unfortunately it is still there.
>> >
>> >The ticket is private I guess, but basically the problem happens when bonding
>> >driver tries to print something after it had taken the write_lock (monitor
>> >functions, enslave/de-enslave), so the printk() will pass through netpoll, then
>> >on bonding again which no matter what mode you use, it will try to read_lock()
>> >the lock again. The result is a deadlock and the entire system hangs.
>> >
>> 
>> Does the attached patch fix this hang?
>
>I got another issue now:
>
>[   89.523062] bonding: bond0: enslaving eth0 as a backup interface with a down link.
>[   89.580746] bonding: bond0: enslaving eth2 as a backup interface with a down link.
>[   91.198527] e1000: eth2 NIC Link is Up 100 Mbps Half Duplex, Flow Control: None
>[   91.238245] bonding: bond0: link status definitely up for interface eth2.
>
>[   91.245381] BUG: scheduling while atomic: bond0/2716/0x10000100
>[   91.251565] 5 locks held by bond0/2716:
>[   91.255663]  #0:  ((bond_dev->name)){+.+.+.}, at: [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2
>[   91.265179]  #1:  ((&(&bond->mii_work)->work)){+.+.+.}, at: [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2
>[   91.275554]  #2:  (rtnl_mutex){+.+.+.}, at: [<ffffffff812daf38>] rtnl_lock+0x12/0x14
>[   91.284018]  #3:  (&bond->lock){++.+.+}, at: [<ffffffffa029e06a>] bond_mii_monitor+0x2a2/0x4ed [bonding]
>[   91.294230]  #4:  (&bond->curr_slave_lock){+...+.}, at: [<ffffffffa029e239>] bond_mii_monitor+0x471/0x4ed [bonding]
>[   91.305387] Modules linked in: bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables x_tables ipv6 dm_mirror dm_region_hash dm_log dm_multipath uinput snd_hda_codec_idt snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm ppdev parport_pc parport rtc_cmos snd_timer tg3 snd ide_cd_mod i5000_edac i2c_i801 libphy rtc_core rtc_lib edac_core pcspkr e1000 dcdbas uhci_hcd tulip shpchp i2c_core cdrom serio_raw soundcore sg snd_page_alloc raid0 sd_mod button [last unloaded: mperf]
>[   91.357735] Pid: 2716, comm: bond0 Not tainted 2.6.34-04700-gd938a70-dirty #36
>[   91.371112] Call Trace:
>[   91.373825]  [<ffffffff81056002>] ? __debug_show_held_locks+0x22/0x24
>[   91.380530]  [<ffffffff8102e4a2>] __schedule_bug+0x6d/0x72
>[   91.386284]  [<ffffffff81363f6e>] schedule+0xc9/0x791
>[   91.391600]  [<ffffffff81032540>] __cond_resched+0x25/0x30
>[   91.397350]  [<ffffffff81364757>] _cond_resched+0x27/0x32
>[   91.403013]  [<ffffffff810ab243>] kmem_cache_alloc+0x2b/0xac
>[   91.408936]  [<ffffffff812c61fd>] skb_clone+0x42/0x5d
>[   91.414253]  [<ffffffff812ec696>] netlink_broadcast+0x192/0x369
>[   91.420436]  [<ffffffff812ecdc3>] nlmsg_notify+0x43/0x89
>[   91.426012]  [<ffffffff812dabc7>] rtnl_notify+0x2b/0x2d
>[   91.431501]  [<ffffffff812dacbc>] rtmsg_ifinfo+0xf3/0x118
>[   91.437165]  [<ffffffff812dad0c>] rtnetlink_event+0x2b/0x2f
>[   91.443003]  [<ffffffff81369fe4>] notifier_call_chain+0x32/0x5e
>[   91.449188]  [<ffffffff8104d618>] raw_notifier_call_chain+0xf/0x11
>[   91.455634]  [<ffffffff812cfc73>] call_netdevice_notifiers+0x45/0x4a
>[   91.462253]  [<ffffffff812d04f7>] netdev_bonding_change+0x12/0x14

	This warning is because the notifier call is happening with spin
locks held.

>[   91.468614]  [<ffffffffa029d589>] bond_select_active_slave+0xe8/0x123 [bonding]
>[   91.476408]  [<ffffffffa029e241>] bond_mii_monitor+0x479/0x4ed [bonding]
>[   91.483375]  [<ffffffff81046009>] worker_thread+0x1ef/0x2e2
>[   91.489212]  [<ffffffff81045fb4>] ? worker_thread+0x19a/0x2e2
>[   91.495227]  [<ffffffffa029ddc8>] ? bond_mii_monitor+0x0/0x4ed [bonding]
>[   91.502192]  [<ffffffff81049c71>] ? autoremove_wake_function+0x0/0x34
>[   91.508897]  [<ffffffff81045e1a>] ? worker_thread+0x0/0x2e2
>[   91.514734]  [<ffffffff810498bb>] kthread+0x7a/0x82
>[   91.519878]  [<ffffffff81003714>] kernel_thread_helper+0x4/0x10
>[   91.526060]  [<ffffffff81366ffc>] ? restore_args+0x0/0x30
>[   91.531723]  [<ffffffff81049841>] ? kthread+0x0/0x82
>[   91.536953]  [<ffffffff81003710>] ? kernel_thread_helper+0x0/0x10
>[   91.543343] bonding: bond0: making interface eth2 the new active one.
>[   91.550554] bonding: bond0: first active interface up!
>[   91.556859] ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready
>
>
>No other patch applied. Just started netconsole over bonding, so no need
>to pull the cable from slaves. Reproduced twice, one I got the
>backtrace above, and on the other one the system hangs completely 
>after the BUG: scheduling message.
>
>fbl
>
>
>> 
>> Thanks!
>> 
>> ----------------------->
>> 
>> We should notify netconsole that bond is changing its slaves
>> when we use active-backup mode.
>> 
>> Signed-off-by: WANG Cong <amwang@redhat.com>
>> 
>> ----
>> 
>
>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>> index 5e12462..9494c02 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -1199,6 +1199,7 @@ void bond_select_active_slave(struct bonding *bond)
>>  
>>  	best_slave = bond_find_best_slave(bond);
>>  	if (best_slave != bond->curr_active_slave) {
>> +		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>>  		bond_change_active_slave(bond, best_slave);
>>  		rv = bond_set_carrier(bond);
>>  		if (!rv)

	You can't do this here; the driver is holding various spin
locks, and notifier calls can sleep (hence the warning).  If you look at
the bond_change_active_slave function, it drops all locks other than
RTNL before making a notifier call, e.g.,

void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
{
[...]
	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
[...]	
			write_unlock_bh(&bond->curr_slave_lock);
			read_unlock(&bond->lock);

			netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER);

			read_lock(&bond->lock);
			write_lock_bh(&bond->curr_slave_lock);
		}


	You may be able to add your notifier to this case, or change
your handler to notice the _FAILOVER notifier.

>> @@ -2154,6 +2155,7 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi
>>  	    (old_active) &&
>>  	    (new_active->link == BOND_LINK_UP) &&
>>  	    IS_UP(new_active->dev)) {
>> +		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>>  		write_lock_bh(&bond->curr_slave_lock);
>>  		bond_change_active_slave(bond, new_active);
>>  		write_unlock_bh(&bond->curr_slave_lock);

	This case will have the same problem, but will only be hit if a
user does a manual "ifenslave -c bond0 ethX".

	You also probably wanted to do the sysfs path, but if the
notifier goes into the change_active_slave function itself, then I don't
think additional notifications would be necessary.

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-28 21:03         ` Jay Vosburgh
  0 siblings, 0 replies; 73+ messages in thread
From: Jay Vosburgh @ 2010-05-28 21:03 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: Cong Wang, Neil Horman, netdev, Matt Mackall, bridge,
	linux-kernel, David Miller, Jeff Moyer, Andy Gospodarek,
	bonding-devel

Flavio Leitner <fbl@sysclose.org> wrote:

>On Fri, May 28, 2010 at 04:16:34PM +0800, Cong Wang wrote:
>> On 05/28/10 02:05, Flavio Leitner wrote:
>> >
>> >Hi guys!
>> >
>> >I finally could test this to see if an old problem reported on bugzilla[1] was
>> >fixed now, but unfortunately it is still there.
>> >
>> >The ticket is private I guess, but basically the problem happens when bonding
>> >driver tries to print something after it had taken the write_lock (monitor
>> >functions, enslave/de-enslave), so the printk() will pass through netpoll, then
>> >on bonding again which no matter what mode you use, it will try to read_lock()
>> >the lock again. The result is a deadlock and the entire system hangs.
>> >
>> 
>> Does the attached patch fix this hang?
>
>I got another issue now:
>
>[   89.523062] bonding: bond0: enslaving eth0 as a backup interface with a down link.
>[   89.580746] bonding: bond0: enslaving eth2 as a backup interface with a down link.
>[   91.198527] e1000: eth2 NIC Link is Up 100 Mbps Half Duplex, Flow Control: None
>[   91.238245] bonding: bond0: link status definitely up for interface eth2.
>
>[   91.245381] BUG: scheduling while atomic: bond0/2716/0x10000100
>[   91.251565] 5 locks held by bond0/2716:
>[   91.255663]  #0:  ((bond_dev->name)){+.+.+.}, at: [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2
>[   91.265179]  #1:  ((&(&bond->mii_work)->work)){+.+.+.}, at: [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2
>[   91.275554]  #2:  (rtnl_mutex){+.+.+.}, at: [<ffffffff812daf38>] rtnl_lock+0x12/0x14
>[   91.284018]  #3:  (&bond->lock){++.+.+}, at: [<ffffffffa029e06a>] bond_mii_monitor+0x2a2/0x4ed [bonding]
>[   91.294230]  #4:  (&bond->curr_slave_lock){+...+.}, at: [<ffffffffa029e239>] bond_mii_monitor+0x471/0x4ed [bonding]
>[   91.305387] Modules linked in: bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables x_tables ipv6 dm_mirror dm_region_hash dm_log dm_multipath uinput snd_hda_codec_idt snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm ppdev parport_pc parport rtc_cmos snd_timer tg3 snd ide_cd_mod i5000_edac i2c_i801 libphy rtc_core rtc_lib edac_core pcspkr e1000 dcdbas uhci_hcd tulip shpchp i2c_core cdrom serio_raw soundcore sg snd_page_alloc raid0 sd_mod button [last unloaded: mperf]
>[   91.357735] Pid: 2716, comm: bond0 Not tainted 2.6.34-04700-gd938a70-dirty #36
>[   91.371112] Call Trace:
>[   91.373825]  [<ffffffff81056002>] ? __debug_show_held_locks+0x22/0x24
>[   91.380530]  [<ffffffff8102e4a2>] __schedule_bug+0x6d/0x72
>[   91.386284]  [<ffffffff81363f6e>] schedule+0xc9/0x791
>[   91.391600]  [<ffffffff81032540>] __cond_resched+0x25/0x30
>[   91.397350]  [<ffffffff81364757>] _cond_resched+0x27/0x32
>[   91.403013]  [<ffffffff810ab243>] kmem_cache_alloc+0x2b/0xac
>[   91.408936]  [<ffffffff812c61fd>] skb_clone+0x42/0x5d
>[   91.414253]  [<ffffffff812ec696>] netlink_broadcast+0x192/0x369
>[   91.420436]  [<ffffffff812ecdc3>] nlmsg_notify+0x43/0x89
>[   91.426012]  [<ffffffff812dabc7>] rtnl_notify+0x2b/0x2d
>[   91.431501]  [<ffffffff812dacbc>] rtmsg_ifinfo+0xf3/0x118
>[   91.437165]  [<ffffffff812dad0c>] rtnetlink_event+0x2b/0x2f
>[   91.443003]  [<ffffffff81369fe4>] notifier_call_chain+0x32/0x5e
>[   91.449188]  [<ffffffff8104d618>] raw_notifier_call_chain+0xf/0x11
>[   91.455634]  [<ffffffff812cfc73>] call_netdevice_notifiers+0x45/0x4a
>[   91.462253]  [<ffffffff812d04f7>] netdev_bonding_change+0x12/0x14

	This warning is because the notifier call is happening with spin
locks held.

>[   91.468614]  [<ffffffffa029d589>] bond_select_active_slave+0xe8/0x123 [bonding]
>[   91.476408]  [<ffffffffa029e241>] bond_mii_monitor+0x479/0x4ed [bonding]
>[   91.483375]  [<ffffffff81046009>] worker_thread+0x1ef/0x2e2
>[   91.489212]  [<ffffffff81045fb4>] ? worker_thread+0x19a/0x2e2
>[   91.495227]  [<ffffffffa029ddc8>] ? bond_mii_monitor+0x0/0x4ed [bonding]
>[   91.502192]  [<ffffffff81049c71>] ? autoremove_wake_function+0x0/0x34
>[   91.508897]  [<ffffffff81045e1a>] ? worker_thread+0x0/0x2e2
>[   91.514734]  [<ffffffff810498bb>] kthread+0x7a/0x82
>[   91.519878]  [<ffffffff81003714>] kernel_thread_helper+0x4/0x10
>[   91.526060]  [<ffffffff81366ffc>] ? restore_args+0x0/0x30
>[   91.531723]  [<ffffffff81049841>] ? kthread+0x0/0x82
>[   91.536953]  [<ffffffff81003710>] ? kernel_thread_helper+0x0/0x10
>[   91.543343] bonding: bond0: making interface eth2 the new active one.
>[   91.550554] bonding: bond0: first active interface up!
>[   91.556859] ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready
>
>
>No other patch applied. Just started netconsole over bonding, so no need
>to pull the cable from slaves. Reproduced twice, one I got the
>backtrace above, and on the other one the system hangs completely 
>after the BUG: scheduling message.
>
>fbl
>
>
>> 
>> Thanks!
>> 
>> ----------------------->
>> 
>> We should notify netconsole that bond is changing its slaves
>> when we use active-backup mode.
>> 
>> Signed-off-by: WANG Cong <amwang@redhat.com>
>> 
>> ----
>> 
>
>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>> index 5e12462..9494c02 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -1199,6 +1199,7 @@ void bond_select_active_slave(struct bonding *bond)
>>  
>>  	best_slave = bond_find_best_slave(bond);
>>  	if (best_slave != bond->curr_active_slave) {
>> +		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>>  		bond_change_active_slave(bond, best_slave);
>>  		rv = bond_set_carrier(bond);
>>  		if (!rv)

	You can't do this here; the driver is holding various spin
locks, and notifier calls can sleep (hence the warning).  If you look at
the bond_change_active_slave function, it drops all locks other than
RTNL before making a notifier call, e.g.,

void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
{
[...]
	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
[...]	
			write_unlock_bh(&bond->curr_slave_lock);
			read_unlock(&bond->lock);

			netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER);

			read_lock(&bond->lock);
			write_lock_bh(&bond->curr_slave_lock);
		}


	You may be able to add your notifier to this case, or change
your handler to notice the _FAILOVER notifier.

>> @@ -2154,6 +2155,7 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi
>>  	    (old_active) &&
>>  	    (new_active->link == BOND_LINK_UP) &&
>>  	    IS_UP(new_active->dev)) {
>> +		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>>  		write_lock_bh(&bond->curr_slave_lock);
>>  		bond_change_active_slave(bond, new_active);
>>  		write_unlock_bh(&bond->curr_slave_lock);

	This case will have the same problem, but will only be hit if a
user does a manual "ifenslave -c bond0 ethX".

	You also probably wanted to do the sysfs path, but if the
notifier goes into the change_active_slave function itself, then I don't
think additional notifications would be necessary.

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-28 21:03         ` [Bridge] " Jay Vosburgh
@ 2010-05-31  5:29           ` Cong Wang
  -1 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-05-31  5:29 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: Flavio Leitner, linux-kernel, Matt Mackall, netdev, bridge,
	Andy Gospodarek, Neil Horman, Jeff Moyer, Stephen Hemminger,
	bonding-devel, David Miller

On 05/29/10 05:03, Jay Vosburgh wrote:
> Flavio Leitner<fbl@sysclose.org>  wrote:
>
>> On Fri, May 28, 2010 at 04:16:34PM +0800, Cong Wang wrote:
>>> On 05/28/10 02:05, Flavio Leitner wrote:
>>>>
>>>> Hi guys!
>>>>
>>>> I finally could test this to see if an old problem reported on bugzilla[1] was
>>>> fixed now, but unfortunately it is still there.
>>>>
>>>> The ticket is private I guess, but basically the problem happens when bonding
>>>> driver tries to print something after it had taken the write_lock (monitor
>>>> functions, enslave/de-enslave), so the printk() will pass through netpoll, then
>>>> on bonding again which no matter what mode you use, it will try to read_lock()
>>>> the lock again. The result is a deadlock and the entire system hangs.
>>>>
>>>
>>> Does the attached patch fix this hang?
>>
>> I got another issue now:
>>
>> [   89.523062] bonding: bond0: enslaving eth0 as a backup interface with a down link.
>> [   89.580746] bonding: bond0: enslaving eth2 as a backup interface with a down link.
>> [   91.198527] e1000: eth2 NIC Link is Up 100 Mbps Half Duplex, Flow Control: None
>> [   91.238245] bonding: bond0: link status definitely up for interface eth2.
>>
>> [   91.245381] BUG: scheduling while atomic: bond0/2716/0x10000100
>> [   91.251565] 5 locks held by bond0/2716:
>> [   91.255663]  #0:  ((bond_dev->name)){+.+.+.}, at: [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2
>> [   91.265179]  #1:  ((&(&bond->mii_work)->work)){+.+.+.}, at: [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2
>> [   91.275554]  #2:  (rtnl_mutex){+.+.+.}, at: [<ffffffff812daf38>] rtnl_lock+0x12/0x14
>> [   91.284018]  #3:  (&bond->lock){++.+.+}, at: [<ffffffffa029e06a>] bond_mii_monitor+0x2a2/0x4ed [bonding]
>> [   91.294230]  #4:  (&bond->curr_slave_lock){+...+.}, at: [<ffffffffa029e239>] bond_mii_monitor+0x471/0x4ed [bonding]
>> [   91.305387] Modules linked in: bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables x_tables ipv6 dm_mirror dm_region_hash dm_log dm_multipath uinput snd_hda_codec_idt snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm ppdev parport_pc parport rtc_cmos snd_timer tg3 snd ide_cd_mod i5000_edac i2c_i801 libphy rtc_core rtc_lib edac_core pcspkr e1000 dcdbas uhci_hcd tulip shpchp i2c_core cdrom serio_raw soundcore sg snd_page_alloc raid0 sd_mod button [last unloaded: mperf]
>> [   91.357735] Pid: 2716, comm: bond0 Not tainted 2.6.34-04700-gd938a70-dirty #36
>> [   91.371112] Call Trace:
>> [   91.373825]  [<ffffffff81056002>] ? __debug_show_held_locks+0x22/0x24
>> [   91.380530]  [<ffffffff8102e4a2>] __schedule_bug+0x6d/0x72
>> [   91.386284]  [<ffffffff81363f6e>] schedule+0xc9/0x791
>> [   91.391600]  [<ffffffff81032540>] __cond_resched+0x25/0x30
>> [   91.397350]  [<ffffffff81364757>] _cond_resched+0x27/0x32
>> [   91.403013]  [<ffffffff810ab243>] kmem_cache_alloc+0x2b/0xac
>> [   91.408936]  [<ffffffff812c61fd>] skb_clone+0x42/0x5d
>> [   91.414253]  [<ffffffff812ec696>] netlink_broadcast+0x192/0x369
>> [   91.420436]  [<ffffffff812ecdc3>] nlmsg_notify+0x43/0x89
>> [   91.426012]  [<ffffffff812dabc7>] rtnl_notify+0x2b/0x2d
>> [   91.431501]  [<ffffffff812dacbc>] rtmsg_ifinfo+0xf3/0x118
>> [   91.437165]  [<ffffffff812dad0c>] rtnetlink_event+0x2b/0x2f
>> [   91.443003]  [<ffffffff81369fe4>] notifier_call_chain+0x32/0x5e
>> [   91.449188]  [<ffffffff8104d618>] raw_notifier_call_chain+0xf/0x11
>> [   91.455634]  [<ffffffff812cfc73>] call_netdevice_notifiers+0x45/0x4a
>> [   91.462253]  [<ffffffff812d04f7>] netdev_bonding_change+0x12/0x14
>
> 	This warning is because the notifier call is happening with spin
> locks held.
>
>> [   91.468614]  [<ffffffffa029d589>] bond_select_active_slave+0xe8/0x123 [bonding]
>> [   91.476408]  [<ffffffffa029e241>] bond_mii_monitor+0x479/0x4ed [bonding]
>> [   91.483375]  [<ffffffff81046009>] worker_thread+0x1ef/0x2e2
>> [   91.489212]  [<ffffffff81045fb4>] ? worker_thread+0x19a/0x2e2
>> [   91.495227]  [<ffffffffa029ddc8>] ? bond_mii_monitor+0x0/0x4ed [bonding]
>> [   91.502192]  [<ffffffff81049c71>] ? autoremove_wake_function+0x0/0x34
>> [   91.508897]  [<ffffffff81045e1a>] ? worker_thread+0x0/0x2e2
>> [   91.514734]  [<ffffffff810498bb>] kthread+0x7a/0x82
>> [   91.519878]  [<ffffffff81003714>] kernel_thread_helper+0x4/0x10
>> [   91.526060]  [<ffffffff81366ffc>] ? restore_args+0x0/0x30
>> [   91.531723]  [<ffffffff81049841>] ? kthread+0x0/0x82
>> [   91.536953]  [<ffffffff81003710>] ? kernel_thread_helper+0x0/0x10
>> [   91.543343] bonding: bond0: making interface eth2 the new active one.
>> [   91.550554] bonding: bond0: first active interface up!
>> [   91.556859] ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready
>>
>>
>> No other patch applied. Just started netconsole over bonding, so no need
>> to pull the cable from slaves. Reproduced twice, one I got the
>> backtrace above, and on the other one the system hangs completely
>> after the BUG: scheduling message.
>>
>> fbl
>>
>>
>>>
>>> Thanks!
>>>
>>> ----------------------->
>>>
>>> We should notify netconsole that bond is changing its slaves
>>> when we use active-backup mode.
>>>
>>> Signed-off-by: WANG Cong<amwang@redhat.com>
>>>
>>> ----
>>>
>>
>>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>>> index 5e12462..9494c02 100644
>>> --- a/drivers/net/bonding/bond_main.c
>>> +++ b/drivers/net/bonding/bond_main.c
>>> @@ -1199,6 +1199,7 @@ void bond_select_active_slave(struct bonding *bond)
>>>
>>>   	best_slave = bond_find_best_slave(bond);
>>>   	if (best_slave != bond->curr_active_slave) {
>>> +		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>>>   		bond_change_active_slave(bond, best_slave);
>>>   		rv = bond_set_carrier(bond);
>>>   		if (!rv)
>
> 	You can't do this here; the driver is holding various spin
> locks, and notifier calls can sleep (hence the warning).  If you look at
> the bond_change_active_slave function, it drops all locks other than
> RTNL before making a notifier call, e.g.,
>
> void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
> {
> [...]
> 	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
> [...]	
> 			write_unlock_bh(&bond->curr_slave_lock);
> 			read_unlock(&bond->lock);
>
> 			netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER);
>
> 			read_lock(&bond->lock);
> 			write_lock_bh(&bond->curr_slave_lock);
> 		}
>
>
> 	You may be able to add your notifier to this case, or change
> your handler to notice the _FAILOVER notifier.


Thanks for your analysis! Hmm, I think letting netconsole handle
NETDEV_BONDING_FAILOVER here is a better solution.

>
>>> @@ -2154,6 +2155,7 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi
>>>   	    (old_active)&&
>>>   	(new_active->link == BOND_LINK_UP)&&
>>>   	IS_UP(new_active->dev)) {
>>> +		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>>>   		write_lock_bh(&bond->curr_slave_lock);
>>>   		bond_change_active_slave(bond, new_active);
>>>   		write_unlock_bh(&bond->curr_slave_lock);
>
> 	This case will have the same problem, but will only be hit if a
> user does a manual "ifenslave -c bond0 ethX".
>
> 	You also probably wanted to do the sysfs path, but if the
> notifier goes into the change_active_slave function itself, then I don't
> think additional notifications would be necessary.
>

Okay, it sounds like the above solution should also handle this case.

Thanks.


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-31  5:29           ` Cong Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-05-31  5:29 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: Neil Horman, netdev, Matt Mackall, bridge, linux-kernel,
	David Miller, Flavio Leitner, Jeff Moyer, Andy Gospodarek,
	bonding-devel

On 05/29/10 05:03, Jay Vosburgh wrote:
> Flavio Leitner<fbl@sysclose.org>  wrote:
>
>> On Fri, May 28, 2010 at 04:16:34PM +0800, Cong Wang wrote:
>>> On 05/28/10 02:05, Flavio Leitner wrote:
>>>>
>>>> Hi guys!
>>>>
>>>> I finally could test this to see if an old problem reported on bugzilla[1] was
>>>> fixed now, but unfortunately it is still there.
>>>>
>>>> The ticket is private I guess, but basically the problem happens when bonding
>>>> driver tries to print something after it had taken the write_lock (monitor
>>>> functions, enslave/de-enslave), so the printk() will pass through netpoll, then
>>>> on bonding again which no matter what mode you use, it will try to read_lock()
>>>> the lock again. The result is a deadlock and the entire system hangs.
>>>>
>>>
>>> Does the attached patch fix this hang?
>>
>> I got another issue now:
>>
>> [   89.523062] bonding: bond0: enslaving eth0 as a backup interface with a down link.
>> [   89.580746] bonding: bond0: enslaving eth2 as a backup interface with a down link.
>> [   91.198527] e1000: eth2 NIC Link is Up 100 Mbps Half Duplex, Flow Control: None
>> [   91.238245] bonding: bond0: link status definitely up for interface eth2.
>>
>> [   91.245381] BUG: scheduling while atomic: bond0/2716/0x10000100
>> [   91.251565] 5 locks held by bond0/2716:
>> [   91.255663]  #0:  ((bond_dev->name)){+.+.+.}, at: [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2
>> [   91.265179]  #1:  ((&(&bond->mii_work)->work)){+.+.+.}, at: [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2
>> [   91.275554]  #2:  (rtnl_mutex){+.+.+.}, at: [<ffffffff812daf38>] rtnl_lock+0x12/0x14
>> [   91.284018]  #3:  (&bond->lock){++.+.+}, at: [<ffffffffa029e06a>] bond_mii_monitor+0x2a2/0x4ed [bonding]
>> [   91.294230]  #4:  (&bond->curr_slave_lock){+...+.}, at: [<ffffffffa029e239>] bond_mii_monitor+0x471/0x4ed [bonding]
>> [   91.305387] Modules linked in: bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables x_tables ipv6 dm_mirror dm_region_hash dm_log dm_multipath uinput snd_hda_codec_idt snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm ppdev parport_pc parport rtc_cmos snd_timer tg3 snd ide_cd_mod i5000_edac i2c_i801 libphy rtc_core rtc_lib edac_core pcspkr e1000 dcdbas uhci_hcd tulip shpchp i2c_core cdrom serio_raw soundcore sg snd_page_alloc raid0 sd_mod button [last unloaded: mperf]
>> [   91.357735] Pid: 2716, comm: bond0 Not tainted 2.6.34-04700-gd938a70-dirty #36
>> [   91.371112] Call Trace:
>> [   91.373825]  [<ffffffff81056002>] ? __debug_show_held_locks+0x22/0x24
>> [   91.380530]  [<ffffffff8102e4a2>] __schedule_bug+0x6d/0x72
>> [   91.386284]  [<ffffffff81363f6e>] schedule+0xc9/0x791
>> [   91.391600]  [<ffffffff81032540>] __cond_resched+0x25/0x30
>> [   91.397350]  [<ffffffff81364757>] _cond_resched+0x27/0x32
>> [   91.403013]  [<ffffffff810ab243>] kmem_cache_alloc+0x2b/0xac
>> [   91.408936]  [<ffffffff812c61fd>] skb_clone+0x42/0x5d
>> [   91.414253]  [<ffffffff812ec696>] netlink_broadcast+0x192/0x369
>> [   91.420436]  [<ffffffff812ecdc3>] nlmsg_notify+0x43/0x89
>> [   91.426012]  [<ffffffff812dabc7>] rtnl_notify+0x2b/0x2d
>> [   91.431501]  [<ffffffff812dacbc>] rtmsg_ifinfo+0xf3/0x118
>> [   91.437165]  [<ffffffff812dad0c>] rtnetlink_event+0x2b/0x2f
>> [   91.443003]  [<ffffffff81369fe4>] notifier_call_chain+0x32/0x5e
>> [   91.449188]  [<ffffffff8104d618>] raw_notifier_call_chain+0xf/0x11
>> [   91.455634]  [<ffffffff812cfc73>] call_netdevice_notifiers+0x45/0x4a
>> [   91.462253]  [<ffffffff812d04f7>] netdev_bonding_change+0x12/0x14
>
> 	This warning is because the notifier call is happening with spin
> locks held.
>
>> [   91.468614]  [<ffffffffa029d589>] bond_select_active_slave+0xe8/0x123 [bonding]
>> [   91.476408]  [<ffffffffa029e241>] bond_mii_monitor+0x479/0x4ed [bonding]
>> [   91.483375]  [<ffffffff81046009>] worker_thread+0x1ef/0x2e2
>> [   91.489212]  [<ffffffff81045fb4>] ? worker_thread+0x19a/0x2e2
>> [   91.495227]  [<ffffffffa029ddc8>] ? bond_mii_monitor+0x0/0x4ed [bonding]
>> [   91.502192]  [<ffffffff81049c71>] ? autoremove_wake_function+0x0/0x34
>> [   91.508897]  [<ffffffff81045e1a>] ? worker_thread+0x0/0x2e2
>> [   91.514734]  [<ffffffff810498bb>] kthread+0x7a/0x82
>> [   91.519878]  [<ffffffff81003714>] kernel_thread_helper+0x4/0x10
>> [   91.526060]  [<ffffffff81366ffc>] ? restore_args+0x0/0x30
>> [   91.531723]  [<ffffffff81049841>] ? kthread+0x0/0x82
>> [   91.536953]  [<ffffffff81003710>] ? kernel_thread_helper+0x0/0x10
>> [   91.543343] bonding: bond0: making interface eth2 the new active one.
>> [   91.550554] bonding: bond0: first active interface up!
>> [   91.556859] ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready
>>
>>
>> No other patch applied. Just started netconsole over bonding, so no need
>> to pull the cable from slaves. Reproduced twice, one I got the
>> backtrace above, and on the other one the system hangs completely
>> after the BUG: scheduling message.
>>
>> fbl
>>
>>
>>>
>>> Thanks!
>>>
>>> ----------------------->
>>>
>>> We should notify netconsole that bond is changing its slaves
>>> when we use active-backup mode.
>>>
>>> Signed-off-by: WANG Cong<amwang@redhat.com>
>>>
>>> ----
>>>
>>
>>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>>> index 5e12462..9494c02 100644
>>> --- a/drivers/net/bonding/bond_main.c
>>> +++ b/drivers/net/bonding/bond_main.c
>>> @@ -1199,6 +1199,7 @@ void bond_select_active_slave(struct bonding *bond)
>>>
>>>   	best_slave = bond_find_best_slave(bond);
>>>   	if (best_slave != bond->curr_active_slave) {
>>> +		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>>>   		bond_change_active_slave(bond, best_slave);
>>>   		rv = bond_set_carrier(bond);
>>>   		if (!rv)
>
> 	You can't do this here; the driver is holding various spin
> locks, and notifier calls can sleep (hence the warning).  If you look at
> the bond_change_active_slave function, it drops all locks other than
> RTNL before making a notifier call, e.g.,
>
> void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
> {
> [...]
> 	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
> [...]	
> 			write_unlock_bh(&bond->curr_slave_lock);
> 			read_unlock(&bond->lock);
>
> 			netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER);
>
> 			read_lock(&bond->lock);
> 			write_lock_bh(&bond->curr_slave_lock);
> 		}
>
>
> 	You may be able to add your notifier to this case, or change
> your handler to notice the _FAILOVER notifier.


Thanks for your analysis! Hmm, I think letting netconsole handle
NETDEV_BONDING_FAILOVER here is a better solution.

>
>>> @@ -2154,6 +2155,7 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi
>>>   	    (old_active)&&
>>>   	(new_active->link == BOND_LINK_UP)&&
>>>   	IS_UP(new_active->dev)) {
>>> +		netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>>>   		write_lock_bh(&bond->curr_slave_lock);
>>>   		bond_change_active_slave(bond, new_active);
>>>   		write_unlock_bh(&bond->curr_slave_lock);
>
> 	This case will have the same problem, but will only be hit if a
> user does a manual "ifenslave -c bond0 ethX".
>
> 	You also probably wanted to do the sysfs path, but if the
> notifier goes into the change_active_slave function itself, then I don't
> think additional notifications would be necessary.
>

Okay, it sounds like the above solution should also handle this case.

Thanks.


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-31  5:29           ` [Bridge] " Cong Wang
@ 2010-05-31  5:37             ` Cong Wang
  -1 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-05-31  5:37 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: Flavio Leitner, linux-kernel, Matt Mackall, netdev, bridge,
	Andy Gospodarek, Neil Horman, Jeff Moyer, Stephen Hemminger,
	bonding-devel, David Miller

On 05/31/10 13:29, Cong Wang wrote:
> On 05/29/10 05:03, Jay Vosburgh wrote:
>> Flavio Leitner<fbl@sysclose.org> wrote:
>>
>>> On Fri, May 28, 2010 at 04:16:34PM +0800, Cong Wang wrote:
>>>> On 05/28/10 02:05, Flavio Leitner wrote:
>>>>>
>>>>> Hi guys!
>>>>>
>>>>> I finally could test this to see if an old problem reported on
>>>>> bugzilla[1] was
>>>>> fixed now, but unfortunately it is still there.
>>>>>
>>>>> The ticket is private I guess, but basically the problem happens
>>>>> when bonding
>>>>> driver tries to print something after it had taken the write_lock
>>>>> (monitor
>>>>> functions, enslave/de-enslave), so the printk() will pass through
>>>>> netpoll, then
>>>>> on bonding again which no matter what mode you use, it will try to
>>>>> read_lock()
>>>>> the lock again. The result is a deadlock and the entire system hangs.
>>>>>
>>>>
>>>> Does the attached patch fix this hang?
>>>
>>> I got another issue now:
>>>
>>> [ 89.523062] bonding: bond0: enslaving eth0 as a backup interface
>>> with a down link.
>>> [ 89.580746] bonding: bond0: enslaving eth2 as a backup interface
>>> with a down link.
>>> [ 91.198527] e1000: eth2 NIC Link is Up 100 Mbps Half Duplex, Flow
>>> Control: None
>>> [ 91.238245] bonding: bond0: link status definitely up for interface
>>> eth2.
>>>
>>> [ 91.245381] BUG: scheduling while atomic: bond0/2716/0x10000100
>>> [ 91.251565] 5 locks held by bond0/2716:
>>> [ 91.255663] #0: ((bond_dev->name)){+.+.+.}, at: [<ffffffff81045fb4>]
>>> worker_thread+0x19a/0x2e2
>>> [ 91.265179] #1: ((&(&bond->mii_work)->work)){+.+.+.}, at:
>>> [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2
>>> [ 91.275554] #2: (rtnl_mutex){+.+.+.}, at: [<ffffffff812daf38>]
>>> rtnl_lock+0x12/0x14
>>> [ 91.284018] #3: (&bond->lock){++.+.+}, at: [<ffffffffa029e06a>]
>>> bond_mii_monitor+0x2a2/0x4ed [bonding]
>>> [ 91.294230] #4: (&bond->curr_slave_lock){+...+.}, at:
>>> [<ffffffffa029e239>] bond_mii_monitor+0x471/0x4ed [bonding]
>>> [ 91.305387] Modules linked in: bonding sunrpc ip6t_REJECT xt_tcpudp
>>> nf_conntrack_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables
>>> x_tables ipv6 dm_mirror dm_region_hash dm_log dm_multipath uinput
>>> snd_hda_codec_idt snd_hda_intel snd_hda_codec snd_hwdep snd_seq
>>> snd_seq_device snd_pcm ppdev parport_pc parport rtc_cmos snd_timer
>>> tg3 snd ide_cd_mod i5000_edac i2c_i801 libphy rtc_core rtc_lib
>>> edac_core pcspkr e1000 dcdbas uhci_hcd tulip shpchp i2c_core cdrom
>>> serio_raw soundcore sg snd_page_alloc raid0 sd_mod button [last
>>> unloaded: mperf]
>>> [ 91.357735] Pid: 2716, comm: bond0 Not tainted
>>> 2.6.34-04700-gd938a70-dirty #36
>>> [ 91.371112] Call Trace:
>>> [ 91.373825] [<ffffffff81056002>] ? __debug_show_held_locks+0x22/0x24
>>> [ 91.380530] [<ffffffff8102e4a2>] __schedule_bug+0x6d/0x72
>>> [ 91.386284] [<ffffffff81363f6e>] schedule+0xc9/0x791
>>> [ 91.391600] [<ffffffff81032540>] __cond_resched+0x25/0x30
>>> [ 91.397350] [<ffffffff81364757>] _cond_resched+0x27/0x32
>>> [ 91.403013] [<ffffffff810ab243>] kmem_cache_alloc+0x2b/0xac
>>> [ 91.408936] [<ffffffff812c61fd>] skb_clone+0x42/0x5d
>>> [ 91.414253] [<ffffffff812ec696>] netlink_broadcast+0x192/0x369
>>> [ 91.420436] [<ffffffff812ecdc3>] nlmsg_notify+0x43/0x89
>>> [ 91.426012] [<ffffffff812dabc7>] rtnl_notify+0x2b/0x2d
>>> [ 91.431501] [<ffffffff812dacbc>] rtmsg_ifinfo+0xf3/0x118
>>> [ 91.437165] [<ffffffff812dad0c>] rtnetlink_event+0x2b/0x2f
>>> [ 91.443003] [<ffffffff81369fe4>] notifier_call_chain+0x32/0x5e
>>> [ 91.449188] [<ffffffff8104d618>] raw_notifier_call_chain+0xf/0x11
>>> [ 91.455634] [<ffffffff812cfc73>] call_netdevice_notifiers+0x45/0x4a
>>> [ 91.462253] [<ffffffff812d04f7>] netdev_bonding_change+0x12/0x14
>>
>> This warning is because the notifier call is happening with spin
>> locks held.
>>
>>> [ 91.468614] [<ffffffffa029d589>] bond_select_active_slave+0xe8/0x123
>>> [bonding]
>>> [ 91.476408] [<ffffffffa029e241>] bond_mii_monitor+0x479/0x4ed [bonding]
>>> [ 91.483375] [<ffffffff81046009>] worker_thread+0x1ef/0x2e2
>>> [ 91.489212] [<ffffffff81045fb4>] ? worker_thread+0x19a/0x2e2
>>> [ 91.495227] [<ffffffffa029ddc8>] ? bond_mii_monitor+0x0/0x4ed [bonding]
>>> [ 91.502192] [<ffffffff81049c71>] ? autoremove_wake_function+0x0/0x34
>>> [ 91.508897] [<ffffffff81045e1a>] ? worker_thread+0x0/0x2e2
>>> [ 91.514734] [<ffffffff810498bb>] kthread+0x7a/0x82
>>> [ 91.519878] [<ffffffff81003714>] kernel_thread_helper+0x4/0x10
>>> [ 91.526060] [<ffffffff81366ffc>] ? restore_args+0x0/0x30
>>> [ 91.531723] [<ffffffff81049841>] ? kthread+0x0/0x82
>>> [ 91.536953] [<ffffffff81003710>] ? kernel_thread_helper+0x0/0x10
>>> [ 91.543343] bonding: bond0: making interface eth2 the new active one.
>>> [ 91.550554] bonding: bond0: first active interface up!
>>> [ 91.556859] ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready
>>>
>>>
>>> No other patch applied. Just started netconsole over bonding, so no need
>>> to pull the cable from slaves. Reproduced twice, one I got the
>>> backtrace above, and on the other one the system hangs completely
>>> after the BUG: scheduling message.
>>>
>>> fbl
>>>
>>>
>>>>
>>>> Thanks!
>>>>
>>>> ----------------------->
>>>>
>>>> We should notify netconsole that bond is changing its slaves
>>>> when we use active-backup mode.
>>>>
>>>> Signed-off-by: WANG Cong<amwang@redhat.com>
>>>>
>>>> ----
>>>>
>>>
>>>> diff --git a/drivers/net/bonding/bond_main.c
>>>> b/drivers/net/bonding/bond_main.c
>>>> index 5e12462..9494c02 100644
>>>> --- a/drivers/net/bonding/bond_main.c
>>>> +++ b/drivers/net/bonding/bond_main.c
>>>> @@ -1199,6 +1199,7 @@ void bond_select_active_slave(struct bonding
>>>> *bond)
>>>>
>>>> best_slave = bond_find_best_slave(bond);
>>>> if (best_slave != bond->curr_active_slave) {
>>>> + netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>>>> bond_change_active_slave(bond, best_slave);
>>>> rv = bond_set_carrier(bond);
>>>> if (!rv)
>>
>> You can't do this here; the driver is holding various spin
>> locks, and notifier calls can sleep (hence the warning). If you look at
>> the bond_change_active_slave function, it drops all locks other than
>> RTNL before making a notifier call, e.g.,
>>
>> void bond_change_active_slave(struct bonding *bond, struct slave
>> *new_active)
>> {
>> [...]
>> if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
>> [...]
>> write_unlock_bh(&bond->curr_slave_lock);
>> read_unlock(&bond->lock);
>>
>> netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER);
>>
>> read_lock(&bond->lock);
>> write_lock_bh(&bond->curr_slave_lock);
>> }
>>
>>
>> You may be able to add your notifier to this case, or change
>> your handler to notice the _FAILOVER notifier.
>
>
> Thanks for your analysis! Hmm, I think let netconsole to handle
> NETDEV_BONDING_FAILOVER here is a better solution.
>

No, bond_change_active_slave() sends the notification after
printing its messages, so that will not solve the problem here;
we need to notify netconsole before printing any messages.

Thanks.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-31  5:37             ` Cong Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-05-31  5:37 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: Neil Horman, netdev, Matt Mackall, bridge, linux-kernel,
	David Miller, Flavio Leitner, Jeff Moyer, Andy Gospodarek,
	bonding-devel

On 05/31/10 13:29, Cong Wang wrote:
> On 05/29/10 05:03, Jay Vosburgh wrote:
>> Flavio Leitner<fbl@sysclose.org> wrote:
>>
>>> On Fri, May 28, 2010 at 04:16:34PM +0800, Cong Wang wrote:
>>>> On 05/28/10 02:05, Flavio Leitner wrote:
>>>>>
>>>>> Hi guys!
>>>>>
>>>>> I finally could test this to see if an old problem reported on
>>>>> bugzilla[1] was
>>>>> fixed now, but unfortunately it is still there.
>>>>>
>>>>> The ticket is private I guess, but basically the problem happens
>>>>> when bonding
>>>>> driver tries to print something after it had taken the write_lock
>>>>> (monitor
>>>>> functions, enslave/de-enslave), so the printk() will pass through
>>>>> netpoll, then
>>>>> on bonding again which no matter what mode you use, it will try to
>>>>> read_lock()
>>>>> the lock again. The result is a deadlock and the entire system hangs.
>>>>>
>>>>
>>>> Does the attached patch fix this hang?
>>>
>>> I got another issue now:
>>>
>>> [ 89.523062] bonding: bond0: enslaving eth0 as a backup interface
>>> with a down link.
>>> [ 89.580746] bonding: bond0: enslaving eth2 as a backup interface
>>> with a down link.
>>> [ 91.198527] e1000: eth2 NIC Link is Up 100 Mbps Half Duplex, Flow
>>> Control: None
>>> [ 91.238245] bonding: bond0: link status definitely up for interface
>>> eth2.
>>>
>>> [ 91.245381] BUG: scheduling while atomic: bond0/2716/0x10000100
>>> [ 91.251565] 5 locks held by bond0/2716:
>>> [ 91.255663] #0: ((bond_dev->name)){+.+.+.}, at: [<ffffffff81045fb4>]
>>> worker_thread+0x19a/0x2e2
>>> [ 91.265179] #1: ((&(&bond->mii_work)->work)){+.+.+.}, at:
>>> [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2
>>> [ 91.275554] #2: (rtnl_mutex){+.+.+.}, at: [<ffffffff812daf38>]
>>> rtnl_lock+0x12/0x14
>>> [ 91.284018] #3: (&bond->lock){++.+.+}, at: [<ffffffffa029e06a>]
>>> bond_mii_monitor+0x2a2/0x4ed [bonding]
>>> [ 91.294230] #4: (&bond->curr_slave_lock){+...+.}, at:
>>> [<ffffffffa029e239>] bond_mii_monitor+0x471/0x4ed [bonding]
>>> [ 91.305387] Modules linked in: bonding sunrpc ip6t_REJECT xt_tcpudp
>>> nf_conntrack_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables
>>> x_tables ipv6 dm_mirror dm_region_hash dm_log dm_multipath uinput
>>> snd_hda_codec_idt snd_hda_intel snd_hda_codec snd_hwdep snd_seq
>>> snd_seq_device snd_pcm ppdev parport_pc parport rtc_cmos snd_timer
>>> tg3 snd ide_cd_mod i5000_edac i2c_i801 libphy rtc_core rtc_lib
>>> edac_core pcspkr e1000 dcdbas uhci_hcd tulip shpchp i2c_core cdrom
>>> serio_raw soundcore sg snd_page_alloc raid0 sd_mod button [last
>>> unloaded: mperf]
>>> [ 91.357735] Pid: 2716, comm: bond0 Not tainted
>>> 2.6.34-04700-gd938a70-dirty #36
>>> [ 91.371112] Call Trace:
>>> [ 91.373825] [<ffffffff81056002>] ? __debug_show_held_locks+0x22/0x24
>>> [ 91.380530] [<ffffffff8102e4a2>] __schedule_bug+0x6d/0x72
>>> [ 91.386284] [<ffffffff81363f6e>] schedule+0xc9/0x791
>>> [ 91.391600] [<ffffffff81032540>] __cond_resched+0x25/0x30
>>> [ 91.397350] [<ffffffff81364757>] _cond_resched+0x27/0x32
>>> [ 91.403013] [<ffffffff810ab243>] kmem_cache_alloc+0x2b/0xac
>>> [ 91.408936] [<ffffffff812c61fd>] skb_clone+0x42/0x5d
>>> [ 91.414253] [<ffffffff812ec696>] netlink_broadcast+0x192/0x369
>>> [ 91.420436] [<ffffffff812ecdc3>] nlmsg_notify+0x43/0x89
>>> [ 91.426012] [<ffffffff812dabc7>] rtnl_notify+0x2b/0x2d
>>> [ 91.431501] [<ffffffff812dacbc>] rtmsg_ifinfo+0xf3/0x118
>>> [ 91.437165] [<ffffffff812dad0c>] rtnetlink_event+0x2b/0x2f
>>> [ 91.443003] [<ffffffff81369fe4>] notifier_call_chain+0x32/0x5e
>>> [ 91.449188] [<ffffffff8104d618>] raw_notifier_call_chain+0xf/0x11
>>> [ 91.455634] [<ffffffff812cfc73>] call_netdevice_notifiers+0x45/0x4a
>>> [ 91.462253] [<ffffffff812d04f7>] netdev_bonding_change+0x12/0x14
>>
>> This warning is because the notifier call is happening with spin
>> locks held.
>>
>>> [ 91.468614] [<ffffffffa029d589>] bond_select_active_slave+0xe8/0x123
>>> [bonding]
>>> [ 91.476408] [<ffffffffa029e241>] bond_mii_monitor+0x479/0x4ed [bonding]
>>> [ 91.483375] [<ffffffff81046009>] worker_thread+0x1ef/0x2e2
>>> [ 91.489212] [<ffffffff81045fb4>] ? worker_thread+0x19a/0x2e2
>>> [ 91.495227] [<ffffffffa029ddc8>] ? bond_mii_monitor+0x0/0x4ed [bonding]
>>> [ 91.502192] [<ffffffff81049c71>] ? autoremove_wake_function+0x0/0x34
>>> [ 91.508897] [<ffffffff81045e1a>] ? worker_thread+0x0/0x2e2
>>> [ 91.514734] [<ffffffff810498bb>] kthread+0x7a/0x82
>>> [ 91.519878] [<ffffffff81003714>] kernel_thread_helper+0x4/0x10
>>> [ 91.526060] [<ffffffff81366ffc>] ? restore_args+0x0/0x30
>>> [ 91.531723] [<ffffffff81049841>] ? kthread+0x0/0x82
>>> [ 91.536953] [<ffffffff81003710>] ? kernel_thread_helper+0x0/0x10
>>> [ 91.543343] bonding: bond0: making interface eth2 the new active one.
>>> [ 91.550554] bonding: bond0: first active interface up!
>>> [ 91.556859] ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready
>>>
>>>
>>> No other patch applied. Just started netconsole over bonding, so no need
>>> to pull the cable from slaves. Reproduced twice, one I got the
>>> backtrace above, and on the other one the system hangs completely
>>> after the BUG: scheduling message.
>>>
>>> fbl
>>>
>>>
>>>>
>>>> Thanks!
>>>>
>>>> ----------------------->
>>>>
>>>> We should notify netconsole that bond is changing its slaves
>>>> when we use active-backup mode.
>>>>
>>>> Signed-off-by: WANG Cong<amwang@redhat.com>
>>>>
>>>> ----
>>>>
>>>
>>>> diff --git a/drivers/net/bonding/bond_main.c
>>>> b/drivers/net/bonding/bond_main.c
>>>> index 5e12462..9494c02 100644
>>>> --- a/drivers/net/bonding/bond_main.c
>>>> +++ b/drivers/net/bonding/bond_main.c
>>>> @@ -1199,6 +1199,7 @@ void bond_select_active_slave(struct bonding
>>>> *bond)
>>>>
>>>> best_slave = bond_find_best_slave(bond);
>>>> if (best_slave != bond->curr_active_slave) {
>>>> + netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>>>> bond_change_active_slave(bond, best_slave);
>>>> rv = bond_set_carrier(bond);
>>>> if (!rv)
>>
>> You can't do this here; the driver is holding various spin
>> locks, and notifier calls can sleep (hence the warning). If you look at
>> the bond_change_active_slave function, it drops all locks other than
>> RTNL before making a notifier call, e.g.,
>>
>> void bond_change_active_slave(struct bonding *bond, struct slave
>> *new_active)
>> {
>> [...]
>> if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
>> [...]
>> write_unlock_bh(&bond->curr_slave_lock);
>> read_unlock(&bond->lock);
>>
>> netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER);
>>
>> read_lock(&bond->lock);
>> write_lock_bh(&bond->curr_slave_lock);
>> }
>>
>>
>> You may be able to add your notifier to this case, or change
>> your handler to notice the _FAILOVER notifier.
>
>
> Thanks for your analysis! Hmm, I think let netconsole to handle
> NETDEV_BONDING_FAILOVER here is a better solution.
>

No, bond_change_active_slave() sends the notification after
printing its messages, so that will not solve the problem here;
we need to notify netconsole before printing any messages.

Thanks.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-28 19:40       ` [Bridge] " Flavio Leitner
@ 2010-05-31  5:56         ` Cong Wang
  -1 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-05-31  5:56 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: linux-kernel, Matt Mackall, netdev, bridge, Andy Gospodarek,
	Neil Horman, Jeff Moyer, Stephen Hemminger, bonding-devel,
	Jay Vosburgh, David Miller

[-- Attachment #1: Type: text/plain, Size: 111 bytes --]

Hi, Flavio,

Please use the attached patch instead, try to see if it solves
all your problems.

Thanks a lot!


[-- Attachment #2: drivers-net-bonding-fix-activebackup-deadlock.diff --]
[-- Type: text/x-patch, Size: 837 bytes --]

diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index ca142c4..2d1d594 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -666,7 +666,8 @@ static int netconsole_netdev_event(struct notifier_block *this,
 	struct net_device *dev = ptr;
 
 	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER ||
-	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN))
+	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN ||
+	      event == NETDEV_BONDING_FAILOVER))
 		goto done;
 
 	spin_lock_irqsave(&target_list_lock, flags);
@@ -682,6 +683,7 @@ static int netconsole_netdev_event(struct notifier_block *this,
 				/* Fall through */
 			case NETDEV_GOING_DOWN:
 			case NETDEV_BONDING_DESLAVE:
+			case NETDEV_BONDING_FAILOVER:
 				nt->enabled = 0;
 				break;
 			}

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-31  5:56         ` Cong Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-05-31  5:56 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: Jay Vosburgh, Neil Horman, netdev, Matt Mackall, bridge,
	linux-kernel, David Miller, Jeff Moyer, Andy Gospodarek,
	bonding-devel

[-- Attachment #1: Type: text/plain, Size: 111 bytes --]

Hi, Flavio,

Please use the attached patch instead, try to see if it solves
all your problems.

Thanks a lot!


[-- Attachment #2: drivers-net-bonding-fix-activebackup-deadlock.diff --]
[-- Type: text/x-patch, Size: 837 bytes --]

diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index ca142c4..2d1d594 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -666,7 +666,8 @@ static int netconsole_netdev_event(struct notifier_block *this,
 	struct net_device *dev = ptr;
 
 	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER ||
-	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN))
+	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN ||
+	      event == NETDEV_BONDING_FAILOVER))
 		goto done;
 
 	spin_lock_irqsave(&target_list_lock, flags);
@@ -682,6 +683,7 @@ static int netconsole_netdev_event(struct notifier_block *this,
 				/* Fall through */
 			case NETDEV_GOING_DOWN:
 			case NETDEV_BONDING_DESLAVE:
+			case NETDEV_BONDING_FAILOVER:
 				nt->enabled = 0;
 				break;
 			}

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-31  5:56         ` [Bridge] " Cong Wang
@ 2010-05-31 19:08           ` Flavio Leitner
  -1 siblings, 0 replies; 73+ messages in thread
From: Flavio Leitner @ 2010-05-31 19:08 UTC (permalink / raw)
  To: Cong Wang
  Cc: linux-kernel, Matt Mackall, netdev, bridge, Andy Gospodarek,
	Neil Horman, Jeff Moyer, Stephen Hemminger, bonding-devel,
	Jay Vosburgh, David Miller

On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
> Hi, Flavio,
> 
> Please use the attached patch instead, try to see if it solves
> all your problems.

I tried and it hangs. No backtraces this time.
The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
notification, so I think it won't work. 

Please correct me if I'm wrong, but when a failover happens with your
patch applied, the netconsole would be disabled forever even with
another healthy slave, right?

fbl


> 
> Thanks a lot!
> 

> diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
> index ca142c4..2d1d594 100644
> --- a/drivers/net/netconsole.c
> +++ b/drivers/net/netconsole.c
> @@ -666,7 +666,8 @@ static int netconsole_netdev_event(struct notifier_block *this,
>  	struct net_device *dev = ptr;
>  
>  	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER ||
> -	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN))
> +	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN ||
> +	      event == NETDEV_BONDING_FAILOVER))
>  		goto done;
>  
>  	spin_lock_irqsave(&target_list_lock, flags);
> @@ -682,6 +683,7 @@ static int netconsole_netdev_event(struct notifier_block *this,
>  				/* Fall through */
>  			case NETDEV_GOING_DOWN:
>  			case NETDEV_BONDING_DESLAVE:
> +			case NETDEV_BONDING_FAILOVER:
>  				nt->enabled = 0;
>  				break;
>  			}


-- 
Flavio

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-05-31 19:08           ` Flavio Leitner
  0 siblings, 0 replies; 73+ messages in thread
From: Flavio Leitner @ 2010-05-31 19:08 UTC (permalink / raw)
  To: Cong Wang
  Cc: Jay Vosburgh, Neil Horman, netdev, Matt Mackall, bridge,
	linux-kernel, David Miller, Jeff Moyer, Andy Gospodarek,
	bonding-devel

On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
> Hi, Flavio,
> 
> Please use the attached patch instead, try to see if it solves
> all your problems.

I tried and it hangs. No backtraces this time.
The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
notification, so I think it won't work. 

Please correct me if I'm wrong, but when a failover happens with your
patch applied, the netconsole would be disabled forever even with
another healthy slave, right?

fbl


> 
> Thanks a lot!
> 

> diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
> index ca142c4..2d1d594 100644
> --- a/drivers/net/netconsole.c
> +++ b/drivers/net/netconsole.c
> @@ -666,7 +666,8 @@ static int netconsole_netdev_event(struct notifier_block *this,
>  	struct net_device *dev = ptr;
>  
>  	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER ||
> -	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN))
> +	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN ||
> +	      event == NETDEV_BONDING_FAILOVER))
>  		goto done;
>  
>  	spin_lock_irqsave(&target_list_lock, flags);
> @@ -682,6 +683,7 @@ static int netconsole_netdev_event(struct notifier_block *this,
>  				/* Fall through */
>  			case NETDEV_GOING_DOWN:
>  			case NETDEV_BONDING_DESLAVE:
> +			case NETDEV_BONDING_FAILOVER:
>  				nt->enabled = 0;
>  				break;
>  			}


-- 
Flavio

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-05-31 19:08           ` [Bridge] " Flavio Leitner
@ 2010-06-01  9:57             ` Cong Wang
  -1 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-06-01  9:57 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: linux-kernel, Matt Mackall, netdev, bridge, Andy Gospodarek,
	Neil Horman, Jeff Moyer, Stephen Hemminger, bonding-devel,
	Jay Vosburgh, David Miller

[-- Attachment #1: Type: text/plain, Size: 754 bytes --]

On 06/01/10 03:08, Flavio Leitner wrote:
> On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
>> Hi, Flavio,
>>
>> Please use the attached patch instead, try to see if it solves
>> all your problems.
>
> I tried and it hangs. No backtraces this time.
> The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
> notification, so I think it won't work.

Ah, I thought the same.

>
> Please, correct if I'm wrong, but when a failover happens with your
> patch applied, the netconsole would be disabled forever even with
> another healthy slave, right?
>

Yes, this is an easy solution, because bonding has several modes,
it is complex to make netpoll work in different modes.

Would you like to test the following patch?

Thanks much!


[-- Attachment #2: drivers-net-bonding-fix-activebackup-deadlock.diff --]
[-- Type: text/x-patch, Size: 591 bytes --]

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 5e12462..59ade92 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1109,6 +1109,14 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 	if (old_active == new_active)
 		return;
 
+	write_unlock_bh(&bond->curr_slave_lock);
+	read_unlock(&bond->lock);
+
+	netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
+
+	read_lock(&bond->lock);
+	write_lock_bh(&bond->curr_slave_lock);
+
 	if (new_active) {
 		new_active->jiffies = jiffies;
 

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-06-01  9:57             ` Cong Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-06-01  9:57 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: Jay Vosburgh, Neil Horman, netdev, Matt Mackall, bridge,
	linux-kernel, David Miller, Jeff Moyer, Andy Gospodarek,
	bonding-devel

[-- Attachment #1: Type: text/plain, Size: 754 bytes --]

On 06/01/10 03:08, Flavio Leitner wrote:
> On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
>> Hi, Flavio,
>>
>> Please use the attached patch instead, try to see if it solves
>> all your problems.
>
> I tried and it hangs. No backtraces this time.
> The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
> notification, so I think it won't work.

Ah, I thought the same.

>
> Please, correct if I'm wrong, but when a failover happens with your
> patch applied, the netconsole would be disabled forever even with
> another healthy slave, right?
>

Yes, this is an easy solution, because bonding has several modes,
it is complex to make netpoll work in different modes.

Would you like to test the following patch?

Thanks much!


[-- Attachment #2: drivers-net-bonding-fix-activebackup-deadlock.diff --]
[-- Type: text/x-patch, Size: 591 bytes --]

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 5e12462..59ade92 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1109,6 +1109,14 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 	if (old_active == new_active)
 		return;
 
+	write_unlock_bh(&bond->curr_slave_lock);
+	read_unlock(&bond->lock);
+
+	netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
+
+	read_lock(&bond->lock);
+	write_lock_bh(&bond->curr_slave_lock);
+
 	if (new_active) {
 		new_active->jiffies = jiffies;
 

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-06-01  9:57             ` [Bridge] " Cong Wang
@ 2010-06-01 18:42               ` Jay Vosburgh
  -1 siblings, 0 replies; 73+ messages in thread
From: Jay Vosburgh @ 2010-06-01 18:42 UTC (permalink / raw)
  To: Cong Wang
  Cc: Flavio Leitner, linux-kernel, Matt Mackall, netdev, bridge,
	Andy Gospodarek, Neil Horman, Jeff Moyer, Stephen Hemminger,
	bonding-devel, David Miller

Cong Wang <amwang@redhat.com> wrote:

>On 06/01/10 03:08, Flavio Leitner wrote:
>> On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
>>> Hi, Flavio,
>>>
>>> Please use the attached patch instead, try to see if it solves
>>> all your problems.
>>
>> I tried and it hangs. No backtraces this time.
>> The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
>> notification, so I think it won't work.
>
>Ah, I thought the same.
>
>>
>> Please, correct if I'm wrong, but when a failover happens with your
>> patch applied, the netconsole would be disabled forever even with
>> another healthy slave, right?
>>
>
>Yes, this is an easy solution, because bonding has several modes,
>it is complex to make netpoll work in different modes.

	If I understand correctly, the root cause of the problem with
netconsole and bonding is that bonding is, ultimately, performing
printks with a write lock held, and when netconsole recursively calls
into bonding to send the printk over the netconsole, there is a deadlock
(when the bonding xmit function attempts to acquire the same lock for
read).

	You're trying to avoid the deadlock by shutting off netconsole
(permanently, it looks like) for one problem case: a failover, which
does some printks with a write lock held.

	This doesn't look to me like a complete solution, there are
other cases in bonding that will do printk with write locks held.  I
suspect those will also hang netconsole as things exist today, and won't
be affected by your patch below.

	For example:

	The sysfs functions to set the primary (bonding_store_primary)
or active (bonding_store_active_slave) options: a pr_info is called to
provide a log message of the results.  These could be tested by setting
the primary or active options via sysfs, e.g.,

echo eth0 > /sys/class/net/bond0/bonding/primary
echo eth0 > /sys/class/net/bond0/bonding/active

	If the kernel is defined with DEBUG, there are a few pr_debug
calls within write_locks (bond_del_vlan, for example).

	If the slave's underlying device driver's ndo_vlan_rx_register
or ndo_vlan_rx_kill_vid functions call printk (and it looks like some do
for error cases, e.g., igbvf, ehea, enic), those would also presumably
deadlock (because bonding holds its write_lock when calling the ndo_
vlan functions).

	It also appears that (with the patch below) some nominally
normal usage patterns will immediately disable netconsole.  The one that
comes to mind is if the primary= option is set (to "eth1" for this
example), but that slave is not enslaved first (the slaves are added, say,
eth0 then eth1).  In that situation, when the primary slave (eth1 here)
is added, the first thing that will happen is a failover, and that will
disable netconsole.

	Thoughts?

	-J

>Would you like to test the following patch?
>
>Thanks much!
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 5e12462..59ade92 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -1109,6 +1109,14 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
> 	if (old_active == new_active)
> 		return;
>
>+	write_unlock_bh(&bond->curr_slave_lock);
>+	read_unlock(&bond->lock);
>+
>+	netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>+
>+	read_lock(&bond->lock);
>+	write_lock_bh(&bond->curr_slave_lock);
>+
> 	if (new_active) {
> 		new_active->jiffies = jiffies;
>

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-06-01 18:42               ` Jay Vosburgh
  0 siblings, 0 replies; 73+ messages in thread
From: Jay Vosburgh @ 2010-06-01 18:42 UTC (permalink / raw)
  To: Cong Wang
  Cc: Neil Horman, netdev, Matt Mackall, bridge, linux-kernel,
	David Miller, Flavio Leitner, Jeff Moyer, Andy Gospodarek,
	bonding-devel

Cong Wang <amwang@redhat.com> wrote:

>On 06/01/10 03:08, Flavio Leitner wrote:
>> On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
>>> Hi, Flavio,
>>>
>>> Please use the attached patch instead, try to see if it solves
>>> all your problems.
>>
>> I tried and it hangs. No backtraces this time.
>> The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
>> notification, so I think it won't work.
>
>Ah, I thought the same.
>
>>
>> Please, correct if I'm wrong, but when a failover happens with your
>> patch applied, the netconsole would be disabled forever even with
>> another healthy slave, right?
>>
>
>Yes, this is an easy solution, because bonding has several modes,
>it is complex to make netpoll work in different modes.

	If I understand correctly, the root cause of the problem with
netconsole and bonding is that bonding is, ultimately, performing
printks with a write lock held, and when netconsole recursively calls
into bonding to send the printk over the netconsole, there is a deadlock
(when the bonding xmit function attempts to acquire the same lock for
read).

	You're trying to avoid the deadlock by shutting off netconsole
(permanently, it looks like) for one problem case: a failover, which
does some printks with a write lock held.

	This doesn't look to me like a complete solution, there are
other cases in bonding that will do printk with write locks held.  I
suspect those will also hang netconsole as things exist today, and won't
be affected by your patch below.

	For example:

	The sysfs functions to set the primary (bonding_store_primary)
or active (bonding_store_active_slave) options: a pr_info is called to
provide a log message of the results.  These could be tested by setting
the primary or active options via sysfs, e.g.,

echo eth0 > /sys/class/net/bond0/bonding/primary
echo eth0 > /sys/class/net/bond0/bonding/active

	If the kernel is defined with DEBUG, there are a few pr_debug
calls within write_locks (bond_del_vlan, for example).

	If the slave's underlying device driver's ndo_vlan_rx_register
or ndo_vlan_rx_kill_vid functions call printk (and it looks like some do
for error cases, e.g., igbvf, ehea, enic), those would also presumably
deadlock (because bonding holds its write_lock when calling the ndo_
vlan functions).

	It also appears that (with the patch below) some nominally
normal usage patterns will immediately disable netconsole.  The one that
comes to mind is if the primary= option is set (to "eth1" for this
example), but that slave is not enslaved first (the slaves are added, say,
eth0 then eth1).  In that situation, when the primary slave (eth1 here)
is added, the first thing that will happen is a failover, and that will
disable netconsole.

	Thoughts?

	-J

>Would you like to test the following patch?
>
>Thanks much!
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 5e12462..59ade92 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -1109,6 +1109,14 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
> 	if (old_active == new_active)
> 		return;
>
>+	write_unlock_bh(&bond->curr_slave_lock);
>+	read_unlock(&bond->lock);
>+
>+	netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE);
>+
>+	read_lock(&bond->lock);
>+	write_lock_bh(&bond->curr_slave_lock);
>+
> 	if (new_active) {
> 		new_active->jiffies = jiffies;
>

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-06-01 18:42               ` [Bridge] " Jay Vosburgh
@ 2010-06-02 10:04                 ` Cong Wang
  -1 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-06-02 10:04 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: Flavio Leitner, linux-kernel, Matt Mackall, netdev, bridge,
	Andy Gospodarek, Neil Horman, Jeff Moyer, Stephen Hemminger,
	bonding-devel, David Miller

On 06/02/10 02:42, Jay Vosburgh wrote:
> Cong Wang<amwang@redhat.com>  wrote:
>
>> On 06/01/10 03:08, Flavio Leitner wrote:
>>> On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
>>>> Hi, Flavio,
>>>>
>>>> Please use the attached patch instead, try to see if it solves
>>>> all your problems.
>>>
>>> I tried and it hangs. No backtraces this time.
>>> The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
>>> notification, so I think it won't work.
>>
>> Ah, I thought the same.
>>
>>>
>>> Please, correct if I'm wrong, but when a failover happens with your
>>> patch applied, the netconsole would be disabled forever even with
>>> another healthy slave, right?
>>>
>>
>> Yes, this is an easy solution, because bonding has several modes,
>> it is complex to make netpoll work in different modes.
>
> 	If I understand correctly, the root cause of the problem with
> netconsole and bonding is that bonding is, ultimately, performing
> printks with a write lock held, and when netconsole recursively calls
> into bonding to send the printk over the netconsole, there is a deadlock
> (when the bonding xmit function attempts to acquire the same lock for
> read).


Yes.

>
> 	You're trying to avoid the deadlock by shutting off netconsole
> (permanently, it looks like) for one problem case: a failover, which
> does some printks with a write lock held.
>
> 	This doesn't look to me like a complete solution, there are
> other cases in bonding that will do printk with write locks held.  I
> suspect those will also hang netconsole as things exist today, and won't
> be affected by your patch below.


I can expect that, bonding modes are complex.

>
> 	For example:
>
> 	The sysfs functions to set the primary (bonding_store_primary)
> or active (bonding_store_active_slave) options: a pr_info is called to
> provide a log message of the results.  These could be tested by setting
> the primary or active options via sysfs, e.g.,
>
> echo eth0>  /sys/class/net/bond0/bonding/primary
> echo eth0>  /sys/class/net/bond0/bonding/active
>
> 	If the kernel is defined with DEBUG, there are a few pr_debug
> calls within write_locks (bond_del_vlan, for example).
>
> 	If the slave's underlying device driver's ndo_vlan_rx_register
> or ndo_vlan_rx_kill_vid functions call printk (and it looks like some do
> for error cases, e.g., igbvf, ehea, enic), those would also presumably
> deadlock (because bonding holds its write_lock when calling the ndo_
> vlan functions).
>
> 	It also appears that (with the patch below) some nominally
> normal usage patterns will immediately disable netconsole.  The one that
> comes to mind is if the primary= option is set (to "eth1" for this
> example), but that slave not enslaved first (the slaves are added, say,
> eth0 then eth1).  In that situation, when the primary slave (eth1 here)
> is added, the first thing that will happen is a failover, and that will
> disable netconsole.
>

Thanks for your detailed explanation!

This is why I said bonding is complex. I guess we would have to adjust
the netpoll code for the different bonding cases; one solution does not
seem to fix them all. I am not sure how much work that would be, since I
am not familiar with the bonding code. Maybe Andy can help?

The previous patch can at least make Flavio happy. :)

Thanks!

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-06-02 10:04                 ` Cong Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-06-02 10:04 UTC (permalink / raw)
  To: Jay Vosburgh
  Cc: Neil Horman, netdev, Matt Mackall, bridge, linux-kernel,
	David Miller, Flavio Leitner, Jeff Moyer, Andy Gospodarek,
	bonding-devel

On 06/02/10 02:42, Jay Vosburgh wrote:
> Cong Wang<amwang@redhat.com>  wrote:
>
>> On 06/01/10 03:08, Flavio Leitner wrote:
>>> On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
>>>> Hi, Flavio,
>>>>
>>>> Please use the attached patch instead, try to see if it solves
>>>> all your problems.
>>>
>>> I tried and it hangs. No backtraces this time.
>>> The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
>>> notification, so I think it won't work.
>>
>> Ah, I thought the same.
>>
>>>
>>> Please, correct if I'm wrong, but when a failover happens with your
>>> patch applied, the netconsole would be disabled forever even with
>>> another healthy slave, right?
>>>
>>
>> Yes, this is an easy solution, because bonding has several modes,
>> it is complex to make netpoll work in different modes.
>
> 	If I understand correctly, the root cause of the problem with
> netconsole and bonding is that bonding is, ultimately, performing
> printks with a write lock held, and when netconsole recursively calls
> into bonding to send the printk over the netconsole, there is a deadlock
> (when the bonding xmit function attempts to acquire the same lock for
> read).


Yes.

>
> 	You're trying to avoid the deadlock by shutting off netconsole
> (permanently, it looks like) for one problem case: a failover, which
> does some printks with a write lock held.
>
> 	This doesn't look to me like a complete solution, there are
> other cases in bonding that will do printk with write locks held.  I
> suspect those will also hang netconsole as things exist today, and won't
> be affected by your patch below.


I can expect that, bonding modes are complex.

>
> 	For example:
>
> 	The sysfs functions to set the primary (bonding_store_primary)
> or active (bonding_store_active_slave) options: a pr_info is called to
> provide a log message of the results.  These could be tested by setting
> the primary or active options via sysfs, e.g.,
>
> echo eth0>  /sys/class/net/bond0/bonding/primary
> echo eth0>  /sys/class/net/bond0/bonding/active
>
> 	If the kernel is defined with DEBUG, there are a few pr_debug
> calls within write_locks (bond_del_vlan, for example).
>
> 	If the slave's underlying device driver's ndo_vlan_rx_register
> or ndo_vlan_rx_kill_vid functions call printk (and it looks like some do
> for error cases, e.g., igbvf, ehea, enic), those would also presumably
> deadlock (because bonding holds its write_lock when calling the ndo_
> vlan functions).
>
> 	It also appears that (with the patch below) some nominally
> normal usage patterns will immediately disable netconsole.  The one that
> comes to mind is if the primary= option is set (to "eth1" for this
> example), but that slave not enslaved first (the slaves are added, say,
> eth0 then eth1).  In that situation, when the primary slave (eth1 here)
> is added, the first thing that will happen is a failover, and that will
> disable netconsole.
>

Thanks for your detailed explanation!

This is why I said bonding is complex. I guess we would have to adjust
the netpoll code for the different bonding cases; one solution does not
seem to fix them all. I am not sure how much work that would be, since I
am not familiar with the bonding code. Maybe Andy can help?

The previous patch can at least make Flavio happy. :)

Thanks!

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-06-02 10:04                 ` [Bridge] " Cong Wang
@ 2010-06-04 19:18                   ` Andy Gospodarek
  -1 siblings, 0 replies; 73+ messages in thread
From: Andy Gospodarek @ 2010-06-04 19:18 UTC (permalink / raw)
  To: Cong Wang
  Cc: Jay Vosburgh, Flavio Leitner, linux-kernel, Matt Mackall, netdev,
	bridge, Andy Gospodarek, Neil Horman, Jeff Moyer,
	Stephen Hemminger, bonding-devel, David Miller

On Wed, Jun 02, 2010 at 06:04:45PM +0800, Cong Wang wrote:
> On 06/02/10 02:42, Jay Vosburgh wrote:
>> Cong Wang<amwang@redhat.com>  wrote:
>>
>>> On 06/01/10 03:08, Flavio Leitner wrote:
>>>> On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
>>>>> Hi, Flavio,
>>>>>
>>>>> Please use the attached patch instead, try to see if it solves
>>>>> all your problems.
>>>>
>>>> I tried and it hangs. No backtraces this time.
>>>> The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
>>>> notification, so I think it won't work.
>>>
>>> Ah, I thought the same.
>>>
>>>>
>>>> Please, correct if I'm wrong, but when a failover happens with your
>>>> patch applied, the netconsole would be disabled forever even with
>>>> another healthy slave, right?
>>>>
>>>
>>> Yes, this is an easy solution, because bonding has several modes,
>>> it is complex to make netpoll work in different modes.
>>
>> 	If I understand correctly, the root cause of the problem with
>> netconsole and bonding is that bonding is, ultimately, performing
>> printks with a write lock held, and when netconsole recursively calls
>> into bonding to send the printk over the netconsole, there is a deadlock
>> (when the bonding xmit function attempts to acquire the same lock for
>> read).
>
>
> Yes.
>
>>
>> 	You're trying to avoid the deadlock by shutting off netconsole
>> (permanently, it looks like) for one problem case: a failover, which
>> does some printks with a write lock held.
>>
>> 	This doesn't look to me like a complete solution, there are
>> other cases in bonding that will do printk with write locks held.  I
>> suspect those will also hang netconsole as things exist today, and won't
>> be affected by your patch below.
>
>
> I can expect that, bonding modes are complex.
>
>>
>> 	For example:
>>
>> 	The sysfs functions to set the primary (bonding_store_primary)
>> or active (bonding_store_active_slave) options: a pr_info is called to
>> provide a log message of the results.  These could be tested by setting
>> the primary or active options via sysfs, e.g.,
>>
>> echo eth0>  /sys/class/net/bond0/bonding/primary
>> echo eth0>  /sys/class/net/bond0/bonding/active
>>
>> 	If the kernel is defined with DEBUG, there are a few pr_debug
>> calls within write_locks (bond_del_vlan, for example).
>>
>> 	If the slave's underlying device driver's ndo_vlan_rx_register
>> or ndo_vlan_rx_kill_vid functions call printk (and it looks like some do
>> for error cases, e.g., igbvf, ehea, enic), those would also presumably
>> deadlock (because bonding holds its write_lock when calling the ndo_
>> vlan functions).
>>
>> 	It also appears that (with the patch below) some nominally
>> normal usage patterns will immediately disable netconsole.  The one that
>> comes to mind is if the primary= option is set (to "eth1" for this
>> example), but that slave not enslaved first (the slaves are added, say,
>> eth0 then eth1).  In that situation, when the primary slave (eth1 here)
>> is added, the first thing that will happen is a failover, and that will
>> disable netconsole.
>>
>
> Thanks for your detailed explanation!
>
> This is why I said bonding is complex. I guess we would have to adjust
> netpoll code for different bonding cases, one solution seems not fix all.
> I am not sure how much work to do, since I am not familiar with bonding
> code. Maybe Andy can help?
>

Sorry I've been silent until now.  This does seem quite similar to a
problem I've previously encountered when dealing with bonding+netpoll on
some old 2.6.9-based kernels.  There is no guarantee the methods used
there will apply here, but I'll talk about them anyway.

As Flavio noticed, recursive calls into the bond transmit routines were
not a good idea.  I discovered the same and worked around this issue by
checking to see if we could take the bond->lock for writing before
continuing.  If we could not get it, I wanted to signal that this should be
queued for transmission later.  Based on the flow of netpoll_send_skb
(or possibly for another reason that is escaping me right now) I added
one of these checks in bond_poll_controller too.  These aren't the
prettiest fixes, but seemed to work well for me when I did this work in
the past.  I realize the differences are not that great compared to some
of the patches posted by Flavio, but I think they are worth trying.

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index ef60244..d7b9b99 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1290,6 +1290,12 @@ static bool slaves_support_netpoll(struct net_device *bond_dev)
 static void bond_poll_controller(struct net_device *bond_dev)
 {
 	struct net_device *dev = bond_dev->npinfo->netpoll->real_dev;
+	struct bonding *bond = netdev_priv(bond_dev);
+
+	if (!write_trylock(&bond->lock))
+		return;
+	write_unlock(&bond->lock);
+
 	if (dev != bond_dev)
 		netpoll_poll_dev(dev);
 }
@@ -4418,7 +4424,11 @@ static void bond_set_xmit_hash_policy(struct bonding *bond)
 
 static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	const struct bonding *bond = netdev_priv(dev);
+	struct bonding *bond = netdev_priv(dev);
+
+	if (!write_trylock(&bond->lock))
+		return NETDEV_TX_BUSY;
+	write_unlock(&bond->lock);
 
 	switch (bond->params.mode) {
 	case BOND_MODE_ROUNDROBIN:

The other key to all of this is to make sure that queuing is done
correctly now that we expect to queue these frames and have them sent at
some point when there is a member of the bond that is actually capable
of sending them out.

The new style of sending queued skbs in a workqueue is much better than
what was done in the 2.6.9 timeframe, but careful attention should still
be paid to txq lock and which processor is the owner.  Returning
something other than NETDEV_TX_OK from bond_start_xmit and checking for
locks being held there should also help with any deadlocks that show up
while running in queue_process (though they would not be recursive).

I'm not in a good spot to test this right now, but I can take a look
next week and we can try and track down any of the other deadlocks that
currently exist as I suspect this will not resolve all of the issues.

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-06-04 19:18                   ` Andy Gospodarek
  0 siblings, 0 replies; 73+ messages in thread
From: Andy Gospodarek @ 2010-06-04 19:18 UTC (permalink / raw)
  To: Cong Wang
  Cc: Jay Vosburgh, Neil Horman, netdev, Andy Gospodarek, bridge,
	linux-kernel, David Miller, Flavio Leitner, Jeff Moyer,
	Matt Mackall, bonding-devel

On Wed, Jun 02, 2010 at 06:04:45PM +0800, Cong Wang wrote:
> On 06/02/10 02:42, Jay Vosburgh wrote:
>> Cong Wang<amwang@redhat.com>  wrote:
>>
>>> On 06/01/10 03:08, Flavio Leitner wrote:
>>>> On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
>>>>> Hi, Flavio,
>>>>>
>>>>> Please use the attached patch instead, try to see if it solves
>>>>> all your problems.
>>>>
>>>> I tried and it hangs. No backtraces this time.
>>>> The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
>>>> notification, so I think it won't work.
>>>
>>> Ah, I thought the same.
>>>
>>>>
>>>> Please, correct if I'm wrong, but when a failover happens with your
>>>> patch applied, the netconsole would be disabled forever even with
>>>> another healthy slave, right?
>>>>
>>>
>>> Yes, this is an easy solution, because bonding has several modes,
>>> it is complex to make netpoll work in different modes.
>>
>> 	If I understand correctly, the root cause of the problem with
>> netconsole and bonding is that bonding is, ultimately, performing
>> printks with a write lock held, and when netconsole recursively calls
>> into bonding to send the printk over the netconsole, there is a deadlock
>> (when the bonding xmit function attempts to acquire the same lock for
>> read).
>
>
> Yes.
>
>>
>> 	You're trying to avoid the deadlock by shutting off netconsole
>> (permanently, it looks like) for one problem case: a failover, which
>> does some printks with a write lock held.
>>
>> 	This doesn't look to me like a complete solution, there are
>> other cases in bonding that will do printk with write locks held.  I
>> suspect those will also hang netconsole as things exist today, and won't
>> be affected by your patch below.
>
>
> I can expect that, bonding modes are complex.
>
>>
>> 	For example:
>>
>> 	The sysfs functions to set the primary (bonding_store_primary)
>> or active (bonding_store_active_slave) options: a pr_info is called to
>> provide a log message of the results.  These could be tested by setting
>> the primary or active options via sysfs, e.g.,
>>
>> echo eth0>  /sys/class/net/bond0/bonding/primary
>> echo eth0>  /sys/class/net/bond0/bonding/active
>>
>> 	If the kernel is defined with DEBUG, there are a few pr_debug
>> calls within write_locks (bond_del_vlan, for example).
>>
>> 	If the slave's underlying device driver's ndo_vlan_rx_register
>> or ndo_vlan_rx_kill_vid functions call printk (and it looks like some do
>> for error cases, e.g., igbvf, ehea, enic), those would also presumably
>> deadlock (because bonding holds its write_lock when calling the ndo_
>> vlan functions).
>>
>> 	It also appears that (with the patch below) some nominally
>> normal usage patterns will immediately disable netconsole.  The one that
>> comes to mind is if the primary= option is set (to "eth1" for this
>> example), but that slave not enslaved first (the slaves are added, say,
>> eth0 then eth1).  In that situation, when the primary slave (eth1 here)
>> is added, the first thing that will happen is a failover, and that will
>> disable netconsole.
>>
>
> Thanks for your detailed explanation!
>
> This is why I said bonding is complex. I guess we would have to adjust
> netpoll code for different bonding cases, one solution seems not fix all.
> I am not sure how much work to do, since I am not familiar with bonding
> code. Maybe Andy can help?
>

Sorry I've been silent until now.  This does seem quite similar to a
problem I've previously encountered when dealing with bonding+netpoll on
some old 2.6.9-based kernels.  There is no guarantee the methods used
there will apply here, but I'll talk about them anyway.

As Flavio noticed, recursive calls into the bond transmit routines were
not a good idea.  I discovered the same and worked around this issue by
checking to see if we could take the bond->lock for writing before
continuing.  If we could not get it, I wanted to signal that this should be
queued for transmission later.  Based on the flow of netpoll_send_skb
(or possibly for another reason that is escaping me right now) I added
one of these checks in bond_poll_controller too.  These aren't the
prettiest fixes, but seemed to work well for me when I did this work in
the past.  I realize the differences are not that great compared to some
of the patches posted by Flavio, but I think they are worth trying.

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index ef60244..d7b9b99 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1290,6 +1290,12 @@ static bool slaves_support_netpoll(struct net_device *bond_dev)
 static void bond_poll_controller(struct net_device *bond_dev)
 {
 	struct net_device *dev = bond_dev->npinfo->netpoll->real_dev;
+	struct bonding *bond = netdev_priv(bond_dev);
+
+	if (!write_trylock(&bond->lock))
+		return;
+	write_unlock(&bond->lock);
+
 	if (dev != bond_dev)
 		netpoll_poll_dev(dev);
 }
@@ -4418,7 +4424,11 @@ static void bond_set_xmit_hash_policy(struct bonding *bond)
 
 static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	const struct bonding *bond = netdev_priv(dev);
+	struct bonding *bond = netdev_priv(dev);
+
+	if (!write_trylock(&bond->lock))
+		return NETDEV_TX_BUSY;
+	write_unlock(&bond->lock);
 
 	switch (bond->params.mode) {
 	case BOND_MODE_ROUNDROBIN:

The other key to all of this is to make sure that queuing is done
correctly now that we expect to queue these frames and have them sent at
some point when there is a member of the bond that is actually capable
of sending them out.

The new style of sending queued skbs in a workqueue is much better than
what was done in the 2.6.9 timeframe, but careful attention should still
be paid to txq lock and which processor is the owner.  Returning
something other than NETDEV_TX_OK from bond_start_xmit and checking for
locks being held there should also help with any deadlocks that show up
while running in queue_process (though they would not be recursive).

I'm not in a good spot to test this right now, but I can take a look
next week and we can try and track down any of the other deadlocks that
currently exist as I suspect this will not resolve all of the issues.

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-06-04 19:18                   ` [Bridge] " Andy Gospodarek
@ 2010-06-07  9:57                     ` Cong Wang
  -1 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-06-07  9:57 UTC (permalink / raw)
  To: Andy Gospodarek
  Cc: Jay Vosburgh, Flavio Leitner, linux-kernel, Matt Mackall, netdev,
	bridge, Andy Gospodarek, Neil Horman, Jeff Moyer,
	Stephen Hemminger, bonding-devel, David Miller

On 06/05/10 03:18, Andy Gospodarek wrote:
> On Wed, Jun 02, 2010 at 06:04:45PM +0800, Cong Wang wrote:
>> On 06/02/10 02:42, Jay Vosburgh wrote:
>>> Cong Wang<amwang@redhat.com>   wrote:
>>>
>>>> On 06/01/10 03:08, Flavio Leitner wrote:
>>>>> On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
>>>>>> Hi, Flavio,
>>>>>>
>>>>>> Please use the attached patch instead, try to see if it solves
>>>>>> all your problems.
>>>>>
>>>>> I tried and it hangs. No backtraces this time.
>>>>> The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
>>>>> notification, so I think it won't work.
>>>>
>>>> Ah, I thought the same.
>>>>
>>>>>
>>>>> Please, correct if I'm wrong, but when a failover happens with your
>>>>> patch applied, the netconsole would be disabled forever even with
>>>>> another healthy slave, right?
>>>>>
>>>>
>>>> Yes, this is an easy solution, because bonding has several modes,
>>>> it is complex to make netpoll work in different modes.
>>>
>>> 	If I understand correctly, the root cause of the problem with
>>> netconsole and bonding is that bonding is, ultimately, performing
>>> printks with a write lock held, and when netconsole recursively calls
>>> into bonding to send the printk over the netconsole, there is a deadlock
>>> (when the bonding xmit function attempts to acquire the same lock for
>>> read).
>>
>>
>> Yes.
>>
>>>
>>> 	You're trying to avoid the deadlock by shutting off netconsole
>>> (permanently, it looks like) for one problem case: a failover, which
>>> does some printks with a write lock held.
>>>
>>> 	This doesn't look to me like a complete solution, there are
>>> other cases in bonding that will do printk with write locks held.  I
>>> suspect those will also hang netconsole as things exist today, and won't
>>> be affected by your patch below.
>>
>>
>> I can expect that, bonding modes are complex.
>>
>>>
>>> 	For example:
>>>
>>> 	The sysfs functions to set the primary (bonding_store_primary)
>>> or active (bonding_store_active_slave) options: a pr_info is called to
>>> provide a log message of the results.  These could be tested by setting
>>> the primary or active options via sysfs, e.g.,
>>>
>>> echo eth0>   /sys/class/net/bond0/bonding/primary
>>> echo eth0>   /sys/class/net/bond0/bonding/active
>>>
>>> 	If the kernel is defined with DEBUG, there are a few pr_debug
>>> calls within write_locks (bond_del_vlan, for example).
>>>
>>> 	If the slave's underlying device driver's ndo_vlan_rx_register
>>> or ndo_vlan_rx_kill_vid functions call printk (and it looks like some do
>>> for error cases, e.g., igbvf, ehea, enic), those would also presumably
>>> deadlock (because bonding holds its write_lock when calling the ndo_
>>> vlan functions).
>>>
>>> 	It also appears that (with the patch below) some nominally
>>> normal usage patterns will immediately disable netconsole.  The one that
>>> comes to mind is if the primary= option is set (to "eth1" for this
>>> example), but that slave not enslaved first (the slaves are added, say,
>>> eth0 then eth1).  In that situation, when the primary slave (eth1 here)
>>> is added, the first thing that will happen is a failover, and that will
>>> disable netconsole.
>>>
>>
>> Thanks for your detailed explanation!
>>
>> This is why I said bonding is complex. I guess we would have to adjust
>> netpoll code for different bonding cases, one solution seems not fix all.
>> I am not sure how much work to do, since I am not familiar with bonding
>> code. Maybe Andy can help?
>>
>
> Sorry I've been silent until now.  This does seem quite similar to a
> problem I've previously encountered when dealing with bonding+netpoll on
> some old 2.6.9-based kernels.  There is no guarantee the methods used
> there will apply here, but I'll talk about them anyway.
>
> As Flavio noticed, recursive calls into the bond transmit routines were
> not a good idea.  I discovered the same and worked around this issue by
> checking to see if we could take the bond->lock for writing before
> continuing.  If we could not get, I wanted to signal that this should be
> queued for transmission later.  Based on the flow of netpoll_send_skb
> (or possibly for another reason that is escaping me right now) I added
> one of these checks in bond_poll_controller too.  These aren't the
> prettiest fixes, but seemed to work well for me when I did this work in
> the past.  I realize the differences are not that great compared to some
> of the patches posted by Flavio, but I think they are worth trying.


Hmm, I still feel like this way is ugly, although it may work.
I guess David doesn't like it either.

Anyway, Flavio, could you try the following patch as well?

Thanks a lot!

>
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index ef60244..d7b9b99 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -1290,6 +1290,12 @@ static bool slaves_support_netpoll(struct net_device *bond_dev)
>   static void bond_poll_controller(struct net_device *bond_dev)
>   {
>   	struct net_device *dev = bond_dev->npinfo->netpoll->real_dev;
> +	struct bonding *bond = netdev_priv(bond_dev);
> +
> +	if (!write_trylock(&bond->lock))
> +		return;
> +	write_unlock(&bond->lock);
> +
>   	if (dev != bond_dev)
>   		netpoll_poll_dev(dev);
>   }
> @@ -4418,7 +4424,11 @@ static void bond_set_xmit_hash_policy(struct bonding *bond)
>
>   static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
>   {
> -	const struct bonding *bond = netdev_priv(dev);
> +	struct bonding *bond = netdev_priv(dev);
> +
> +	if (!write_trylock(&bond->lock))
> +		return NETDEV_TX_BUSY;
> +	write_unlock(&bond->lock);
>
>   	switch (bond->params.mode) {
>   	case BOND_MODE_ROUNDROBIN:
>
> The other key to all of this is to make sure that queuing is done
> correctly now that we expect to queue these frames and have them sent at
> some point when there is a member of the bond that is actually capable
> of sending them out.
>
> The new style of sending queued skbs in a workqueue is much better than
> what was done in the 2.6.9 timeframe, but careful attention should still
> be paid to txq lock and which processor is the owner.  Returning
> something other than NETDEV_TX_OK from bond_start_xmit and checking for
> locks being held there should also help with any deadlocks that show up
> while running in queue_process (though they would not be recursive).
>
> I'm not in a good spot to test this right now, but I can take a look at
> next week and we can try and track down any of the other deadlocks that
> currently exist as I suspect this will not resolve all of the issues.


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-06-07  9:57                     ` Cong Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-06-07  9:57 UTC (permalink / raw)
  To: Andy Gospodarek
  Cc: Jay Vosburgh, Neil Horman, netdev, Andy Gospodarek, bridge,
	linux-kernel, David Miller, Flavio Leitner, Jeff Moyer,
	Matt Mackall, bonding-devel

On 06/05/10 03:18, Andy Gospodarek wrote:
> On Wed, Jun 02, 2010 at 06:04:45PM +0800, Cong Wang wrote:
>> On 06/02/10 02:42, Jay Vosburgh wrote:
>>> Cong Wang<amwang@redhat.com>   wrote:
>>>
>>>> On 06/01/10 03:08, Flavio Leitner wrote:
>>>>> On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
>>>>>> Hi, Flavio,
>>>>>>
>>>>>> Please use the attached patch instead, try to see if it solves
>>>>>> all your problems.
>>>>>
>>>>> I tried and it hangs. No backtraces this time.
>>>>> The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
>>>>> notification, so I think it won't work.
>>>>
>>>> Ah, I thought the same.
>>>>
>>>>>
>>>>> Please, correct if I'm wrong, but when a failover happens with your
>>>>> patch applied, the netconsole would be disabled forever even with
>>>>> another healthy slave, right?
>>>>>
>>>>
>>>> Yes, this is an easy solution, because bonding has several modes,
>>>> it is complex to make netpoll work in different modes.
>>>
>>> 	If I understand correctly, the root cause of the problem with
>>> netconsole and bonding is that bonding is, ultimately, performing
>>> printks with a write lock held, and when netconsole recursively calls
>>> into bonding to send the printk over the netconsole, there is a deadlock
>>> (when the bonding xmit function attempts to acquire the same lock for
>>> read).
>>
>>
>> Yes.
>>
>>>
>>> 	You're trying to avoid the deadlock by shutting off netconsole
>>> (permanently, it looks like) for one problem case: a failover, which
>>> does some printks with a write lock held.
>>>
>>> 	This doesn't look to me like a complete solution, there are
>>> other cases in bonding that will do printk with write locks held.  I
>>> suspect those will also hang netconsole as things exist today, and won't
>>> be affected by your patch below.
>>
>>
>> I can expect that, bonding modes are complex.
>>
>>>
>>> 	For example:
>>>
>>> 	The sysfs functions to set the primary (bonding_store_primary)
>>> or active (bonding_store_active_slave) options: a pr_info is called to
>>> provide a log message of the results.  These could be tested by setting
>>> the primary or active options via sysfs, e.g.,
>>>
>>> echo eth0>   /sys/class/net/bond0/bonding/primary
>>> echo eth0>   /sys/class/net/bond0/bonding/active
>>>
>>> 	If the kernel is defined with DEBUG, there are a few pr_debug
>>> calls within write_locks (bond_del_vlan, for example).
>>>
>>> 	If the slave's underlying device driver's ndo_vlan_rx_register
>>> or ndo_vlan_rx_kill_vid functions call printk (and it looks like some do
>>> for error cases, e.g., igbvf, ehea, enic), those would also presumably
>>> deadlock (because bonding holds its write_lock when calling the ndo_
>>> vlan functions).
>>>
>>> 	It also appears that (with the patch below) some nominally
>>> normal usage patterns will immediately disable netconsole.  The one that
>>> comes to mind is if the primary= option is set (to "eth1" for this
>>> example), but that slave not enslaved first (the slaves are added, say,
>>> eth0 then eth1).  In that situation, when the primary slave (eth1 here)
>>> is added, the first thing that will happen is a failover, and that will
>>> disable netconsole.
>>>
>>
>> Thanks for your detailed explanation!
>>
>> This is why I said bonding is complex. I guess we would have to adjust
>> netpoll code for different bonding cases, one solution seems not fix all.
>> I am not sure how much work to do, since I am not familiar with bonding
>> code. Maybe Andy can help?
>>
>
> Sorry I've been silent until now.  This does seem quite similar to a
> problem I've previously encountered when dealing with bonding+netpoll on
> some old 2.6.9-based kernels.  There is no guarantee the methods used
> there will apply here, but I'll talk about them anyway.
>
> As Flavio noticed, recursive calls into the bond transmit routines were
> not a good idea.  I discovered the same and worked around this issue by
> checking to see if we could take the bond->lock for writing before
> continuing.  If we could not get, I wanted to signal that this should be
> queued for transmission later.  Based on the flow of netpoll_send_skb
> (or possibly for another reason that is escaping me right now) I added
> one of these checks in bond_poll_controller too.  These aren't the
> prettiest fixes, but seemed to work well for me when I did this work in
> the past.  I realize the differences are not that great compared to some
> of the patches posted by Flavio, but I think they are worth trying.


Hmm, I still feel like this way is ugly, although it may work.
I guess David doesn't like it either.

Anyway, Flavio, could you try the following patch as well?

Thanks a lot!

>
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index ef60244..d7b9b99 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -1290,6 +1290,12 @@ static bool slaves_support_netpoll(struct net_device *bond_dev)
>   static void bond_poll_controller(struct net_device *bond_dev)
>   {
>   	struct net_device *dev = bond_dev->npinfo->netpoll->real_dev;
> +	struct bonding *bond = netdev_priv(bond_dev);
> +
> +	if (!write_trylock(&bond->lock))
> +		return;
> +	write_unlock(&bond->lock);
> +
>   	if (dev != bond_dev)
>   		netpoll_poll_dev(dev);
>   }
> @@ -4418,7 +4424,11 @@ static void bond_set_xmit_hash_policy(struct bonding *bond)
>
>   static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
>   {
> -	const struct bonding *bond = netdev_priv(dev);
> +	struct bonding *bond = netdev_priv(dev);
> +
> +	if (!write_trylock(&bond->lock))
> +		return NETDEV_TX_BUSY;
> +	write_unlock(&bond->lock);
>
>   	switch (bond->params.mode) {
>   	case BOND_MODE_ROUNDROBIN:
>
> The other key to all of this is to make sure that queuing is done
> correctly now that we expect to queue these frames and have them sent at
> some point when there is a member of the bond that is actually capable
> of sending them out.
>
> The new style of sending queued skbs in a workqueue is much better than
> what was done in the 2.6.9 timeframe, but careful attention should still
> be paid to txq lock and which processor is the owner.  Returning
> something other than NETDEV_TX_OK from bond_start_xmit and checking for
> locks being held there should also help with any deadlocks that show up
> while running in queue_process (though they would not be recursive).
>
> I'm not in a good spot to test this right now, but I can take a look at
> next week and we can try and track down any of the other deadlocks that
> currently exist as I suspect this will not resolve all of the issues.


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-06-07  9:57                     ` [Bridge] " Cong Wang
@ 2010-06-07 10:01                       ` David Miller
  -1 siblings, 0 replies; 73+ messages in thread
From: David Miller @ 2010-06-07 10:01 UTC (permalink / raw)
  To: amwang
  Cc: andy, fubar, fbl, linux-kernel, mpm, netdev, bridge, gospo,
	nhorman, jmoyer, shemminger, bonding-devel

From: Cong Wang <amwang@redhat.com>
Date: Mon, 07 Jun 2010 17:57:49 +0800

> Hmm, I still feel like this way is ugly, although it may work.
> I guess David doesn't like it either.

Of course I don't like it. :-)

I suspect the locking scheme will need to be changed.

Besides, if we're going to hack this up and do write lock attempts in
the read locking paths, there is no point in using a rwlock any more.
And I'm personally in disfavor of all rwlock usage anyways (it dirties
the cacheline for readers just as it does for writers, and if the
critically protected code path is short enough, that shared cache
line atomic operation will be the predominant cost).

So I'd say, 1) make this a spinlock and 2) try to use RCU for the
read path.

That would fix everything.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-06-07 10:01                       ` David Miller
  0 siblings, 0 replies; 73+ messages in thread
From: David Miller @ 2010-06-07 10:01 UTC (permalink / raw)
  To: amwang
  Cc: bridge, nhorman, netdev, gospo, fubar, linux-kernel, fbl, jmoyer,
	mpm, bonding-devel

From: Cong Wang <amwang@redhat.com>
Date: Mon, 07 Jun 2010 17:57:49 +0800

> Hmm, I still feel like this way is ugly, although it may work.
> I guess David doesn't like it either.

Of course I don't like it. :-)

I suspect the locking scheme will need to be changed.

Besides, if we're going to hack this up and do write lock attempts in
the read locking paths, there is no point in using a rwlock any more.
And I'm personally in disfavor of all rwlock usage anyways (it dirties
the cacheline for readers just as it does for writers, and if the
critically protected code path is short enough, that shared cache
line atomic operation will be the predominant cost).

So I'd say, 1) make this a spinlock and 2) try to use RCU for the
read path.

That would fix everything.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-06-07  9:57                     ` [Bridge] " Cong Wang
@ 2010-06-07 13:03                       ` Andy Gospodarek
  -1 siblings, 0 replies; 73+ messages in thread
From: Andy Gospodarek @ 2010-06-07 13:03 UTC (permalink / raw)
  To: Cong Wang
  Cc: Andy Gospodarek, Jay Vosburgh, Flavio Leitner, linux-kernel,
	Matt Mackall, netdev, bridge, Andy Gospodarek, Neil Horman,
	Jeff Moyer, Stephen Hemminger, bonding-devel, David Miller

On Mon, Jun 07, 2010 at 05:57:49PM +0800, Cong Wang wrote:
> On 06/05/10 03:18, Andy Gospodarek wrote:
>> On Wed, Jun 02, 2010 at 06:04:45PM +0800, Cong Wang wrote:
>>> On 06/02/10 02:42, Jay Vosburgh wrote:
>>>> Cong Wang<amwang@redhat.com>   wrote:
>>>>
>>>>> On 06/01/10 03:08, Flavio Leitner wrote:
>>>>>> On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
>>>>>>> Hi, Flavio,
>>>>>>>
>>>>>>> Please use the attached patch instead, try to see if it solves
>>>>>>> all your problems.
>>>>>>
>>>>>> I tried and it hangs. No backtraces this time.
>>>>>> The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
>>>>>> notification, so I think it won't work.
>>>>>
>>>>> Ah, I thought the same.
>>>>>
>>>>>>
>>>>>> Please, correct if I'm wrong, but when a failover happens with your
>>>>>> patch applied, the netconsole would be disabled forever even with
>>>>>> another healthy slave, right?
>>>>>>
>>>>>
>>>>> Yes, this is an easy solution, because bonding has several modes,
>>>>> it is complex to make netpoll work in different modes.
>>>>
>>>> 	If I understand correctly, the root cause of the problem with
>>>> netconsole and bonding is that bonding is, ultimately, performing
>>>> printks with a write lock held, and when netconsole recursively calls
>>>> into bonding to send the printk over the netconsole, there is a deadlock
>>>> (when the bonding xmit function attempts to acquire the same lock for
>>>> read).
>>>
>>>
>>> Yes.
>>>
>>>>
>>>> 	You're trying to avoid the deadlock by shutting off netconsole
>>>> (permanently, it looks like) for one problem case: a failover, which
>>>> does some printks with a write lock held.
>>>>
>>>> 	This doesn't look to me like a complete solution, there are
>>>> other cases in bonding that will do printk with write locks held.  I
>>>> suspect those will also hang netconsole as things exist today, and won't
>>>> be affected by your patch below.
>>>
>>>
>>> I can expect that, bonding modes are complex.
>>>
>>>>
>>>> 	For example:
>>>>
>>>> 	The sysfs functions to set the primary (bonding_store_primary)
>>>> or active (bonding_store_active_slave) options: a pr_info is called to
>>>> provide a log message of the results.  These could be tested by setting
>>>> the primary or active options via sysfs, e.g.,
>>>>
>>>> echo eth0>   /sys/class/net/bond0/bonding/primary
>>>> echo eth0>   /sys/class/net/bond0/bonding/active
>>>>
>>>> 	If the kernel is defined with DEBUG, there are a few pr_debug
>>>> calls within write_locks (bond_del_vlan, for example).
>>>>
>>>> 	If the slave's underlying device driver's ndo_vlan_rx_register
>>>> or ndo_vlan_rx_kill_vid functions call printk (and it looks like some do
>>>> for error cases, e.g., igbvf, ehea, enic), those would also presumably
>>>> deadlock (because bonding holds its write_lock when calling the ndo_
>>>> vlan functions).
>>>>
>>>> 	It also appears that (with the patch below) some nominally
>>>> normal usage patterns will immediately disable netconsole.  The one that
>>>> comes to mind is if the primary= option is set (to "eth1" for this
>>>> example), but that slave not enslaved first (the slaves are added, say,
>>>> eth0 then eth1).  In that situation, when the primary slave (eth1 here)
>>>> is added, the first thing that will happen is a failover, and that will
>>>> disable netconsole.
>>>>
>>>
>>> Thanks for your detailed explanation!
>>>
>>> This is why I said bonding is complex. I guess we would have to adjust
>>> netpoll code for different bonding cases, one solution seems not fix all.
>>> I am not sure how much work to do, since I am not familiar with bonding
>>> code. Maybe Andy can help?
>>>
>>
>> Sorry I've been silent until now.  This does seem quite similar to a
>> problem I've previously encountered when dealing with bonding+netpoll on
>> some old 2.6.9-based kernels.  There is no guarantee the methods used
>> there will apply here, but I'll talk about them anyway.
>>
>> As Flavio noticed, recursive calls into the bond transmit routines were
>> not a good idea.  I discovered the same and worked around this issue by
>> checking to see if we could take the bond->lock for writing before
>> continuing.  If we could not get, I wanted to signal that this should be
>> queued for transmission later.  Based on the flow of netpoll_send_skb
>> (or possibly for another reason that is escaping me right now) I added
>> one of these checks in bond_poll_controller too.  These aren't the
>> prettiest fixes, but seemed to work well for me when I did this work in
>> the past.  I realize the differences are not that great compared to some
>> of the patches posted by Flavio, but I think they are worth trying.
>
>
> Hmm, I still feel like this way is ugly, although it may work.
> I guess David doesn't like it either.
>

Notice how I referred to it as a work-around? :)

It certainly isn't a great way to resolve the issue, but I wanted to
offer my opinion on the issue since you asked.

> Anyway, Flavio, could you try the following patch as well?
>
> Thanks a lot!
>
>>
>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>> index ef60244..d7b9b99 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -1290,6 +1290,12 @@ static bool slaves_support_netpoll(struct net_device *bond_dev)
>>   static void bond_poll_controller(struct net_device *bond_dev)
>>   {
>>   	struct net_device *dev = bond_dev->npinfo->netpoll->real_dev;
>> +	struct bonding *bond = netdev_priv(bond_dev);
>> +
>> +	if (!write_trylock(&bond->lock))
>> +		return;
>> +	write_unlock(&bond->lock);
>> +
>>   	if (dev != bond_dev)
>>   		netpoll_poll_dev(dev);
>>   }
>> @@ -4418,7 +4424,11 @@ static void bond_set_xmit_hash_policy(struct bonding *bond)
>>
>>   static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
>>   {
>> -	const struct bonding *bond = netdev_priv(dev);
>> +	struct bonding *bond = netdev_priv(dev);
>> +
>> +	if (!write_trylock(&bond->lock))
>> +		return NETDEV_TX_BUSY;
>> +	write_unlock(&bond->lock);
>>
>>   	switch (bond->params.mode) {
>>   	case BOND_MODE_ROUNDROBIN:
>>
>> The other key to all of this is to make sure that queuing is done
>> correctly now that we expect to queue these frames and have them sent at
>> some point when there is a member of the bond that is actually capable
>> of sending them out.
>>
>> The new style of sending queued skbs in a workqueue is much better than
>> what was done in the 2.6.9 timeframe, but careful attention should still
>> be paid to txq lock and which processor is the owner.  Returning
>> something other than NETDEV_TX_OK from bond_start_xmit and checking for
>> locks being held there should also help with any deadlocks that show up
>> while running in queue_process (though they would not be recursive).
>>
>> I'm not in a good spot to test this right now, but I can take a look at
>> next week and we can try and track down any of the other deadlocks that
>> currently exist as I suspect this will not resolve all of the issues.
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-06-07 13:03                       ` Andy Gospodarek
  0 siblings, 0 replies; 73+ messages in thread
From: Andy Gospodarek @ 2010-06-07 13:03 UTC (permalink / raw)
  To: Cong Wang
  Cc: bridge, Neil Horman, netdev, Andy Gospodarek, Jay Vosburgh,
	linux-kernel, David Miller, Flavio Leitner, Jeff Moyer,
	Matt Mackall, bonding-devel

On Mon, Jun 07, 2010 at 05:57:49PM +0800, Cong Wang wrote:
> On 06/05/10 03:18, Andy Gospodarek wrote:
>> On Wed, Jun 02, 2010 at 06:04:45PM +0800, Cong Wang wrote:
>>> On 06/02/10 02:42, Jay Vosburgh wrote:
>>>> Cong Wang<amwang@redhat.com>   wrote:
>>>>
>>>>> On 06/01/10 03:08, Flavio Leitner wrote:
>>>>>> On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
>>>>>>> Hi, Flavio,
>>>>>>>
>>>>>>> Please use the attached patch instead, try to see if it solves
>>>>>>> all your problems.
>>>>>>
>>>>>> I tried and it hangs. No backtraces this time.
>>>>>> The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
>>>>>> notification, so I think it won't work.
>>>>>
>>>>> Ah, I thought the same.
>>>>>
>>>>>>
>>>>>> Please, correct if I'm wrong, but when a failover happens with your
>>>>>> patch applied, the netconsole would be disabled forever even with
>>>>>> another healthy slave, right?
>>>>>>
>>>>>
>>>>> Yes, this is an easy solution, because bonding has several modes,
>>>>> it is complex to make netpoll work in different modes.
>>>>
>>>> 	If I understand correctly, the root cause of the problem with
>>>> netconsole and bonding is that bonding is, ultimately, performing
>>>> printks with a write lock held, and when netconsole recursively calls
>>>> into bonding to send the printk over the netconsole, there is a deadlock
>>>> (when the bonding xmit function attempts to acquire the same lock for
>>>> read).
>>>
>>>
>>> Yes.
>>>
>>>>
>>>> 	You're trying to avoid the deadlock by shutting off netconsole
>>>> (permanently, it looks like) for one problem case: a failover, which
>>>> does some printks with a write lock held.
>>>>
>>>> 	This doesn't look to me like a complete solution, there are
>>>> other cases in bonding that will do printk with write locks held.  I
>>>> suspect those will also hang netconsole as things exist today, and won't
>>>> be affected by your patch below.
>>>
>>>
>>> I can expect that, bonding modes are complex.
>>>
>>>>
>>>> 	For example:
>>>>
>>>> 	The sysfs functions to set the primary (bonding_store_primary)
>>>> or active (bonding_store_active_slave) options: a pr_info is called to
>>>> provide a log message of the results.  These could be tested by setting
>>>> the primary or active options via sysfs, e.g.,
>>>>
>>>> echo eth0>   /sys/class/net/bond0/bonding/primary
>>>> echo eth0>   /sys/class/net/bond0/bonding/active
>>>>
>>>> 	If the kernel is defined with DEBUG, there are a few pr_debug
>>>> calls within write_locks (bond_del_vlan, for example).
>>>>
>>>> 	If the slave's underlying device driver's ndo_vlan_rx_register
>>>> or ndo_vlan_rx_kill_vid functions call printk (and it looks like some do
>>>> for error cases, e.g., igbvf, ehea, enic), those would also presumably
>>>> deadlock (because bonding holds its write_lock when calling the ndo_
>>>> vlan functions).
>>>>
>>>> 	It also appears that (with the patch below) some nominally
>>>> normal usage patterns will immediately disable netconsole.  The one that
>>>> comes to mind is if the primary= option is set (to "eth1" for this
>>>> example), but that slave not enslaved first (the slaves are added, say,
>>>> eth0 then eth1).  In that situation, when the primary slave (eth1 here)
>>>> is added, the first thing that will happen is a failover, and that will
>>>> disable netconsole.
>>>>
>>>
>>> Thanks for your detailed explanation!
>>>
>>> This is why I said bonding is complex. I guess we would have to adjust
>>> netpoll code for different bonding cases, one solution seems not fix all.
>>> I am not sure how much work to do, since I am not familiar with bonding
>>> code. Maybe Andy can help?
>>>
>>
>> Sorry I've been silent until now.  This does seem quite similar to a
>> problem I've previously encountered when dealing with bonding+netpoll on
>> some old 2.6.9-based kernels.  There is no guarantee the methods used
>> there will apply here, but I'll talk about them anyway.
>>
>> As Flavio noticed, recursive calls into the bond transmit routines were
>> not a good idea.  I discovered the same and worked around this issue by
>> checking to see if we could take the bond->lock for writing before
>> continuing.  If we could not get, I wanted to signal that this should be
>> queued for transmission later.  Based on the flow of netpoll_send_skb
>> (or possibly for another reason that is escaping me right now) I added
>> one of these checks in bond_poll_controller too.  These aren't the
>> prettiest fixes, but seemed to work well for me when I did this work in
>> the past.  I realize the differences are not that great compared to some
>> of the patches posted by Flavio, but I think they are worth trying.
>
>
> Hmm, I still feel like this way is ugly, although it may work.
> I guess David doesn't like it either.
>

Notice how I referred to it as a work-around? :)

It certainly isn't a great way to resolve the issue, but I wanted to
offer my opinion on the issue since you asked.

> Anyway, Flavio, could you try the following patch as well?
>
> Thanks a lot!
>
>>
>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>> index ef60244..d7b9b99 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -1290,6 +1290,12 @@ static bool slaves_support_netpoll(struct net_device *bond_dev)
>>   static void bond_poll_controller(struct net_device *bond_dev)
>>   {
>>   	struct net_device *dev = bond_dev->npinfo->netpoll->real_dev;
>> +	struct bonding *bond = netdev_priv(bond_dev);
>> +
>> +	if (!write_trylock(&bond->lock))
>> +		return;
>> +	write_unlock(&bond->lock);
>> +
>>   	if (dev != bond_dev)
>>   		netpoll_poll_dev(dev);
>>   }
>> @@ -4418,7 +4424,11 @@ static void bond_set_xmit_hash_policy(struct bonding *bond)
>>
>>   static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
>>   {
>> -	const struct bonding *bond = netdev_priv(dev);
>> +	struct bonding *bond = netdev_priv(dev);
>> +
>> +	if (!write_trylock(&bond->lock))
>> +		return NETDEV_TX_BUSY;
>> +	write_unlock(&bond->lock);
>>
>>   	switch (bond->params.mode) {
>>   	case BOND_MODE_ROUNDROBIN:
>>
>> The other key to all of this is to make sure that queuing is done
>> correctly now that we expect to queue these frames and have them sent at
>> some point when there is a member of the bond that is actually capable
>> of sending them out.
>>
>> The new style of sending queued skbs in a workqueue is much better than
>> what was done in the 2.6.9 timeframe, but careful attention should still
>> be paid to txq lock and which processor is the owner.  Returning
>> something other than NETDEV_TX_OK from bond_start_xmit and checking for
>> locks being held there should also help with any deadlocks that show up
>> while running in queue_process (though they would not be recursive).
>>
>> I'm not in a good spot to test this right now, but I can take a look at
>> next week and we can try and track down any of the other deadlocks that
>> currently exist as I suspect this will not resolve all of the issues.
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 73+ messages in thread

* [PATCH] netconsole: queue console messages to send later
  2010-06-01 18:42               ` [Bridge] " Jay Vosburgh
@ 2010-06-07 19:24                 ` Flavio Leitner
  -1 siblings, 0 replies; 73+ messages in thread
From: Flavio Leitner @ 2010-06-07 19:24 UTC (permalink / raw)
  To: netdev
  Cc: David Miller, Cong Wang, Jay Vosburgh, Flavio Leitner,
	Matt Mackall, Andy Gospodarek, Neil Horman, Jeff Moyer,
	Stephen Hemminger, lkml, bridge, bonding-devel, Flavio Leitner

Some networking drivers hold a lock in the transmit path.
If a console message is printed while that lock is held,
netconsole pushes it back through the same transmit path,
resulting in a deadlock.

This patch fixes the re-injection problem by queuing the console
messages in a preallocated circular buffer and then scheduling a
work item to send them later from another context.

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
---
 drivers/net/netconsole.c |  156 +++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 133 insertions(+), 23 deletions(-)

diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index ca142c4..874376d 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -44,6 +44,8 @@
 #include <linux/netpoll.h>
 #include <linux/inet.h>
 #include <linux/configfs.h>
+#include <linux/workqueue.h>
+#include <linux/circ_buf.h>
 
 MODULE_AUTHOR("Maintainer: Matt Mackall <mpm@selenic.com>");
 MODULE_DESCRIPTION("Console driver for network interfaces");
@@ -56,6 +58,10 @@ static char config[MAX_PARAM_LENGTH];
 module_param_string(netconsole, config, MAX_PARAM_LENGTH, 0);
 MODULE_PARM_DESC(netconsole, " netconsole=[src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]");
 
+static int logsize = PAGE_SIZE;
+module_param(logsize, int, 0444);
+MODULE_PARM_DESC(logsize, "netconsole buffer size");
+
 #ifndef	MODULE
 static int __init option_setup(char *opt)
 {
@@ -100,6 +106,75 @@ struct netconsole_target {
 	struct netpoll		np;
 };
 
+struct netconsole_msg_ctl {
+	struct circ_buf		messages;
+	unsigned long		ring_size;
+	struct page		*buffer_page;
+	struct work_struct 	tx_work;
+};
+static struct netconsole_msg_ctl *netconsole_ctl;
+
+#define RING_INC_POS(pos, inc, size) ((pos + inc) & (size - 1))
+
+static void netconsole_target_get(struct netconsole_target *nt);
+static void netconsole_target_put(struct netconsole_target *nt);
+
+static void netconsole_start_xmit(const char *msg, unsigned int length)
+{
+	int frag, left;
+	unsigned long flags;
+	struct netconsole_target *nt;
+	const char *tmp;
+
+	/* Avoid taking lock and disabling interrupts unnecessarily */
+	if (list_empty(&target_list))
+		return;
+
+	spin_lock_irqsave(&target_list_lock, flags);
+	list_for_each_entry(nt, &target_list, list) {
+		netconsole_target_get(nt);
+		if (nt->enabled && netif_running(nt->np.dev)) {
+			/*
+			 * We nest this inside the for-each-target loop above
+			 * so that we're able to get as much logging out to
+			 * at least one target if we die inside here, instead
+			 * of unnecessarily keeping all targets in lock-step.
+			 */
+			tmp = msg;
+			for (left = length; left;) {
+				frag = min(left, MAX_PRINT_CHUNK);
+				netpoll_send_udp(&nt->np, tmp, frag);
+				tmp += frag;
+				left -= frag;
+			}
+		}
+		netconsole_target_put(nt);
+	}
+	spin_unlock_irqrestore(&target_list_lock, flags);
+}
+
+static void netconsole_process_queue(struct work_struct *work)
+{
+	struct circ_buf *messages = &netconsole_ctl->messages;
+	unsigned long ring_size = netconsole_ctl->ring_size;
+	unsigned long head = ACCESS_ONCE(messages->head);
+	unsigned long len;
+
+	while (CIRC_CNT(head, messages->tail, ring_size) >= 1) {
+		/* read index before reading contents at that index */
+		smp_read_barrier_depends();
+
+		/* pick up a length that don't wrap in the middle */
+		len = CIRC_CNT_TO_END(head, messages->tail, ring_size);
+		netconsole_start_xmit(&messages->buf[messages->tail], len);
+
+		/* finish reading descriptor before incrementing tail */
+		smp_mb();
+		messages->tail = RING_INC_POS(messages->tail, len, ring_size);
+		head = ACCESS_ONCE(messages->head);
+	}
+}
+
 #ifdef	CONFIG_NETCONSOLE_DYNAMIC
 
 static struct configfs_subsystem netconsole_subsys;
@@ -702,38 +777,43 @@ static struct notifier_block netconsole_netdev_notifier = {
 	.notifier_call  = netconsole_netdev_event,
 };
 
+/* called with console sem, interrupts disabled */
 static void write_msg(struct console *con, const char *msg, unsigned int len)
 {
-	int frag, left;
-	unsigned long flags;
-	struct netconsole_target *nt;
-	const char *tmp;
+	struct circ_buf *messages = &netconsole_ctl->messages;
+	unsigned long ring_size = netconsole_ctl->ring_size;
+	unsigned long tail = ACCESS_ONCE(messages->tail);
+	unsigned long left;
+	unsigned long end;
+	unsigned long pos;
 
 	/* Avoid taking lock and disabling interrupts unnecessarily */
 	if (list_empty(&target_list))
 		return;
 
-	spin_lock_irqsave(&target_list_lock, flags);
-	list_for_each_entry(nt, &target_list, list) {
-		netconsole_target_get(nt);
-		if (nt->enabled && netif_running(nt->np.dev)) {
-			/*
-			 * We nest this inside the for-each-target loop above
-			 * so that we're able to get as much logging out to
-			 * at least one target if we die inside here, instead
-			 * of unnecessarily keeping all targets in lock-step.
-			 */
-			tmp = msg;
-			for (left = len; left;) {
-				frag = min(left, MAX_PRINT_CHUNK);
-				netpoll_send_udp(&nt->np, tmp, frag);
-				tmp += frag;
-				left -= frag;
-			}
+	pos = 0;
+	left = len;
+	while (left && CIRC_SPACE(messages->head, tail, ring_size) >= 1) {
+		end = CIRC_SPACE_TO_END(messages->head, tail, ring_size);
+		/* fast path, no wrapping is needed */
+		if (end >= left) {
+			memcpy(&messages->buf[messages->head], &msg[pos], left);
+			smp_wmb(); 
+			messages->head = RING_INC_POS(messages->head, left, ring_size);
+			left = 0;
 		}
-		netconsole_target_put(nt);
+		else {
+			/* copy up to the end */
+			memcpy(&messages->buf[messages->head], &msg[pos], end);
+			smp_wmb(); 
+			messages->head = RING_INC_POS(messages->head, end, ring_size);
+			left -= end;
+			pos += end;
+		}
+
 	}
-	spin_unlock_irqrestore(&target_list_lock, flags);
+
+	schedule_work(&netconsole_ctl->tx_work);
 }
 
 static struct console netconsole = {
@@ -746,9 +826,25 @@ static int __init init_netconsole(void)
 {
 	int err;
 	struct netconsole_target *nt, *tmp;
+	struct circ_buf *messages;
 	unsigned long flags;
 	char *target_config;
 	char *input = config;
+	int order = get_order(logsize);
+
+	err = -ENOMEM;
+	netconsole_ctl = kzalloc(sizeof(*netconsole_ctl), GFP_KERNEL);
+	if (netconsole_ctl == NULL)
+		goto nomem;
+
+	netconsole_ctl->buffer_page = alloc_pages(GFP_KERNEL, order);
+	if (netconsole_ctl->buffer_page == NULL)
+		goto nopage;
+
+	netconsole_ctl->ring_size = (PAGE_SIZE << order);
+	messages = &netconsole_ctl->messages;
+	messages->buf = page_address(netconsole_ctl->buffer_page);
+	INIT_WORK(&netconsole_ctl->tx_work, netconsole_process_queue);
 
 	if (strnlen(input, MAX_PARAM_LENGTH)) {
 		while ((target_config = strsep(&input, ";"))) {
@@ -795,6 +891,11 @@ fail:
 		free_param_target(nt);
 	}
 
+	__free_pages(netconsole_ctl->buffer_page, order);
+nopage:
+	kfree(netconsole_ctl);
+
+nomem:
 	return err;
 }
 
@@ -806,6 +907,10 @@ static void __exit cleanup_netconsole(void)
 	dynamic_netconsole_exit();
 	unregister_netdevice_notifier(&netconsole_netdev_notifier);
 
+	flush_work(&netconsole_ctl->tx_work);
+	cancel_work_sync(&netconsole_ctl->tx_work);
+	netconsole_process_queue(NULL);
+
 	/*
 	 * Targets created via configfs pin references on our module
 	 * and would first be rmdir(2)'ed from userspace. We reach
@@ -818,6 +923,11 @@ static void __exit cleanup_netconsole(void)
 		list_del(&nt->list);
 		free_param_target(nt);
 	}
+
+	__free_pages(netconsole_ctl->buffer_page,
+			get_order(netconsole_ctl->ring_size));
+
+	kfree(netconsole_ctl);
 }
 
 module_init(init_netconsole);
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 73+ messages in thread

* [Bridge] [PATCH] netconsole: queue console messages to send later
@ 2010-06-07 19:24                 ` Flavio Leitner
  0 siblings, 0 replies; 73+ messages in thread
From: Flavio Leitner @ 2010-06-07 19:24 UTC (permalink / raw)
  To: netdev
  Cc: bridge, Cong Wang, Neil Horman, Andy Gospodarek, Jay Vosburgh,
	lkml, bonding-devel, Flavio Leitner, Jeff Moyer, Flavio Leitner,
	Matt Mackall, David Miller

Some networking drivers hold a lock in the transmit path.
If a console message is printed while that lock is held,
netconsole pushes it back through the same transmit path,
resulting in a deadlock.

This patch fixes the re-injection problem by queuing the console
messages in a preallocated circular buffer and then scheduling a
work item to send them later from another context.

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
---
 drivers/net/netconsole.c |  156 +++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 133 insertions(+), 23 deletions(-)

diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index ca142c4..874376d 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -44,6 +44,8 @@
 #include <linux/netpoll.h>
 #include <linux/inet.h>
 #include <linux/configfs.h>
+#include <linux/workqueue.h>
+#include <linux/circ_buf.h>
 
 MODULE_AUTHOR("Maintainer: Matt Mackall <mpm@selenic.com>");
 MODULE_DESCRIPTION("Console driver for network interfaces");
@@ -56,6 +58,10 @@ static char config[MAX_PARAM_LENGTH];
 module_param_string(netconsole, config, MAX_PARAM_LENGTH, 0);
 MODULE_PARM_DESC(netconsole, " netconsole=[src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]");
 
+static int logsize = PAGE_SIZE;
+module_param(logsize, int, 0444);
+MODULE_PARM_DESC(logsize, "netconsole buffer size");
+
 #ifndef	MODULE
 static int __init option_setup(char *opt)
 {
@@ -100,6 +106,75 @@ struct netconsole_target {
 	struct netpoll		np;
 };
 
+struct netconsole_msg_ctl {
+	struct circ_buf		messages;
+	unsigned long		ring_size;
+	struct page		*buffer_page;
+	struct work_struct 	tx_work;
+};
+static struct netconsole_msg_ctl *netconsole_ctl;
+
+#define RING_INC_POS(pos, inc, size) ((pos + inc) & (size - 1))
+
+static void netconsole_target_get(struct netconsole_target *nt);
+static void netconsole_target_put(struct netconsole_target *nt);
+
+static void netconsole_start_xmit(const char *msg, unsigned int length)
+{
+	int frag, left;
+	unsigned long flags;
+	struct netconsole_target *nt;
+	const char *tmp;
+
+	/* Avoid taking lock and disabling interrupts unnecessarily */
+	if (list_empty(&target_list))
+		return;
+
+	spin_lock_irqsave(&target_list_lock, flags);
+	list_for_each_entry(nt, &target_list, list) {
+		netconsole_target_get(nt);
+		if (nt->enabled && netif_running(nt->np.dev)) {
+			/*
+			 * We nest this inside the for-each-target loop above
+			 * so that we're able to get as much logging out to
+			 * at least one target if we die inside here, instead
+			 * of unnecessarily keeping all targets in lock-step.
+			 */
+			tmp = msg;
+			for (left = length; left;) {
+				frag = min(left, MAX_PRINT_CHUNK);
+				netpoll_send_udp(&nt->np, tmp, frag);
+				tmp += frag;
+				left -= frag;
+			}
+		}
+		netconsole_target_put(nt);
+	}
+	spin_unlock_irqrestore(&target_list_lock, flags);
+}
+
+static void netconsole_process_queue(struct work_struct *work)
+{
+	struct circ_buf *messages = &netconsole_ctl->messages;
+	unsigned long ring_size = netconsole_ctl->ring_size;
+	unsigned long head = ACCESS_ONCE(messages->head);
+	unsigned long len;
+
+	while (CIRC_CNT(head, messages->tail, ring_size) >= 1) {
+		/* read index before reading contents at that index */
+		smp_read_barrier_depends();
+
+		/* pick up a length that don't wrap in the middle */
+		len = CIRC_CNT_TO_END(head, messages->tail, ring_size);
+		netconsole_start_xmit(&messages->buf[messages->tail], len);
+
+		/* finish reading descriptor before incrementing tail */
+		smp_mb();
+		messages->tail = RING_INC_POS(messages->tail, len, ring_size);
+		head = ACCESS_ONCE(messages->head);
+	}
+}
+
 #ifdef	CONFIG_NETCONSOLE_DYNAMIC
 
 static struct configfs_subsystem netconsole_subsys;
@@ -702,38 +777,43 @@ static struct notifier_block netconsole_netdev_notifier = {
 	.notifier_call  = netconsole_netdev_event,
 };
 
+/* called with console sem, interrupts disabled */
 static void write_msg(struct console *con, const char *msg, unsigned int len)
 {
-	int frag, left;
-	unsigned long flags;
-	struct netconsole_target *nt;
-	const char *tmp;
+	struct circ_buf *messages = &netconsole_ctl->messages;
+	unsigned long ring_size = netconsole_ctl->ring_size;
+	unsigned long tail = ACCESS_ONCE(messages->tail);
+	unsigned long left;
+	unsigned long end;
+	unsigned long pos;
 
 	/* Avoid taking lock and disabling interrupts unnecessarily */
 	if (list_empty(&target_list))
 		return;
 
-	spin_lock_irqsave(&target_list_lock, flags);
-	list_for_each_entry(nt, &target_list, list) {
-		netconsole_target_get(nt);
-		if (nt->enabled && netif_running(nt->np.dev)) {
-			/*
-			 * We nest this inside the for-each-target loop above
-			 * so that we're able to get as much logging out to
-			 * at least one target if we die inside here, instead
-			 * of unnecessarily keeping all targets in lock-step.
-			 */
-			tmp = msg;
-			for (left = len; left;) {
-				frag = min(left, MAX_PRINT_CHUNK);
-				netpoll_send_udp(&nt->np, tmp, frag);
-				tmp += frag;
-				left -= frag;
-			}
+	pos = 0;
+	left = len;
+	while (left && CIRC_SPACE(messages->head, tail, ring_size) >= 1) {
+		end = CIRC_SPACE_TO_END(messages->head, tail, ring_size);
+		/* fast path, no wrapping is needed */
+		if (end >= left) {
+			memcpy(&messages->buf[messages->head], &msg[pos], left);
+			smp_wmb(); 
+			messages->head = RING_INC_POS(messages->head, left, ring_size);
+			left = 0;
 		}
-		netconsole_target_put(nt);
+		else {
+			/* copy up to the end */
+			memcpy(&messages->buf[messages->head], &msg[pos], end);
+			smp_wmb(); 
+			messages->head = RING_INC_POS(messages->head, end, ring_size);
+			left -= end;
+			pos += end;
+		}
+
 	}
-	spin_unlock_irqrestore(&target_list_lock, flags);
+
+	schedule_work(&netconsole_ctl->tx_work);
 }
 
 static struct console netconsole = {
@@ -746,9 +826,25 @@ static int __init init_netconsole(void)
 {
 	int err;
 	struct netconsole_target *nt, *tmp;
+	struct circ_buf *messages;
 	unsigned long flags;
 	char *target_config;
 	char *input = config;
+	int order = get_order(logsize);
+
+	err = -ENOMEM;
+	netconsole_ctl = kzalloc(sizeof(*netconsole_ctl), GFP_KERNEL);
+	if (netconsole_ctl == NULL)
+		goto nomem;
+
+	netconsole_ctl->buffer_page = alloc_pages(GFP_KERNEL, order);
+	if (netconsole_ctl->buffer_page == NULL)
+		goto nopage;
+
+	netconsole_ctl->ring_size = (PAGE_SIZE << order);
+	messages = &netconsole_ctl->messages;
+	messages->buf = page_address(netconsole_ctl->buffer_page);
+	INIT_WORK(&netconsole_ctl->tx_work, netconsole_process_queue);
 
 	if (strnlen(input, MAX_PARAM_LENGTH)) {
 		while ((target_config = strsep(&input, ";"))) {
@@ -795,6 +891,11 @@ fail:
 		free_param_target(nt);
 	}
 
+	__free_pages(netconsole_ctl->buffer_page, order);
+nopage:
+	kfree(netconsole_ctl);
+
+nomem:
 	return err;
 }
 
@@ -806,6 +907,10 @@ static void __exit cleanup_netconsole(void)
 	dynamic_netconsole_exit();
 	unregister_netdevice_notifier(&netconsole_netdev_notifier);
 
+	flush_work(&netconsole_ctl->tx_work);
+	cancel_work_sync(&netconsole_ctl->tx_work);
+	netconsole_process_queue(NULL);
+
 	/*
 	 * Targets created via configfs pin references on our module
 	 * and would first be rmdir(2)'ed from userspace. We reach
@@ -818,6 +923,11 @@ static void __exit cleanup_netconsole(void)
 		list_del(&nt->list);
 		free_param_target(nt);
 	}
+
+	__free_pages(netconsole_ctl->buffer_page,
+			get_order(netconsole_ctl->ring_size));
+
+	kfree(netconsole_ctl);
 }
 
 module_init(init_netconsole);
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [PATCH] netconsole: queue console messages to send later
  2010-06-07 19:24                 ` [Bridge] " Flavio Leitner
@ 2010-06-07 19:50                   ` Matt Mackall
  -1 siblings, 0 replies; 73+ messages in thread
From: Matt Mackall @ 2010-06-07 19:50 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: netdev, David Miller, Cong Wang, Jay Vosburgh, Flavio Leitner,
	Andy Gospodarek, Neil Horman, Jeff Moyer, Stephen Hemminger,
	lkml, bridge, bonding-devel

On Mon, 2010-06-07 at 16:24 -0300, Flavio Leitner wrote:
> There are some networking drivers that hold a lock in the
> transmit path. Therefore, if a console message is printed
> after that, netconsole will push it through the transmit path,
> resulting in a deadlock.

This is an ongoing pain we've known about since before introducing the
netpoll code to the tree.

My take has always been that any form of queueing is contrary to the
goal of netpoll: timely delivery of messages even during machine-killing
situations like oopses. There may never be a second chance to deliver
the message as the machine may be locked solid. And there may be no
other way to get the message out of the box in such situations. Adding
queueing is a throwing-the-baby-out-with-the-bathwater fix.

I think Dave agrees with me here, and I believe he's said in the past
that drivers trying to print messages in such contexts should be
considered buggy.

-- 
Mathematics is the supreme nostalgia of our time.



^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [PATCH] netconsole: queue console messages to send later
@ 2010-06-07 19:50                   ` Matt Mackall
  0 siblings, 0 replies; 73+ messages in thread
From: Matt Mackall @ 2010-06-07 19:50 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: bridge, Cong Wang, Neil Horman, netdev, Jay Vosburgh, lkml,
	bonding-devel, Flavio Leitner, Jeff Moyer, Andy Gospodarek,
	David Miller

On Mon, 2010-06-07 at 16:24 -0300, Flavio Leitner wrote:
> There are some networking drivers that hold a lock in the
> transmit path. Therefore, if a console message is printed
> after that, netconsole will push it through the transmit path,
> resulting in a deadlock.

This is an ongoing pain we've known about since before introducing the
netpoll code to the tree.

My take has always been that any form of queueing is contrary to the
goal of netpoll: timely delivery of messages even during machine-killing
situations like oopses. There may never be a second chance to deliver
the message as the machine may be locked solid. And there may be no
other way to get the message out of the box in such situations. Adding
queueing is a throwing-the-baby-out-with-the-bathwater fix.

I think Dave agrees with me here, and I believe he's said in the past
that drivers trying to print messages in such contexts should be
considered buggy.

-- 
Mathematics is the supreme nostalgia of our time.



^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] netconsole: queue console messages to send later
  2010-06-07 19:50                   ` [Bridge] " Matt Mackall
@ 2010-06-07 20:00                     ` Stephen Hemminger
  -1 siblings, 0 replies; 73+ messages in thread
From: Stephen Hemminger @ 2010-06-07 20:00 UTC (permalink / raw)
  To: Matt Mackall
  Cc: Flavio Leitner, netdev, David Miller, Cong Wang, Jay Vosburgh,
	Flavio Leitner, Andy Gospodarek, Neil Horman, Jeff Moyer, lkml,
	bridge, bonding-devel

On Mon, 07 Jun 2010 14:50:48 -0500
Matt Mackall <mpm@selenic.com> wrote:

> On Mon, 2010-06-07 at 16:24 -0300, Flavio Leitner wrote:
> > There are some networking drivers that hold a lock in the
> > transmit path. Therefore, if a console message is printed
> > after that, netconsole will push it through the transmit path,
> > resulting in a deadlock.
> 
> This is an ongoing pain we've known about since before introducing the
> netpoll code to the tree.
> 
> My take has always been that any form of queueing is contrary to the
> goal of netpoll: timely delivery of messages even during machine-killing
> situations like oopses. There may never be a second chance to deliver
> the message as the machine may be locked solid. And there may be no
> other way to get the message out of the box in such situations. Adding
> queueing is a throwing-the-baby-out-with-the-bathwater fix.
> 
> I think Dave agrees with me here, and I believe he's said in the past
> that drivers trying to print messages in such contexts should be
> considered buggy.
> 

Because it is too hard to fix all possible device configurations.
There should be a way to detect recursion and just drop the message to
avoid deadlock.

-- 

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [PATCH] netconsole: queue console messages to send later
@ 2010-06-07 20:00                     ` Stephen Hemminger
  0 siblings, 0 replies; 73+ messages in thread
From: Stephen Hemminger @ 2010-06-07 20:00 UTC (permalink / raw)
  To: Matt Mackall
  Cc: bridge, Flavio Leitner, bonding-devel, Cong Wang, Neil Horman,
	netdev, Jay Vosburgh, lkml, David, Flavio Leitner, Jeff Moyer,
	Andy Gospodarek, Miller

On Mon, 07 Jun 2010 14:50:48 -0500
Matt Mackall <mpm@selenic.com> wrote:

> On Mon, 2010-06-07 at 16:24 -0300, Flavio Leitner wrote:
> > There are some networking drivers that hold a lock in the
> > transmit path. Therefore, if a console message is printed
> > after that, netconsole will push it through the transmit path,
> > resulting in a deadlock.
> 
> This is an ongoing pain we've known about since before introducing the
> netpoll code to the tree.
> 
> My take has always been that any form of queueing is contrary to the
> goal of netpoll: timely delivery of messages even during machine-killing
> situations like oopses. There may never be a second chance to deliver
> the message as the machine may be locked solid. And there may be no
> other way to get the message out of the box in such situations. Adding
> queueing is a throwing-the-baby-out-with-the-bathwater fix.
> 
> I think Dave agrees with me here, and I believe he's said in the past
> that drivers trying to print messages in such contexts should be
> considered buggy.
> 

Because it is too hard to fix all possible device configurations.
There should be a way to detect recursion and just drop the message to
avoid deadlock.

-- 

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] netconsole: queue console messages to send later
  2010-06-07 20:00                     ` [Bridge] " Stephen Hemminger
@ 2010-06-07 20:21                       ` Matt Mackall
  -1 siblings, 0 replies; 73+ messages in thread
From: Matt Mackall @ 2010-06-07 20:21 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Flavio Leitner, netdev, David Miller, Cong Wang, Jay Vosburgh,
	Flavio Leitner, Andy Gospodarek, Neil Horman, Jeff Moyer, lkml,
	bridge, bonding-devel

On Mon, 2010-06-07 at 13:00 -0700, Stephen Hemminger wrote:
> On Mon, 07 Jun 2010 14:50:48 -0500
> Matt Mackall <mpm@selenic.com> wrote:
> 
> > On Mon, 2010-06-07 at 16:24 -0300, Flavio Leitner wrote:
> > > There are some networking drivers that hold a lock in the
> > > transmit path. Therefore, if a console message is printed
> > > after that, netconsole will push it through the transmit path,
> > > resulting in a deadlock.
> > 
> > This is an ongoing pain we've known about since before introducing the
> > netpoll code to the tree.
> > 
> > My take has always been that any form of queueing is contrary to the
> > goal of netpoll: timely delivery of messages even during machine-killing
> > situations like oopses. There may never be a second chance to deliver
> > the message as the machine may be locked solid. And there may be no
> > other way to get the message out of the box in such situations. Adding
> > queueing is a throwing-the-baby-out-with-the-bathwater fix.
> > 
> > I think Dave agrees with me here, and I believe he's said in the past
> > that drivers trying to print messages in such contexts should be
> > considered buggy.
> > 
> 
> Because it to hard to fix all possible device configurations.
> There should be any way to detect recursion and just drop the message to
> avoid deadlock.

Open to suggestions. The locks in question are driver-internal. There
also may not be any actual recursion taking place:

driver path a takes private lock x
driver path a attempts printk
printk calls into netconsole
netconsole calls into driver path b
driver path b attempts to take lock x -> deadlock

So we can't even try to walk back the stack looking for such nonsense.
Though we could perhaps force queuing of all messages -from- the driver
bound to netconsole. Tricky, and not quite foolproof.

-- 
Mathematics is the supreme nostalgia of our time.



^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [PATCH] netconsole: queue console messages to send later
@ 2010-06-07 20:21                       ` Matt Mackall
  0 siblings, 0 replies; 73+ messages in thread
From: Matt Mackall @ 2010-06-07 20:21 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: bridge, Flavio Leitner, Cong Wang, Neil Horman, netdev,
	Jay Vosburgh, lkml, bonding-devel, Flavio Leitner, Jeff Moyer,
	Andy Gospodarek, David Miller

On Mon, 2010-06-07 at 13:00 -0700, Stephen Hemminger wrote:
> On Mon, 07 Jun 2010 14:50:48 -0500
> Matt Mackall <mpm@selenic.com> wrote:
> 
> > On Mon, 2010-06-07 at 16:24 -0300, Flavio Leitner wrote:
> > > There are some networking drivers that hold a lock in the
> > > transmit path. Therefore, if a console message is printed
> > > after that, netconsole will push it through the transmit path,
> > > resulting in a deadlock.
> > 
> > This is an ongoing pain we've known about since before introducing the
> > netpoll code to the tree.
> > 
> > My take has always been that any form of queueing is contrary to the
> > goal of netpoll: timely delivery of messages even during machine-killing
> > situations like oopses. There may never be a second chance to deliver
> > the message as the machine may be locked solid. And there may be no
> > other way to get the message out of the box in such situations. Adding
> > queueing is a throwing-the-baby-out-with-the-bathwater fix.
> > 
> > I think Dave agrees with me here, and I believe he's said in the past
> > that drivers trying to print messages in such contexts should be
> > considered buggy.
> > 
> 
> Because it is too hard to fix all possible device configurations.
> There should be a way to detect recursion and just drop the message to
> avoid deadlock.

Open to suggestions. The locks in question are driver-internal. There
also may not be any actual recursion taking place:

driver path a takes private lock x
driver path a attempts printk
printk calls into netconsole
netconsole calls into driver path b
driver path b attempts to take lock x -> deadlock

So we can't even try to walk back the stack looking for such nonsense.
Though we could perhaps force queuing of all messages -from- the driver
bound to netconsole. Tricky, and not quite foolproof.

-- 
Mathematics is the supreme nostalgia of our time.



^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] netconsole: queue console messages to send later
  2010-06-07 19:24                 ` [Bridge] " Flavio Leitner
@ 2010-06-07 23:50                   ` David Miller
  -1 siblings, 0 replies; 73+ messages in thread
From: David Miller @ 2010-06-07 23:50 UTC (permalink / raw)
  To: fleitner
  Cc: netdev, amwang, fubar, fbl, mpm, gospo, nhorman, jmoyer,
	shemminger, linux-kernel, bridge, bonding-devel

From: Flavio Leitner <fleitner@redhat.com>
Date: Mon,  7 Jun 2010 16:24:52 -0300

> There are some networking drivers that hold a lock in the
> transmit path. Therefore, if a console message is printed
> after that, netconsole will push it through the transmit path,
> resulting in a deadlock.
> 
> This patch fixes the re-injection problem by queuing the console
> messages in a preallocated circular buffer and then scheduling a
> workqueue to send them later with another context.
> 
> Signed-off-by: Flavio Leitner <fleitner@redhat.com>

You absolutely and positively MUST NOT do this.  Otherwise netconsole
becomes completely useless.  Your idea has been proposed several times
as far back as 6 years ago, it was unacceptable then and it's
unacceptable now.

The whole point of netconsole is that we may be deep in an interrupt
or other atomic context, the machine is about to hard hang, and it's
absolutely essential that we get out any and all kernel logging
messages that we can, immediately.

There may not be another timer or workqueue able to execute after the
printk() we're trying to emit.  We may never get to that point.

So if we defer messages, that means we won't get the message and we
won't be able to debug the problem.

Fix the locking in the drivers or layers that cause the issue instead
of breaking netconsole.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [PATCH] netconsole: queue console messages to send later
@ 2010-06-07 23:50                   ` David Miller
  0 siblings, 0 replies; 73+ messages in thread
From: David Miller @ 2010-06-07 23:50 UTC (permalink / raw)
  To: fleitner
  Cc: bridge, amwang, nhorman, netdev, gospo, fubar, linux-kernel, fbl,
	jmoyer, mpm, bonding-devel

From: Flavio Leitner <fleitner@redhat.com>
Date: Mon,  7 Jun 2010 16:24:52 -0300

> There are some networking drivers that hold a lock in the
> transmit path. Therefore, if a console message is printed
> after that, netconsole will push it through the transmit path,
> resulting in a deadlock.
> 
> This patch fixes the re-injection problem by queuing the console
> messages in a preallocated circular buffer and then scheduling a
> workqueue to send them later with another context.
> 
> Signed-off-by: Flavio Leitner <fleitner@redhat.com>

You absolutely and positively MUST NOT do this.  Otherwise netconsole
becomes completely useless.  Your idea has been proposed several times
as far back as 6 years ago, it was unacceptable then and it's
unacceptable now.

The whole point of netconsole is that we may be deep in an interrupt
or other atomic context, the machine is about to hard hang, and it's
absolutely essential that we get out any and all kernel logging
messages that we can, immediately.

There may not be another timer or workqueue able to execute after the
printk() we're trying to emit.  We may never get to that point.

So if we defer messages, that means we won't get the message and we
won't be able to debug the problem.

Fix the locking in the drivers or layers that cause the issue instead
of breaking netconsole.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] netconsole: queue console messages to send later
  2010-06-07 20:21                       ` [Bridge] " Matt Mackall
@ 2010-06-07 23:52                         ` David Miller
  -1 siblings, 0 replies; 73+ messages in thread
From: David Miller @ 2010-06-07 23:52 UTC (permalink / raw)
  To: mpm
  Cc: shemminger, fleitner, netdev, amwang, fubar, fbl, gospo, nhorman,
	jmoyer, linux-kernel, bridge, bonding-devel

From: Matt Mackall <mpm@selenic.com>
Date: Mon, 07 Jun 2010 15:21:31 -0500

> Open to suggestions. The locks in question are driver-internal. There
> also may not be any actual recursion taking place:
> 
> driver path a takes private lock x
> driver path a attempts printk
> printk calls into netconsole
> netconsole calls into driver path b
> driver path b attempts to take lock x -> deadlock
> 
> So we can't even try to walk back the stack looking for such nonsense.
> Though we could perhaps force queuing of all messages -from- the driver
> bound to netconsole. Tricky, and not quite foolproof.

Look, this is all nonsense talk.

This is only coming about because of the recent discussions about
bonding, so let's fix bonding's locking.  I've made concrete
suggestions on converting its rwlocks over to spinlocks and RCU to
fix the specific problem bonding has.

Every time we hit some new locking issue the knee jerk reaction is
to do something stupid to the generic netconsole code instead of
fixing the real source of the problem.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [PATCH] netconsole: queue console messages to send later
@ 2010-06-07 23:52                         ` David Miller
  0 siblings, 0 replies; 73+ messages in thread
From: David Miller @ 2010-06-07 23:52 UTC (permalink / raw)
  To: mpm
  Cc: bridge, fleitner, amwang, nhorman, netdev, fubar, linux-kernel,
	fbl, jmoyer, gospo, bonding-devel

From: Matt Mackall <mpm@selenic.com>
Date: Mon, 07 Jun 2010 15:21:31 -0500

> Open to suggestions. The locks in question are driver-internal. There
> also may not be any actual recursion taking place:
> 
> driver path a takes private lock x
> driver path a attempts printk
> printk calls into netconsole
> netconsole calls into driver path b
> driver path b attempts to take lock x -> deadlock
> 
> So we can't even try to walk back the stack looking for such nonsense.
> Though we could perhaps force queuing of all messages -from- the driver
> bound to netconsole. Tricky, and not quite foolproof.

Look, this is all nonsense talk.

This is only coming about because of the recent discussions about
bonding, so let's fix bonding's locking.  I've made concrete
suggestions on converting its rwlocks over to spinlocks and RCU to
fix the specific problem bonding has.

Every time we hit some new locking issue the knee jerk reaction is
to do something stupid to the generic netconsole code instead of
fixing the real source of the problem.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] netconsole: queue console messages to send later
  2010-06-07 23:50                   ` [Bridge] " David Miller
@ 2010-06-08  0:37                     ` Flavio Leitner
  -1 siblings, 0 replies; 73+ messages in thread
From: Flavio Leitner @ 2010-06-08  0:37 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, amwang, fubar, mpm, gospo, nhorman, jmoyer, shemminger,
	linux-kernel, bridge, bonding-devel

On Mon, Jun 07, 2010 at 04:50:24PM -0700, David Miller wrote:
> From: Flavio Leitner <fleitner@redhat.com>
> Date: Mon,  7 Jun 2010 16:24:52 -0300
> 
> > There are some networking drivers that hold a lock in the
> > transmit path. Therefore, if a console message is printed
> > after that, netconsole will push it through the transmit path,
> > resulting in a deadlock.
> > 
> > This patch fixes the re-injection problem by queuing the console
> > messages in a preallocated circular buffer and then scheduling a
> > workqueue to send them later with another context.
> > 
> > Signed-off-by: Flavio Leitner <fleitner@redhat.com>
> 
> You absolutely and positively MUST NOT do this.  Otherwise netconsole
> becomes completely useless.  Your idea has been proposed several times
> as far back as 6 years ago, it was unacceptable then and it's
> unacceptable now.
> 
> The whole point of netconsole is that we may be deep in an interrupt
> or other atomic context, the machine is about to hard hang, and it's
> absolutely essential that we get out any and all kernel logging
> messages that we can, immediately.

Got it. I've never assumed that netconsole would work reliably in
such situations, so I thought as we have better ways now it would
be helpful. See another idea below.

> There may not be another timer or workqueue able to execute after the
> printk() we're trying to emit.  We may never get to that point.

What if, in netpoll, before we push the skb to the driver, we check
for a bit saying that it's already pushing another skb? In that case,
queue the new skb inside of netpoll, and as soon as the first call returns
and tries to clear the bit, it will send the next skb.

printk("message 1")
...
netconsole called
netpoll sets the flag bit
pushes to the bonding driver which does another printk("message 2")
netconsole called again
netpoll checks for the flag, queue the message, returns.
so, bonding can finish up to send the first message
netpoll is about to return, checks for new queued messages, and pushes them.
bonding finishes up to send the second message
....

No deadlocks, skbs are ordered and still under the same opportunity
to send something. Does it sound acceptable?
It's off the top of my head, so probably this idea has some problems.


> Fix the locking in the drivers or layers that cause the issue instead
> of breaking netconsole.

Someday, somewhere, I know because I did this before, someone will
use a debugging printk() and will see the entire box hanging with
absolutely no message in any console because of this problem. 
I'm not saying that fixing driver isn't the right way to go but
it seems not enough to me.

-- 
Flavio

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [PATCH] netconsole: queue console messages to send later
@ 2010-06-08  0:37                     ` Flavio Leitner
  0 siblings, 0 replies; 73+ messages in thread
From: Flavio Leitner @ 2010-06-08  0:37 UTC (permalink / raw)
  To: David Miller
  Cc: bridge, amwang, nhorman, netdev, mpm, fubar, linux-kernel,
	jmoyer, gospo, bonding-devel

On Mon, Jun 07, 2010 at 04:50:24PM -0700, David Miller wrote:
> From: Flavio Leitner <fleitner@redhat.com>
> Date: Mon,  7 Jun 2010 16:24:52 -0300
> 
> > There are some networking drivers that hold a lock in the
> > transmit path. Therefore, if a console message is printed
> > after that, netconsole will push it through the transmit path,
> > resulting in a deadlock.
> > 
> > This patch fixes the re-injection problem by queuing the console
> > messages in a preallocated circular buffer and then scheduling a
> > workqueue to send them later with another context.
> > 
> > Signed-off-by: Flavio Leitner <fleitner@redhat.com>
> 
> You absolutely and positively MUST NOT do this.  Otherwise netconsole
> becomes completely useless.  Your idea has been proposed several times
> as far back as 6 years ago, it was unacceptable then and it's
> unacceptable now.
> 
> The whole point of netconsole is that we may be deep in an interrupt
> or other atomic context, the machine is about to hard hang, and it's
> absolutely essential that we get out any and all kernel logging
> messages that we can, immediately.

Got it. I've never assumed that netconsole would work reliably in
such situations, so I thought as we have better ways now it would
be helpful. See another idea below.

> There may not be another timer or workqueue able to execute after the
> printk() we're trying to emit.  We may never get to that point.

What if, in netpoll, before we push the skb to the driver, we check
for a bit saying that it's already pushing another skb? In that case,
queue the new skb inside of netpoll, and as soon as the first call returns
and tries to clear the bit, it will send the next skb.

printk("message 1")
...
netconsole called
netpoll sets the flag bit
pushes to the bonding driver which does another printk("message 2")
netconsole called again
netpoll checks for the flag, queue the message, returns.
so, bonding can finish up to send the first message
netpoll is about to return, checks for new queued messages, and pushes them.
bonding finishes up to send the second message
....

No deadlocks, skbs are ordered and still under the same opportunity
to send something. Does it sound acceptable?
It's off the top of my head, so probably this idea has some problems.


> Fix the locking in the drivers or layers that cause the issue instead
> of breaking netconsole.

Someday, somewhere, I know because I did this before, someone will
use a debugging printk() and will see the entire box hanging with
absolutely no message in any console because of this problem. 
I'm not saying that fixing driver isn't the right way to go but
it seems not enough to me.

-- 
Flavio

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-06-07 10:01                       ` [Bridge] " David Miller
@ 2010-06-08  8:36                         ` Cong Wang
  -1 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-06-08  8:36 UTC (permalink / raw)
  To: David Miller
  Cc: andy, fubar, fbl, linux-kernel, mpm, netdev, bridge, gospo,
	nhorman, jmoyer, shemminger, bonding-devel

On 06/07/10 18:01, David Miller wrote:
> From: Cong Wang<amwang@redhat.com>
> Date: Mon, 07 Jun 2010 17:57:49 +0800
>
>> Hmm, I still feel like this way is ugly, although it may work.
>> I guess David doesn't like it either.
>
> Of course I don't like it. :-)
>
> I suspect the locking scheme will need to be changed.
>
> Besides, if we're going to hack this up and do write lock attempts in
> the read locking paths, there is no point in using a rwlock any more.
> And I'm personally in disfavor of all rwlock usage anyways (it dirties
> the cacheline for readers just as equally for writers, and if the
> critically protected code path is short enough, that shared cache
> line atomic operation will be the predominant cost).
>
> So I'd say, 1) make this a spinlock and 2) try to use RCU for the
> read path.
>
> That would fix everything.

Yeah, agreed. Even not talking about netconsole, bonding code
does have locking problems, netconsole just makes this problem
clear.

I will try your suggestions above.

Thanks!

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-06-08  8:36                         ` Cong Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-06-08  8:36 UTC (permalink / raw)
  To: David Miller
  Cc: bridge, nhorman, netdev, gospo, fubar, linux-kernel, fbl, jmoyer,
	mpm, bonding-devel

On 06/07/10 18:01, David Miller wrote:
> From: Cong Wang<amwang@redhat.com>
> Date: Mon, 07 Jun 2010 17:57:49 +0800
>
>> Hmm, I still feel like this way is ugly, although it may work.
>> I guess David doesn't like it either.
>
> Of course I don't like it. :-)
>
> I suspect the locking scheme will need to be changed.
>
> Besides, if we're going to hack this up and do write lock attempts in
> the read locking paths, there is no point in using a rwlock any more.
> And I'm personally in disfavor of all rwlock usage anyways (it dirties
> the cacheline for readers just as equally for writers, and if the
> critically protected code path is short enough, that shared cache
> line atomic operation will be the predominant cost).
>
> So I'd say, 1) make this a spinlock and 2) try to use RCU for the
> read path.
>
> That would fix everything.

Yeah, agreed. Even not talking about netconsole, bonding code
does have locking problems, netconsole just makes this problem
clear.

I will try your suggestions above.

Thanks!

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
  2010-06-07 13:03                       ` [Bridge] " Andy Gospodarek
@ 2010-06-08  8:38                         ` Cong Wang
  -1 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-06-08  8:38 UTC (permalink / raw)
  To: Andy Gospodarek
  Cc: Jay Vosburgh, Flavio Leitner, linux-kernel, Matt Mackall, netdev,
	bridge, Andy Gospodarek, Neil Horman, Jeff Moyer,
	Stephen Hemminger, bonding-devel, David Miller

On 06/07/10 21:03, Andy Gospodarek wrote:
> On Mon, Jun 07, 2010 at 05:57:49PM +0800, Cong Wang wrote:
>> On 06/05/10 03:18, Andy Gospodarek wrote:
>>> On Wed, Jun 02, 2010 at 06:04:45PM +0800, Cong Wang wrote:
>>>> On 06/02/10 02:42, Jay Vosburgh wrote:
>>>>> Cong Wang<amwang@redhat.com>    wrote:
>>>>>
>>>>>> On 06/01/10 03:08, Flavio Leitner wrote:
>>>>>>> On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
>>>>>>>> Hi, Flavio,
>>>>>>>>
>>>>>>>> Please use the attached patch instead, try to see if it solves
>>>>>>>> all your problems.
>>>>>>>
>>>>>>> I tried and it hangs. No backtraces this time.
>>>>>>> The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
>>>>>>> notification, so I think it won't work.
>>>>>>
>>>>>> Ah, I thought the same.
>>>>>>
>>>>>>>
>>>>>>> Please, correct if I'm wrong, but when a failover happens with your
>>>>>>> patch applied, the netconsole would be disabled forever even with
>>>>>>> another healthy slave, right?
>>>>>>>
>>>>>>
>>>>>> Yes, this is an easy solution, because bonding has several modes,
>>>>>> it is complex to make netpoll work in different modes.
>>>>>
>>>>> 	If I understand correctly, the root cause of the problem with
>>>>> netconsole and bonding is that bonding is, ultimately, performing
>>>>> printks with a write lock held, and when netconsole recursively calls
>>>>> into bonding to send the printk over the netconsole, there is a deadlock
>>>>> (when the bonding xmit function attempts to acquire the same lock for
>>>>> read).
>>>>
>>>>
>>>> Yes.
>>>>
>>>>>
>>>>> 	You're trying to avoid the deadlock by shutting off netconsole
>>>>> (permanently, it looks like) for one problem case: a failover, which
>>>>> does some printks with a write lock held.
>>>>>
>>>>> 	This doesn't look to me like a complete solution, there are
>>>>> other cases in bonding that will do printk with write locks held.  I
>>>>> suspect those will also hang netconsole as things exist today, and won't
>>>>> be affected by your patch below.
>>>>
>>>>
>>>> I can expect that, bonding modes are complex.
>>>>
>>>>>
>>>>> 	For example:
>>>>>
>>>>> 	The sysfs functions to set the primary (bonding_store_primary)
>>>>> or active (bonding_store_active_slave) options: a pr_info is called to
>>>>> provide a log message of the results.  These could be tested by setting
>>>>> the primary or active options via sysfs, e.g.,
>>>>>
>>>>> echo eth0>    /sys/class/net/bond0/bonding/primary
>>>>> echo eth0>    /sys/class/net/bond0/bonding/active
>>>>>
>>>>> 	If the kernel is defined with DEBUG, there are a few pr_debug
>>>>> calls within write_locks (bond_del_vlan, for example).
>>>>>
>>>>> 	If the slave's underlying device driver's ndo_vlan_rx_register
>>>>> or ndo_vlan_rx_kill_vid functions call printk (and it looks like some do
>>>>> for error cases, e.g., igbvf, ehea, enic), those would also presumably
>>>>> deadlock (because bonding holds its write_lock when calling the ndo_
>>>>> vlan functions).
>>>>>
>>>>> 	It also appears that (with the patch below) some nominally
>>>>> normal usage patterns will immediately disable netconsole.  The one that
>>>>> comes to mind is if the primary= option is set (to "eth1" for this
>>>>> example), but that slave not enslaved first (the slaves are added, say,
>>>>> eth0 then eth1).  In that situation, when the primary slave (eth1 here)
>>>>> is added, the first thing that will happen is a failover, and that will
>>>>> disable netconsole.
>>>>>
>>>>
>>>> Thanks for your detailed explanation!
>>>>
>>>> This is why I said bonding is complex. I guess we would have to adjust
>>>> netpoll code for different bonding cases, one solution seems not fix all.
>>>> I am not sure how much work to do, since I am not familiar with bonding
>>>> code. Maybe Andy can help?
>>>>
>>>
>>> Sorry I've been silent until now.  This does seem quite similar to a
>>> problem I've previously encountered when dealing with bonding+netpoll on
>>> some old 2.6.9-based kernels.  There is no guarantee the methods used
>>> there will apply here, but I'll talk about them anyway.
>>>
>>> As Flavio noticed, recursive calls into the bond transmit routines were
>>> not a good idea.  I discovered the same and worked around this issue by
>>> checking to see if we could take the bond->lock for writing before
>>> continuing.  If we could not get, I wanted to signal that this should be
>>> queued for transmission later.  Based on the flow of netpoll_send_skb
>>> (or possibly for another reason that is escaping me right now) I added
>>> one of these checks in bond_poll_controller too.  These aren't the
>>> prettiest fixes, but seemed to work well for me when I did this work in
>>> the past.  I realize the differences are not that great compared to some
>>> of the patches posted by Flavio, but I think they are worth trying.
>>
>>
>> Hmm, I still feel like this way is ugly, although it may work.
>> I guess David doesn't like it either.
>>
>
> Notice how I referred to it as a work-around? :)
>
> It certainly isn't a great way to resolve the issue, but I wanted to
> offer my opinion on the issue since you asked.

Sorry for my misunderstanding.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
@ 2010-06-08  8:38                         ` Cong Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-06-08  8:38 UTC (permalink / raw)
  To: Andy Gospodarek
  Cc: Jay Vosburgh, Neil Horman, netdev, Andy Gospodarek, bridge,
	linux-kernel, David Miller, Flavio Leitner, Jeff Moyer,
	Matt Mackall, bonding-devel

On 06/07/10 21:03, Andy Gospodarek wrote:
> On Mon, Jun 07, 2010 at 05:57:49PM +0800, Cong Wang wrote:
>> On 06/05/10 03:18, Andy Gospodarek wrote:
>>> On Wed, Jun 02, 2010 at 06:04:45PM +0800, Cong Wang wrote:
>>>> On 06/02/10 02:42, Jay Vosburgh wrote:
>>>>> Cong Wang<amwang@redhat.com>    wrote:
>>>>>
>>>>>> On 06/01/10 03:08, Flavio Leitner wrote:
>>>>>>> On Mon, May 31, 2010 at 01:56:52PM +0800, Cong Wang wrote:
>>>>>>>> Hi, Flavio,
>>>>>>>>
>>>>>>>> Please use the attached patch instead, try to see if it solves
>>>>>>>> all your problems.
>>>>>>>
>>>>>>> I tried and it hangs. No backtraces this time.
>>>>>>> The bond_change_active_slave() prints before NETDEV_BONDING_FAILOVER
>>>>>>> notification, so I think it won't work.
>>>>>>
>>>>>> Ah, I thought the same.
>>>>>>
>>>>>>>
>>>>>>> Please, correct if I'm wrong, but when a failover happens with your
>>>>>>> patch applied, the netconsole would be disabled forever even with
>>>>>>> another healthy slave, right?
>>>>>>>
>>>>>>
>>>>>> Yes, this is an easy solution, because bonding has several modes,
>>>>>> it is complex to make netpoll work in different modes.
>>>>>
>>>>> 	If I understand correctly, the root cause of the problem with
>>>>> netconsole and bonding is that bonding is, ultimately, performing
>>>>> printks with a write lock held, and when netconsole recursively calls
>>>>> into bonding to send the printk over the netconsole, there is a deadlock
>>>>> (when the bonding xmit function attempts to acquire the same lock for
>>>>> read).
>>>>
>>>>
>>>> Yes.
>>>>
>>>>>
>>>>> 	You're trying to avoid the deadlock by shutting off netconsole
>>>>> (permanently, it looks like) for one problem case: a failover, which
>>>>> does some printks with a write lock held.
>>>>>
>>>>> 	This doesn't look to me like a complete solution, there are
>>>>> other cases in bonding that will do printk with write locks held.  I
>>>>> suspect those will also hang netconsole as things exist today, and won't
>>>>> be affected by your patch below.
>>>>
>>>>
>>>> I can expect that, bonding modes are complex.
>>>>
>>>>>
>>>>> 	For example:
>>>>>
>>>>> 	The sysfs functions to set the primary (bonding_store_primary)
>>>>> or active (bonding_store_active_slave) options: a pr_info is called to
>>>>> provide a log message of the results.  These could be tested by setting
>>>>> the primary or active options via sysfs, e.g.,
>>>>>
>>>>> echo eth0>    /sys/class/net/bond0/bonding/primary
>>>>> echo eth0>    /sys/class/net/bond0/bonding/active
>>>>>
>>>>> 	If the kernel is defined with DEBUG, there are a few pr_debug
>>>>> calls within write_locks (bond_del_vlan, for example).
>>>>>
>>>>> 	If the slave's underlying device driver's ndo_vlan_rx_register
>>>>> or ndo_vlan_rx_kill_vid functions call printk (and it looks like some do
>>>>> for error cases, e.g., igbvf, ehea, enic), those would also presumably
>>>>> deadlock (because bonding holds its write_lock when calling the ndo_
>>>>> vlan functions).
>>>>>
>>>>> 	It also appears that (with the patch below) some nominally
>>>>> normal usage patterns will immediately disable netconsole.  The one that
>>>>> comes to mind is if the primary= option is set (to "eth1" for this
>>>>> example), but that slave not enslaved first (the slaves are added, say,
>>>>> eth0 then eth1).  In that situation, when the primary slave (eth1 here)
>>>>> is added, the first thing that will happen is a failover, and that will
>>>>> disable netconsole.
>>>>>
>>>>
>>>> Thanks for your detailed explanation!
>>>>
>>>> This is why I said bonding is complex. I guess we would have to adjust
>>>> netpoll code for different bonding cases, one solution seems not fix all.
>>>> I am not sure how much work to do, since I am not familiar with bonding
>>>> code. Maybe Andy can help?
>>>>
>>>
>>> Sorry I've been silent until now.  This does seem quite similar to a
>>> problem I've previously encountered when dealing with bonding+netpoll on
>>> some old 2.6.9-based kernels.  There is no guarantee the methods used
>>> there will apply here, but I'll talk about them anyway.
>>>
>>> As Flavio noticed, recursive calls into the bond transmit routines were
>>> not a good idea.  I discovered the same and worked around this issue by
>>> checking to see if we could take the bond->lock for writing before
>>> continuing.  If we could not get, I wanted to signal that this should be
>>> queued for transmission later.  Based on the flow of netpoll_send_skb
>>> (or possibly for another reason that is escaping me right now) I added
>>> one of these checks in bond_poll_controller too.  These aren't the
>>> prettiest fixes, but seemed to work well for me when I did this work in
>>> the past.  I realize the differences are not that great compared to some
>>> of the patches posted by Flavio, but I think they are worth trying.
>>
>>
>> Hmm, I still feel like this way is ugly, although it may work.
>> I guess David doesn't like it either.
>>
>
> Notice how I referred to it as a work-around? :)
>
> It certainly isn't a great way to resolve the issue, but I wanted to
> offer my opinion on the issue since you asked.

Sorry for my misunderstanding.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] netconsole: queue console messages to send later
  2010-06-08  0:37                     ` [Bridge] " Flavio Leitner
@ 2010-06-08  8:59                       ` Cong Wang
  -1 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-06-08  8:59 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: David Miller, netdev, fubar, mpm, gospo, nhorman, jmoyer,
	shemminger, linux-kernel, bridge, bonding-devel


Thanks for your fix, Flavio!

On 06/08/10 08:37, Flavio Leitner wrote:
>> There may not be another timer or workqueue able to execute after the
>> printk() we're trying to emit.  We may never get to that point.
>
> What if, in netpoll, before we push the skb to the driver, we check
> for a bit saying that it's already pushing another skb? In that case,
> queue the new skb inside of netpoll, and as soon as the first call returns
> and tries to clear the bit, it will send the next skb.
>
> printk("message 1")
> ...
> netconsole called
> netpoll sets the flag bit
> pushes to the bonding driver which does another printk("message 2")
> netconsole called again
> netpoll checks for the flag, queue the message, returns.
> so, bonding can finish up to send the first message
> netpoll is about to return, checks for new queued messages, and pushes them.
> bonding finishes up to send the second message
> ....
>
> No deadlocks, skbs are ordered and still under the same opportunity
> to send something. Does it sound acceptable?
> It's off the top of my head, so probably this idea has some problems.
>


I am not a net expert, I am not sure if this solution really addresses
David's concern, but it makes sense to me.

>
>> Fix the locking in the drivers or layers that cause the issue instead
>> of breaking netconsole.
>
> Someday, somewhere, I know because I did this before, someone will
> use a debugging printk() and will see the entire box hanging with
> absolutely no message in any console because of this problem.
> I'm not saying that fixing driver isn't the right way to go but
> it seems not enough to me.

Well, I think netconsole is not alone, other console drivers could
have the same problem, printk() is not always available in some
situation like this.

Thanks.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Bridge] [PATCH] netconsole: queue console messages to send later
@ 2010-06-08  8:59                       ` Cong Wang
  0 siblings, 0 replies; 73+ messages in thread
From: Cong Wang @ 2010-06-08  8:59 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: bridge, nhorman, netdev, mpm, fubar, linux-kernel, bonding-devel,
	jmoyer, gospo, David Miller


Thanks for your fix, Flavio!

On 06/08/10 08:37, Flavio Leitner wrote:
>> There may not be another timer or workqueue able to execute after the
>> printk() we're trying to emit.  We may never get to that point.
>
> What if, in netpoll, before we push the skb to the driver, we check
> for a bit saying that it's already pushing another skb? In that case,
> we queue the new skb inside netpoll, and as soon as the first call
> returns and tries to clear the bit, it sends the next skb.
>
> printk("message 1")
> ...
> netconsole called
> netpoll sets the flag bit
> pushes to the bonding driver which does another printk("message 2")
> netconsole called again
> netpoll checks for the flag, queues the message, returns.
> so, bonding can finish up to send the first message
> netpoll is about to return, checks for new queued messages, and pushes them.
> bonding finishes up to send the second message
> ....
>
> No deadlocks, skbs are ordered and still under the same opportunity
> to send something. Does it sound acceptable?
> It's off the top of my head, so probably this idea has some problems.
>


I am not a net expert, so I am not sure whether this solution really
addresses David's concern, but it makes sense to me.

>
>> Fix the locking in the drivers or layers that cause the issue instead
>> of breaking netconsole.
>
> Someday, somewhere, I know because I did this before, someone will
> use a debugging printk() and will see the entire box hanging with
> absolutely no message in any console because of this problem.
> I'm not saying that fixing the driver isn't the right way to go, but
> it doesn't seem to be enough to me.

Well, I think netconsole is not alone; other console drivers could
have the same problem. printk() is not always available in
situations like this.

Thanks.

^ permalink raw reply	[flat|nested] 73+ messages in thread

end of thread, other threads:[~2010-06-08  8:59 UTC | newest]

Thread overview: 73+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-05-05  8:11 [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices Amerigo Wang
2010-05-05  8:11 ` [Bridge] " Amerigo Wang
2010-05-05  8:11 ` Amerigo Wang
2010-05-05  8:11 ` [v5 Patch 2/3] bridge: make bridge support netpoll Amerigo Wang
2010-05-05  8:11   ` [Bridge] " Amerigo Wang
2010-05-05  8:11   ` Amerigo Wang
2010-05-05  8:11 ` [v5 Patch 3/3] bonding: make bonding " Amerigo Wang
2010-05-05  8:11   ` [Bridge] " Amerigo Wang
2010-05-05  8:11   ` Amerigo Wang
2010-05-06  2:05 ` [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices Matt Mackall
2010-05-06  2:05   ` [Bridge] " Matt Mackall
2010-05-06  7:44   ` David Miller
2010-05-06  7:44     ` [Bridge] " David Miller
2010-05-07  3:24     ` Cong Wang
2010-05-07  3:24       ` [Bridge] " Cong Wang
2010-05-27 18:05 ` Flavio Leitner
2010-05-27 18:05   ` [Bridge] " Flavio Leitner
2010-05-27 20:35   ` David Miller
2010-05-27 20:35     ` [Bridge] " David Miller
2010-05-27 21:25     ` Flavio Leitner
2010-05-27 21:25       ` [Bridge] " Flavio Leitner
2010-05-28  2:47   ` Cong Wang
2010-05-28  2:47     ` [Bridge] " Cong Wang
2010-05-28 19:40     ` Flavio Leitner
2010-05-28 19:40       ` [Bridge] " Flavio Leitner
2010-05-31  5:56       ` Cong Wang
2010-05-31  5:56         ` [Bridge] " Cong Wang
2010-05-31 19:08         ` Flavio Leitner
2010-05-31 19:08           ` [Bridge] " Flavio Leitner
2010-06-01  9:57           ` Cong Wang
2010-06-01  9:57             ` [Bridge] " Cong Wang
2010-06-01 18:42             ` Jay Vosburgh
2010-06-01 18:42               ` [Bridge] " Jay Vosburgh
2010-06-02 10:04               ` Cong Wang
2010-06-02 10:04                 ` [Bridge] " Cong Wang
2010-06-04 19:18                 ` Andy Gospodarek
2010-06-04 19:18                   ` [Bridge] " Andy Gospodarek
2010-06-07  9:57                   ` Cong Wang
2010-06-07  9:57                     ` [Bridge] " Cong Wang
2010-06-07 10:01                     ` David Miller
2010-06-07 10:01                       ` [Bridge] " David Miller
2010-06-08  8:36                       ` Cong Wang
2010-06-08  8:36                         ` [Bridge] " Cong Wang
2010-06-07 13:03                     ` Andy Gospodarek
2010-06-07 13:03                       ` [Bridge] " Andy Gospodarek
2010-06-08  8:38                       ` Cong Wang
2010-06-08  8:38                         ` [Bridge] " Cong Wang
2010-06-07 19:24               ` [PATCH] netconsole: queue console messages to send later Flavio Leitner
2010-06-07 19:24                 ` [Bridge] " Flavio Leitner
2010-06-07 19:50                 ` Matt Mackall
2010-06-07 19:50                   ` [Bridge] " Matt Mackall
2010-06-07 20:00                   ` Stephen Hemminger
2010-06-07 20:00                     ` [Bridge] " Stephen Hemminger
2010-06-07 20:21                     ` Matt Mackall
2010-06-07 20:21                       ` [Bridge] " Matt Mackall
2010-06-07 23:52                       ` David Miller
2010-06-07 23:52                         ` [Bridge] " David Miller
2010-06-07 23:50                 ` David Miller
2010-06-07 23:50                   ` [Bridge] " David Miller
2010-06-08  0:37                   ` Flavio Leitner
2010-06-08  0:37                     ` [Bridge] " Flavio Leitner
2010-06-08  8:59                     ` Cong Wang
2010-06-08  8:59                       ` [Bridge] " Cong Wang
2010-05-28  8:16   ` [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices Cong Wang
2010-05-28  8:16     ` [Bridge] " Cong Wang
2010-05-28 20:42     ` Flavio Leitner
2010-05-28 20:42       ` [Bridge] " Flavio Leitner
2010-05-28 21:03       ` Jay Vosburgh
2010-05-28 21:03         ` [Bridge] " Jay Vosburgh
2010-05-31  5:29         ` Cong Wang
2010-05-31  5:29           ` [Bridge] " Cong Wang
2010-05-31  5:37           ` Cong Wang
2010-05-31  5:37             ` [Bridge] " Cong Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.