netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] bond_alb: don't disable softirq under bond_alb_xmit
@ 2012-01-06 20:23 Maxim Uvarov
  2012-01-06 21:33 ` Jay Vosburgh
  0 siblings, 1 reply; 7+ messages in thread
From: Maxim Uvarov @ 2012-01-06 20:23 UTC (permalink / raw)
  To: netdev; +Cc: fubar, andy, davem, Maxim Uvarov, Cong Wang

No need to lock soft irqs under bond_alb_xmit()
which already has softirq disabled.

Signed-off-by: Maxim Uvarov <maxim.uvarov@oracle.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
---
 drivers/net/bonding/bond_alb.c |   40 +++++++++++++++++++++++++++-------------
 1 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 106b88a..42d4286 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -135,7 +135,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
 	struct tlb_client_info *tx_hash_table;
 	u32 index;
 
-	_lock_tx_hashtbl(bond);
+	spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
 
 	/* clear slave from tx_hashtbl */
 	tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
@@ -152,7 +152,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
 
 	tlb_init_slave(slave);
 
-	_unlock_tx_hashtbl(bond);
+	spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
 }
 
 /* Must be called before starting the monitor timer */
@@ -226,15 +226,13 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
 	return least_loaded;
 }
 
-/* Caller must hold bond lock for read */
-static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
+static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index,
+						u32 skb_len)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 	struct tlb_client_info *hash_table;
 	struct slave *assigned_slave;
 
-	_lock_tx_hashtbl(bond);
-
 	hash_table = bond_info->tx_hashtbl;
 	assigned_slave = hash_table[hash_index].tx_slave;
 	if (!assigned_slave) {
@@ -263,11 +261,27 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
 		hash_table[hash_index].tx_bytes += skb_len;
 	}
 
-	_unlock_tx_hashtbl(bond);
-
 	return assigned_slave;
 }
 
+/* Caller must hold bond lock for read */
+static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index,
+					u32 skb_len)
+{
+	struct slave *tx_slave;
+	/*
+	 * We don't need to disable softirq here, becase
+	 * tlb_choose_channel() is only called by bond_alb_xmit()
+	 * which already has softirq disabled.
+	 */
+	spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
+	tx_slave = __tlb_choose_channel(bond, hash_index, skb_len);
+	spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
+	return tx_slave;
+}
+
+
+
 /*********************** rlb specific functions ***************************/
 static inline void _lock_rx_hashtbl(struct bonding *bond)
 {
@@ -548,7 +562,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
 	struct rlb_client_info *client_info;
 	u32 hash_index;
 
-	_lock_rx_hashtbl(bond);
+	spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
 
 	hash_index = bond_info->rx_hashtbl_head;
 	for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -572,7 +586,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
 		}
 	}
 
-	_unlock_rx_hashtbl(bond);
+	spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
 }
 
 /* Caller must hold both bond and ptr locks for read */
@@ -584,7 +598,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
 	struct rlb_client_info *client_info;
 	u32 hash_index = 0;
 
-	_lock_rx_hashtbl(bond);
+	spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
 
 	hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst));
 	client_info = &(bond_info->rx_hashtbl[hash_index]);
@@ -600,7 +614,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
 
 			assigned_slave = client_info->slave;
 			if (assigned_slave) {
-				_unlock_rx_hashtbl(bond);
+				spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
 				return assigned_slave;
 			}
 		} else {
@@ -652,7 +666,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
 		}
 	}
 
-	_unlock_rx_hashtbl(bond);
+	spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
 
 	return assigned_slave;
 }
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] bond_alb: don't disable softirq under bond_alb_xmit
  2012-01-06 20:23 [PATCH] bond_alb: don't disable softirq under bond_alb_xmit Maxim Uvarov
@ 2012-01-06 21:33 ` Jay Vosburgh
  2012-01-07 18:14   ` David Miller
  0 siblings, 1 reply; 7+ messages in thread
From: Jay Vosburgh @ 2012-01-06 21:33 UTC (permalink / raw)
  To: Maxim Uvarov; +Cc: netdev, andy, davem, Cong Wang

Maxim Uvarov <maxim.uvarov@oracle.com> wrote:

>No need to lock soft irqs under bond_alb_xmit()
>which already has softirq disabled.

	In commit:

commit 6603a6f25e4bca922a7dfbf0bf03072d98850176
Author: Jay Vosburgh <fubar@us.ibm.com>
Date:   Wed Oct 17 17:37:50 2007 -0700

    bonding: Convert more locks to _bh, acquire rtnl, for new locking
    
        Convert more lock acquisitions to _bh flavor to avoid deadlock
    with workqueue activity and add acquisition of RTNL in appropriate places.
    Affects ALB mode, as well as core bonding functions and sysfs.
    
    Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
    Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
    Signed-off-by: Jeff Garzik <jeff@garzik.org>

	the _lock_tx_hashtbl was upgraded from regular to _bh to prevent
deadlocks.  I don't recall right offhand what deadlock this prevented,
but are we sure there are no possible issues with converting this lock
back to a non-_bh acquisition?  A lot has changed since then, so I'm
willing to believe it's no longer an issue, but I think a bit of
research is warranted.

	Also, unlike my log message from the above commit, it would be
useful for future reference to describe the actual problem that this is
fixing.

	-J

>Signed-off-by: Maxim Uvarov <maxim.uvarov@oracle.com>
>Signed-off-by: Cong Wang <amwang@redhat.com>
>---
> drivers/net/bonding/bond_alb.c |   40 +++++++++++++++++++++++++++-------------
> 1 files changed, 27 insertions(+), 13 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>index 106b88a..42d4286 100644
>--- a/drivers/net/bonding/bond_alb.c
>+++ b/drivers/net/bonding/bond_alb.c
>@@ -135,7 +135,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
> 	struct tlb_client_info *tx_hash_table;
> 	u32 index;
>
>-	_lock_tx_hashtbl(bond);
>+	spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
>
> 	/* clear slave from tx_hashtbl */
> 	tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
>@@ -152,7 +152,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
>
> 	tlb_init_slave(slave);
>
>-	_unlock_tx_hashtbl(bond);
>+	spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
> }
>
> /* Must be called before starting the monitor timer */
>@@ -226,15 +226,13 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
> 	return least_loaded;
> }
>
>-/* Caller must hold bond lock for read */
>-static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
>+static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index,
>+						u32 skb_len)
> {
> 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> 	struct tlb_client_info *hash_table;
> 	struct slave *assigned_slave;
>
>-	_lock_tx_hashtbl(bond);
>-
> 	hash_table = bond_info->tx_hashtbl;
> 	assigned_slave = hash_table[hash_index].tx_slave;
> 	if (!assigned_slave) {
>@@ -263,11 +261,27 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
> 		hash_table[hash_index].tx_bytes += skb_len;
> 	}
>
>-	_unlock_tx_hashtbl(bond);
>-
> 	return assigned_slave;
> }
>
>+/* Caller must hold bond lock for read */
>+static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index,
>+					u32 skb_len)
>+{
>+	struct slave *tx_slave;
>+	/*
>+	 * We don't need to disable softirq here, becase
>+	 * tlb_choose_channel() is only called by bond_alb_xmit()
>+	 * which already has softirq disabled.
>+	 */
>+	spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
>+	tx_slave = __tlb_choose_channel(bond, hash_index, skb_len);
>+	spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
>+	return tx_slave;
>+}
>+
>+
>+
> /*********************** rlb specific functions ***************************/
> static inline void _lock_rx_hashtbl(struct bonding *bond)
> {
>@@ -548,7 +562,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
> 	struct rlb_client_info *client_info;
> 	u32 hash_index;
>
>-	_lock_rx_hashtbl(bond);
>+	spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
>
> 	hash_index = bond_info->rx_hashtbl_head;
> 	for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
>@@ -572,7 +586,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
> 		}
> 	}
>
>-	_unlock_rx_hashtbl(bond);
>+	spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
> }
>
> /* Caller must hold both bond and ptr locks for read */
>@@ -584,7 +598,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> 	struct rlb_client_info *client_info;
> 	u32 hash_index = 0;
>
>-	_lock_rx_hashtbl(bond);
>+	spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
>
> 	hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst));
> 	client_info = &(bond_info->rx_hashtbl[hash_index]);
>@@ -600,7 +614,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
>
> 			assigned_slave = client_info->slave;
> 			if (assigned_slave) {
>-				_unlock_rx_hashtbl(bond);
>+				spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
> 				return assigned_slave;
> 			}
> 		} else {
>@@ -652,7 +666,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> 		}
> 	}
>
>-	_unlock_rx_hashtbl(bond);
>+	spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
>
> 	return assigned_slave;
> }
>-- 
>1.7.4.1
>

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] bond_alb: don't disable softirq under bond_alb_xmit
  2012-01-06 21:33 ` Jay Vosburgh
@ 2012-01-07 18:14   ` David Miller
  2012-01-09 18:59     ` Maxim Uvarov
  0 siblings, 1 reply; 7+ messages in thread
From: David Miller @ 2012-01-07 18:14 UTC (permalink / raw)
  To: fubar; +Cc: maxim.uvarov, netdev, andy, amwang

From: Jay Vosburgh <fubar@us.ibm.com>
Date: Fri, 06 Jan 2012 13:33:25 -0800

> Maxim Uvarov <maxim.uvarov@oracle.com> wrote:
> 
>>No need to lock soft irqs under bond_alb_xmit()
>>which already has softirq disabled.
> 
> 	In commit:
> 
> commit 6603a6f25e4bca922a7dfbf0bf03072d98850176
> Author: Jay Vosburgh <fubar@us.ibm.com>
> Date:   Wed Oct 17 17:37:50 2007 -0700
> 
>     bonding: Convert more locks to _bh, acquire rtnl, for new locking
>     
>         Convert more lock acquisitions to _bh flavor to avoid deadlock
>     with workqueue activity and add acquisition of RTNL in appropriate places.
>     Affects ALB mode, as well as core bonding functions and sysfs.
>     
>     Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
>     Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
>     Signed-off-by: Jeff Garzik <jeff@garzik.org>
> 
> 	the _lock_tx_hashtbl was upgraded from regular to _bh to prevent
> deadlocks.  I don't recall right offhand what deadlock this prevented,
> but are we sure there are no possible issues with converting this lock
> back to a non-_bh acquisition?

Maxim's patch is not changing the BH'ness of the list.


He's just avoiding a BH disable which is unnecessary because BH is
already disabled in the effected code path(s).

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] bond_alb: don't disable softirq under bond_alb_xmit
  2012-01-07 18:14   ` David Miller
@ 2012-01-09 18:59     ` Maxim Uvarov
  2012-01-09 19:36       ` Jay Vosburgh
  0 siblings, 1 reply; 7+ messages in thread
From: Maxim Uvarov @ 2012-01-09 18:59 UTC (permalink / raw)
  To: David Miller; +Cc: fubar, netdev, andy, amwang

On 01/07/2012 10:14 AM, David Miller wrote:
> From: Jay Vosburgh<fubar@us.ibm.com>
> Date: Fri, 06 Jan 2012 13:33:25 -0800
>
>> Maxim Uvarov<maxim.uvarov@oracle.com>  wrote:
>>
>>> No need to lock soft irqs under bond_alb_xmit()
>>> which already has softirq disabled.
>>
>> 	In commit:
>>
>> commit 6603a6f25e4bca922a7dfbf0bf03072d98850176
>> Author: Jay Vosburgh<fubar@us.ibm.com>
>> Date:   Wed Oct 17 17:37:50 2007 -0700
>>
>>      bonding: Convert more locks to _bh, acquire rtnl, for new locking
>>
>>          Convert more lock acquisitions to _bh flavor to avoid deadlock
>>      with workqueue activity and add acquisition of RTNL in appropriate places.
>>      Affects ALB mode, as well as core bonding functions and sysfs.
>>
>>      Signed-off-by: Andy Gospodarek<andy@greyhouse.net>
>>      Signed-off-by: Jay Vosburgh<fubar@us.ibm.com>
>>      Signed-off-by: Jeff Garzik<jeff@garzik.org>
>>
>> 	the _lock_tx_hashtbl was upgraded from regular to _bh to prevent
>> deadlocks.  I don't recall right offhand what deadlock this prevented,
>> but are we sure there are no possible issues with converting this lock
>> back to a non-_bh acquisition?
>
> Maxim's patch is not changing the BH'ness of the list.
>
>
> He's just avoiding a BH disable which is unnecessary because BH is
> already disabled in the effected code path(s).
>

Yes, I only removed disabling BH for tlb_choose_channel(). In other 
places this lock still disables BH. This makes lock more accurate,
because there are 2 paths for execution: 1. dev_queue_xmit() and BH
are already disabled. 2. netpoll and irqs are disabled. So no need to 
enable/disable BH.

This patch also removes waring:
WARNING: at kernel/softirq.c:159 local_bh_enable_ip+0x69/0x80()
   [<c0461b59>] ? local_bh_enable_ip+0x69/0x80
   [<c045aea1>] warn_slowpath_common+0x81/0xa0
   [<c0461b59>] ? local_bh_enable_ip+0x69/0x80
   [<c045aee2>] warn_slowpath_null+0x22/0x30
   [<c0461b59>] local_bh_enable_ip+0x69/0x80
   [<c086ab83>] _raw_spin_unlock_bh+0x13/0x20
   [<f82b2150>] tlb_choose_channel+0x50/0xb0 [bonding]
   [<f82b3387>] bond_alb_xmit+0x207/0x210 [bonding]
   [<f82ab647>] __bond_start_xmit+0x177/0x1a0 [bonding]
   [<f82abe66>] bond_start_xmit+0x46/0x80 [bonding]
   [<c07c9681>] netpoll_send_skb_on_dev+0x121/0x1a0
   [<c07a9afe>] ? __alloc_skb+0x7e/0x120
   [<c07c996c>] netpoll_send_udp+0x1dc/0x1f0
   [<f838e73f>] write_msg+0x8f/0xc0 [netconsole]
   [<f838e6b0>] ? store_remote_port+0x60/0x60 [netconsole]
   [<c045b2a7>] __call_console_drivers+0x77/0x90
   [<c045b311>] _call_console_drivers+0x51/0x90
   [<c045b60b>] call_console_drivers+0x8b/0xd0
   [<c045b867>] console_unlock+0x37/0xc0
   [<c045be59>] vprintk+0x159/0x300
   [<c0541215>] ? path_openat+0xc5/0x360
   [<c0541595>] ? do_filp_open+0x35/0x80
   [<c045c020>] printk+0x20/0x30
   [<c06acc9d>] __handle_sysrq+0x3d/0x110
   [<c05aaaee>] ? security_file_permission+0x1e/0x90
   [<c06acdba>] write_sysrq_trigger+0x4a/0x50
   [<c06acd70>] ? __handle_sysrq+0x110/0x110
   [<c057e28d>] proc_reg_write+0x5d/0x80
   [<c0534ffb>] vfs_write+0x9b/0x160
   [<c04aff91>] ? audit_syscall_exit+0x381/0x3f0
   [<c057e230>] ? proc_reg_poll+0x80/0x80
   [<c0535622>] sys_write+0x42/0x70
   [<c087231f>] sysenter_do_call+0x12/0x28

Maxim.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] bond_alb: don't disable softirq under bond_alb_xmit
  2012-01-09 18:59     ` Maxim Uvarov
@ 2012-01-09 19:36       ` Jay Vosburgh
  2012-01-09 19:42         ` Maxim Uvarov
  0 siblings, 1 reply; 7+ messages in thread
From: Jay Vosburgh @ 2012-01-09 19:36 UTC (permalink / raw)
  To: Maxim Uvarov; +Cc: David Miller, netdev, andy, amwang

Maxim Uvarov <maxim.uvarov@oracle.com> wrote:

>On 01/07/2012 10:14 AM, David Miller wrote:
>> From: Jay Vosburgh<fubar@us.ibm.com>
>> Date: Fri, 06 Jan 2012 13:33:25 -0800
>>
>>> Maxim Uvarov<maxim.uvarov@oracle.com>  wrote:
>>>
>>>> No need to lock soft irqs under bond_alb_xmit()
>>>> which already has softirq disabled.
>>>
>>> 	In commit:
>>>
>>> commit 6603a6f25e4bca922a7dfbf0bf03072d98850176
>>> Author: Jay Vosburgh<fubar@us.ibm.com>
>>> Date:   Wed Oct 17 17:37:50 2007 -0700
>>>
>>>      bonding: Convert more locks to _bh, acquire rtnl, for new locking
>>>
>>>          Convert more lock acquisitions to _bh flavor to avoid deadlock
>>>      with workqueue activity and add acquisition of RTNL in appropriate places.
>>>      Affects ALB mode, as well as core bonding functions and sysfs.
>>>
>>>      Signed-off-by: Andy Gospodarek<andy@greyhouse.net>
>>>      Signed-off-by: Jay Vosburgh<fubar@us.ibm.com>
>>>      Signed-off-by: Jeff Garzik<jeff@garzik.org>
>>>
>>> 	the _lock_tx_hashtbl was upgraded from regular to _bh to prevent
>>> deadlocks.  I don't recall right offhand what deadlock this prevented,
>>> but are we sure there are no possible issues with converting this lock
>>> back to a non-_bh acquisition?
>>
>> Maxim's patch is not changing the BH'ness of the list.
>>
>>
>> He's just avoiding a BH disable which is unnecessary because BH is
>> already disabled in the effected code path(s).
>>
>
>Yes, I only removed disabling BH for tlb_choose_channel(). In other places
>this lock still disables BH. This makes lock more accurate,
>because there are 2 paths for execution: 1. dev_queue_xmit() and BH
>are already disabled. 2. netpoll and irqs are disabled. So no need to
>enable/disable BH.

	The tlb_choose_channel and rlb_choose_channel parts look to be
as you describe, but you also modify tlb_clear_slave:

--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -135,7 +135,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
 	struct tlb_client_info *tx_hash_table;
 	u32 index;

-	_lock_tx_hashtbl(bond);
+	spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));

	This makes tlb_clear_slave acquire the tx_hashtbl_lock without
_bh.  The tlb_clear_slave function is called from multiple places
without already holding _bh, in addition to the call paths you list.
The cases I see are:

	bond_alb_monitor (which runs from a workqueue)

	bond_alb_handle_link_change (called from bond_miimon_commit,
from a workqueue)

	bond_alb_deinit_slave (called during slave removal)

	All three of these will call into tlb_clear_slave without
already holding something at _bh.  These paths do not enter
tlb_clear_slave through tlb_choose_channel or rlb_choose_channel.

	Are we sure this does not open a window wherein the non-_bh path
into tlb_clear_slave could deadlock against the with-_bh path?

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] bond_alb: don't disable softirq under bond_alb_xmit
  2012-01-09 19:36       ` Jay Vosburgh
@ 2012-01-09 19:42         ` Maxim Uvarov
  2012-01-09 20:32           ` Andy Gospodarek
  0 siblings, 1 reply; 7+ messages in thread
From: Maxim Uvarov @ 2012-01-09 19:42 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: David Miller, netdev, andy, amwang

On 01/09/2012 11:36 AM, Jay Vosburgh wrote:
> Maxim Uvarov<maxim.uvarov@oracle.com>  wrote:
>
>> On 01/07/2012 10:14 AM, David Miller wrote:
>>> From: Jay Vosburgh<fubar@us.ibm.com>
>>> Date: Fri, 06 Jan 2012 13:33:25 -0800
>>>
>>>> Maxim Uvarov<maxim.uvarov@oracle.com>   wrote:
>>>>
>>>>> No need to lock soft irqs under bond_alb_xmit()
>>>>> which already has softirq disabled.
>>>>
>>>> 	In commit:
>>>>
>>>> commit 6603a6f25e4bca922a7dfbf0bf03072d98850176
>>>> Author: Jay Vosburgh<fubar@us.ibm.com>
>>>> Date:   Wed Oct 17 17:37:50 2007 -0700
>>>>
>>>>       bonding: Convert more locks to _bh, acquire rtnl, for new locking
>>>>
>>>>           Convert more lock acquisitions to _bh flavor to avoid deadlock
>>>>       with workqueue activity and add acquisition of RTNL in appropriate places.
>>>>       Affects ALB mode, as well as core bonding functions and sysfs.
>>>>
>>>>       Signed-off-by: Andy Gospodarek<andy@greyhouse.net>
>>>>       Signed-off-by: Jay Vosburgh<fubar@us.ibm.com>
>>>>       Signed-off-by: Jeff Garzik<jeff@garzik.org>
>>>>
>>>> 	the _lock_tx_hashtbl was upgraded from regular to _bh to prevent
>>>> deadlocks.  I don't recall right offhand what deadlock this prevented,
>>>> but are we sure there are no possible issues with converting this lock
>>>> back to a non-_bh acquisition?
>>>
>>> Maxim's patch is not changing the BH'ness of the list.
>>>
>>>
>>> He's just avoiding a BH disable which is unnecessary because BH is
>>> already disabled in the effected code path(s).
>>>
>>
>> Yes, I only removed disabling BH for tlb_choose_channel(). In other places
>> this lock still disables BH. This makes lock more accurate,
>> because there are 2 paths for execution: 1. dev_queue_xmit() and BH
>> are already disabled. 2. netpoll and irqs are disabled. So no need to
>> enable/disable BH.
>
> 	The tlb_choose_channel and rlb_choose_channel parts look to be
> as you describe, but you also modify tlb_clear_slave:
>
> --- a/drivers/net/bonding/bond_alb.c
> +++ b/drivers/net/bonding/bond_alb.c
> @@ -135,7 +135,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
>   	struct tlb_client_info *tx_hash_table;
>   	u32 index;
>
> -	_lock_tx_hashtbl(bond);
> +	spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
>
> 	This makes tlb_clear_slave acquire the tx_hashtbl_lock without
> _bh.  The tlb_clear_slave function is called from multiple places
> without already holding _bh, in addition to the call paths you list.
> The cases I see are:
>
> 	bond_alb_monitor (which runs from a workqueue)
>
> 	bond_alb_handle_link_change (called from bond_miimon_commit,
> from a workqueue)
>
> 	bond_alb_deinit_slave (called during slave removal)
>
> 	All three of these will call into tlb_clear_slave without
> already holding something at _bh.  These paths do not enter
> tlb_clear_slave through tlb_choose_channel or rlb_choose_channel.
>

Yes, you are right. I will add non-bh/bh version to tlb_clear_slave().


> 	Are we sure this does not open a window wherein the non-_bh path
> into tlb_clear_slave could deadlock against the with-_bh path?
>
> 	-J
I hope so.

>
> ---
> 	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] bond_alb: don't disable softirq under bond_alb_xmit
  2012-01-09 19:42         ` Maxim Uvarov
@ 2012-01-09 20:32           ` Andy Gospodarek
  0 siblings, 0 replies; 7+ messages in thread
From: Andy Gospodarek @ 2012-01-09 20:32 UTC (permalink / raw)
  To: Maxim Uvarov; +Cc: Jay Vosburgh, David Miller, netdev, andy, amwang

On Mon, Jan 09, 2012 at 11:42:20AM -0800, Maxim Uvarov wrote:
> On 01/09/2012 11:36 AM, Jay Vosburgh wrote:
> >Maxim Uvarov<maxim.uvarov@oracle.com>  wrote:
> >
> >>On 01/07/2012 10:14 AM, David Miller wrote:
> >>>From: Jay Vosburgh<fubar@us.ibm.com>
> >>>Date: Fri, 06 Jan 2012 13:33:25 -0800
> >>>
> >>>>Maxim Uvarov<maxim.uvarov@oracle.com>   wrote:
> >>>>
> >>>>>No need to lock soft irqs under bond_alb_xmit()
> >>>>>which already has softirq disabled.
> >>>>
> >>>>	In commit:
> >>>>
> >>>>commit 6603a6f25e4bca922a7dfbf0bf03072d98850176
> >>>>Author: Jay Vosburgh<fubar@us.ibm.com>
> >>>>Date:   Wed Oct 17 17:37:50 2007 -0700
> >>>>
> >>>>      bonding: Convert more locks to _bh, acquire rtnl, for new locking
> >>>>
> >>>>          Convert more lock acquisitions to _bh flavor to avoid deadlock
> >>>>      with workqueue activity and add acquisition of RTNL in appropriate places.
> >>>>      Affects ALB mode, as well as core bonding functions and sysfs.
> >>>>
> >>>>      Signed-off-by: Andy Gospodarek<andy@greyhouse.net>
> >>>>      Signed-off-by: Jay Vosburgh<fubar@us.ibm.com>
> >>>>      Signed-off-by: Jeff Garzik<jeff@garzik.org>
> >>>>
> >>>>	the _lock_tx_hashtbl was upgraded from regular to _bh to prevent
> >>>>deadlocks.  I don't recall right offhand what deadlock this prevented,
> >>>>but are we sure there are no possible issues with converting this lock
> >>>>back to a non-_bh acquisition?
> >>>
> >>>Maxim's patch is not changing the BH'ness of the list.
> >>>
> >>>
> >>>He's just avoiding a BH disable which is unnecessary because BH is
> >>>already disabled in the effected code path(s).
> >>>
> >>
> >>Yes, I only removed disabling BH for tlb_choose_channel(). In other places
> >>this lock still disables BH. This makes lock more accurate,
> >>because there are 2 paths for execution: 1. dev_queue_xmit() and BH
> >>are already disabled. 2. netpoll and irqs are disabled. So no need to
> >>enable/disable BH.
> >
> >	The tlb_choose_channel and rlb_choose_channel parts look to be
> >as you describe, but you also modify tlb_clear_slave:
> >
> >--- a/drivers/net/bonding/bond_alb.c
> >+++ b/drivers/net/bonding/bond_alb.c
> >@@ -135,7 +135,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
> >  	struct tlb_client_info *tx_hash_table;
> >  	u32 index;
> >
> >-	_lock_tx_hashtbl(bond);
> >+	spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
> >
> >	This makes tlb_clear_slave acquire the tx_hashtbl_lock without
> >_bh.  The tlb_clear_slave function is called from multiple places
> >without already holding _bh, in addition to the call paths you list.
> >The cases I see are:
> >
> >	bond_alb_monitor (which runs from a workqueue)
> >
> >	bond_alb_handle_link_change (called from bond_miimon_commit,
> >from a workqueue)
> >
> >	bond_alb_deinit_slave (called during slave removal)
> >
> >	All three of these will call into tlb_clear_slave without
> >already holding something at _bh.  These paths do not enter
> >tlb_clear_slave through tlb_choose_channel or rlb_choose_channel.
> >
> 
> Yes, you are right. I will add non-bh/bh version to tlb_clear_slave().
> 

Can you also consider adding a _lock_tx_hashtbl_bh or similar to replace
_lock_tx_hashtbl and use _lock_tx_hashtbl for non-bh locks rather than
accessing the hash table lock directly sometimes and with
_lock_tx_hashtbl others.  Functionality aside I don't like the
inconsistency.

> 
> >	Are we sure this does not open a window wherein the non-_bh path
> >into tlb_clear_slave could deadlock against the with-_bh path?
> >
> >	-J
> I hope so.

I'm not sure hope is a strategy.  What sort of testing have you done
with these changes and lockdep enabled?

The original change to a bh-lock was made when moving from a timer-based
infrastructure to a workqueue-based infrastructure because there were
lockdep warnings all over the place when performing tests like taking
the bonding interfaces up and down, enslaving and deslaving devices, and
removing the bonding module.

These mostly stemmed from the fact that frames were often sent in
bh-context with timer handler functions for multicast and ARP.

Splitting them at this point seems reasonable, but it needs to be
right.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2012-01-09 20:32 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-06 20:23 [PATCH] bond_alb: don't disable softirq under bond_alb_xmit Maxim Uvarov
2012-01-06 21:33 ` Jay Vosburgh
2012-01-07 18:14   ` David Miller
2012-01-09 18:59     ` Maxim Uvarov
2012-01-09 19:36       ` Jay Vosburgh
2012-01-09 19:42         ` Maxim Uvarov
2012-01-09 20:32           ` Andy Gospodarek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).