linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] boning: use a read-write lock in bonding_show_bonds()
@ 2023-11-08  6:46 Haifeng Xu
  2023-11-08 14:19 ` Eric Dumazet
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Haifeng Xu @ 2023-11-08  6:46 UTC (permalink / raw)
  To: j.vosburgh
  Cc: andy, davem, edumazet, kuba, pabeni, netdev, linux-kernel, Haifeng Xu

call stack:
......
PID: 210933  TASK: ffff92424e5ec080  CPU: 13  COMMAND: "kworker/u96:2"
[ffffa7a8e96bbac0] __schedule at ffffffffb0719898
[ffffa7a8e96bbb48] schedule at ffffffffb0719e9e
[ffffa7a8e96bbb68] rwsem_down_write_slowpath at ffffffffafb3167a
[ffffa7a8e96bbc00] down_write at ffffffffb071bfc1
[ffffa7a8e96bbc18] kernfs_remove_by_name_ns at ffffffffafe3593e
[ffffa7a8e96bbc48] sysfs_unmerge_group at ffffffffafe38922
[ffffa7a8e96bbc68] dpm_sysfs_remove at ffffffffb021c96a
[ffffa7a8e96bbc80] device_del at ffffffffb0209af8
[ffffa7a8e96bbcd0] netdev_unregister_kobject at ffffffffb04a6b0e
[ffffa7a8e96bbcf8] unregister_netdevice_many at ffffffffb046d3d9
[ffffa7a8e96bbd60] default_device_exit_batch at ffffffffb046d8d1
[ffffa7a8e96bbdd0] ops_exit_list at ffffffffb045e21d
[ffffa7a8e96bbe00] cleanup_net at ffffffffb045ea46
[ffffa7a8e96bbe60] process_one_work at ffffffffafad94bb
[ffffa7a8e96bbeb0] worker_thread at ffffffffafad96ad
[ffffa7a8e96bbf10] kthread at ffffffffafae132a
[ffffa7a8e96bbf50] ret_from_fork at ffffffffafa04b92

290858 PID: 278176  TASK: ffff925deb39a040  CPU: 32  COMMAND: "node-exporter"
[ffffa7a8d14dbb80] __schedule at ffffffffb0719898
[ffffa7a8d14dbc08] schedule at ffffffffb0719e9e
[ffffa7a8d14dbc28] schedule_preempt_disabled at ffffffffb071a24e
[ffffa7a8d14dbc38] __mutex_lock at ffffffffb071af28
[ffffa7a8d14dbcb8] __mutex_lock_slowpath at ffffffffb071b1a3
[ffffa7a8d14dbcc8] mutex_lock at ffffffffb071b1e2
[ffffa7a8d14dbce0] rtnl_lock at ffffffffb047f4b5
[ffffa7a8d14dbcf0] bonding_show_bonds at ffffffffc079b1a1 [bonding]
[ffffa7a8d14dbd20] class_attr_show at ffffffffb02117ce
[ffffa7a8d14dbd30] sysfs_kf_seq_show at ffffffffafe37ba1
[ffffa7a8d14dbd50] kernfs_seq_show at ffffffffafe35c07
[ffffa7a8d14dbd60] seq_read_iter at ffffffffafd9fce0
[ffffa7a8d14dbdc0] kernfs_fop_read_iter at ffffffffafe36a10
[ffffa7a8d14dbe00] new_sync_read at ffffffffafd6de23
[ffffa7a8d14dbe90] vfs_read at ffffffffafd6e64e
[ffffa7a8d14dbed0] ksys_read at ffffffffafd70977
[ffffa7a8d14dbf10] __x64_sys_read at ffffffffafd70a0a
[ffffa7a8d14dbf20] do_syscall_64 at ffffffffb070bf1c
[ffffa7a8d14dbf50] entry_SYSCALL_64_after_hwframe at ffffffffb080007c
......

Problem description:

Thread 210933 holds the rtnl_mutex and tries to acquire the kernfs_rwsem,
but there are many readers which hold the kernfs_rwsem, so it has to sleep
for a long time to wait the readers release the lock. Thread 278176 and any
other threads which call bonding_show_bonds() also need to wait because
they try to accuire the rtnl_mutex.

bonding_show_bonds() uses rtnl_mutex to protect the bond_list traversal.
However, the addition and deletion of bond_list are only performed in
bond_init()/bond_uninit(), so we can intoduce a separate read-write lock
to synchronize bond list mutation.

What's the benefits of this change?

1) All threads which call bonding_show_bonds() only wait when the
registration or unregistration of bond device happens.

2) There are many other users of rtnl_mutex, so bonding_show_bonds()
won't compete with them.

In a word, this change reduces the lock contention of rtnl_mutex.

Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
---
 drivers/net/bonding/bond_main.c  | 4 ++++
 drivers/net/bonding/bond_sysfs.c | 6 ++++--
 include/net/bonding.h            | 3 +++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 51d47eda1c87..ac4773d19beb 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -5951,7 +5951,9 @@ static void bond_uninit(struct net_device *bond_dev)
 
 	bond_set_slave_arr(bond, NULL, NULL);
 
+	write_lock(&bonding_dev_lock);
 	list_del(&bond->bond_list);
+	write_unlock(&bonding_dev_lock);
 
 	bond_debug_unregister(bond);
 }
@@ -6364,7 +6366,9 @@ static int bond_init(struct net_device *bond_dev)
 	spin_lock_init(&bond->stats_lock);
 	netdev_lockdep_set_classes(bond_dev);
 
+	write_lock(&bonding_dev_lock);
 	list_add_tail(&bond->bond_list, &bn->dev_list);
+	write_unlock(&bonding_dev_lock);
 
 	bond_prepare_sysfs_group(bond);
 
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 2805135a7205..e107c1d7a6bf 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -28,6 +28,8 @@
 
 #define to_bond(cd)	((struct bonding *)(netdev_priv(to_net_dev(cd))))
 
+DEFINE_RWLOCK(bonding_dev_lock);
+
 /* "show" function for the bond_masters attribute.
  * The class parameter is ignored.
  */
@@ -40,7 +42,7 @@ static ssize_t bonding_show_bonds(const struct class *cls,
 	int res = 0;
 	struct bonding *bond;
 
-	rtnl_lock();
+	read_lock(&bonding_dev_lock);
 
 	list_for_each_entry(bond, &bn->dev_list, bond_list) {
 		if (res > (PAGE_SIZE - IFNAMSIZ)) {
@@ -55,7 +57,7 @@ static ssize_t bonding_show_bonds(const struct class *cls,
 	if (res)
 		buf[res-1] = '\n'; /* eat the leftover space */
 
-	rtnl_unlock();
+	read_unlock(&bonding_dev_lock);
 	return res;
 }
 
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 5b8b1b644a2d..584ba4b5b8df 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -777,6 +777,9 @@ extern struct rtnl_link_ops bond_link_ops;
 /* exported from bond_sysfs_slave.c */
 extern const struct sysfs_ops slave_sysfs_ops;
 
+/* exported from bond_sysfs.c */
+extern rwlock_t bonding_dev_lock;
+
 /* exported from bond_3ad.c */
 extern const u8 lacpdu_mcast_addr[];
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] boning: use a read-write lock in bonding_show_bonds()
  2023-11-08  6:46 [PATCH] boning: use a read-write lock in bonding_show_bonds() Haifeng Xu
@ 2023-11-08 14:19 ` Eric Dumazet
  2023-11-09  2:43   ` Haifeng Xu
  2023-11-17 10:43   ` [PATCH v2] bonding: " Haifeng Xu
  2023-11-09 15:47 ` [PATCH] boning: " Jiri Pirko
  2023-11-09 17:55 ` Stephen Hemminger
  2 siblings, 2 replies; 16+ messages in thread
From: Eric Dumazet @ 2023-11-08 14:19 UTC (permalink / raw)
  To: Haifeng Xu; +Cc: j.vosburgh, andy, davem, kuba, pabeni, netdev, linux-kernel

On Wed, Nov 8, 2023 at 7:47 AM Haifeng Xu <haifeng.xu@shopee.com> wrote:
>
> call stack:

These stacks should either be removed from the changelog, or moved
_after_ the description
of the problem. These are normal looking call stacks, you are not
fixing a crash or deadlock.

> ......
> PID: 210933  TASK: ffff92424e5ec080  CPU: 13  COMMAND: "kworker/u96:2"
> [ffffa7a8e96bbac0] __schedule at ffffffffb0719898
> [ffffa7a8e96bbb48] schedule at ffffffffb0719e9e
> [ffffa7a8e96bbb68] rwsem_down_write_slowpath at ffffffffafb3167a
> [ffffa7a8e96bbc00] down_write at ffffffffb071bfc1
> [ffffa7a8e96bbc18] kernfs_remove_by_name_ns at ffffffffafe3593e
> [ffffa7a8e96bbc48] sysfs_unmerge_group at ffffffffafe38922
> [ffffa7a8e96bbc68] dpm_sysfs_remove at ffffffffb021c96a
> [ffffa7a8e96bbc80] device_del at ffffffffb0209af8
> [ffffa7a8e96bbcd0] netdev_unregister_kobject at ffffffffb04a6b0e
> [ffffa7a8e96bbcf8] unregister_netdevice_many at ffffffffb046d3d9
> [ffffa7a8e96bbd60] default_device_exit_batch at ffffffffb046d8d1
> [ffffa7a8e96bbdd0] ops_exit_list at ffffffffb045e21d
> [ffffa7a8e96bbe00] cleanup_net at ffffffffb045ea46
> [ffffa7a8e96bbe60] process_one_work at ffffffffafad94bb
> [ffffa7a8e96bbeb0] worker_thread at ffffffffafad96ad
> [ffffa7a8e96bbf10] kthread at ffffffffafae132a
> [ffffa7a8e96bbf50] ret_from_fork at ffffffffafa04b92
>
> 290858 PID: 278176  TASK: ffff925deb39a040  CPU: 32  COMMAND: "node-exporter"
> [ffffa7a8d14dbb80] __schedule at ffffffffb0719898
> [ffffa7a8d14dbc08] schedule at ffffffffb0719e9e
> [ffffa7a8d14dbc28] schedule_preempt_disabled at ffffffffb071a24e
> [ffffa7a8d14dbc38] __mutex_lock at ffffffffb071af28
> [ffffa7a8d14dbcb8] __mutex_lock_slowpath at ffffffffb071b1a3
> [ffffa7a8d14dbcc8] mutex_lock at ffffffffb071b1e2
> [ffffa7a8d14dbce0] rtnl_lock at ffffffffb047f4b5
> [ffffa7a8d14dbcf0] bonding_show_bonds at ffffffffc079b1a1 [bonding]
> [ffffa7a8d14dbd20] class_attr_show at ffffffffb02117ce
> [ffffa7a8d14dbd30] sysfs_kf_seq_show at ffffffffafe37ba1
> [ffffa7a8d14dbd50] kernfs_seq_show at ffffffffafe35c07
> [ffffa7a8d14dbd60] seq_read_iter at ffffffffafd9fce0
> [ffffa7a8d14dbdc0] kernfs_fop_read_iter at ffffffffafe36a10
> [ffffa7a8d14dbe00] new_sync_read at ffffffffafd6de23
> [ffffa7a8d14dbe90] vfs_read at ffffffffafd6e64e
> [ffffa7a8d14dbed0] ksys_read at ffffffffafd70977
> [ffffa7a8d14dbf10] __x64_sys_read at ffffffffafd70a0a
> [ffffa7a8d14dbf20] do_syscall_64 at ffffffffb070bf1c
> [ffffa7a8d14dbf50] entry_SYSCALL_64_after_hwframe at ffffffffb080007c
> ......
>
> Problem description:
>
> Thread 210933 holds the rtnl_mutex and tries to acquire the kernfs_rwsem,
> but there are many readers which hold the kernfs_rwsem, so it has to sleep
> for a long time to wait the readers release the lock. Thread 278176 and any
> other threads which call bonding_show_bonds() also need to wait because
> they try to accuire the rtnl_mutex.

acquire

>
> bonding_show_bonds() uses rtnl_mutex to protect the bond_list traversal.
> However, the addition and deletion of bond_list are only performed in
> bond_init()/bond_uninit(), so we can intoduce a separate read-write lock

introduce

> to synchronize bond list mutation.
>
> What's the benefits of this change?
>
> 1) All threads which call bonding_show_bonds() only wait when the
> registration or unregistration of bond device happens.
>
> 2) There are many other users of rtnl_mutex, so bonding_show_bonds()
> won't compete with them.
>
> In a word, this change reduces the lock contention of rtnl_mutex.
>

This looks good to me, but please note:

1) This is net-next material, please resend next week, because
net-next is currently closed during the merge window.

2) Using a spell checker would point few typos (including in the title
"boning" -> "bonding")

Thanks.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] boning: use a read-write lock in bonding_show_bonds()
  2023-11-08 14:19 ` Eric Dumazet
@ 2023-11-09  2:43   ` Haifeng Xu
  2023-11-17 10:43   ` [PATCH v2] bonding: " Haifeng Xu
  1 sibling, 0 replies; 16+ messages in thread
From: Haifeng Xu @ 2023-11-09  2:43 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: j.vosburgh, andy, davem, kuba, pabeni, netdev, linux-kernel



On 2023/11/8 22:19, Eric Dumazet wrote:
> On Wed, Nov 8, 2023 at 7:47 AM Haifeng Xu <haifeng.xu@shopee.com> wrote:
>>
>> call stack:
> 
> These stacks should either be removed from the changelog, or moved
> _after_ the description
> of the problem. These are normal looking call stacks, you are not
> fixing a crash or deadlock.
> 
>> ......
>> PID: 210933  TASK: ffff92424e5ec080  CPU: 13  COMMAND: "kworker/u96:2"
>> [ffffa7a8e96bbac0] __schedule at ffffffffb0719898
>> [ffffa7a8e96bbb48] schedule at ffffffffb0719e9e
>> [ffffa7a8e96bbb68] rwsem_down_write_slowpath at ffffffffafb3167a
>> [ffffa7a8e96bbc00] down_write at ffffffffb071bfc1
>> [ffffa7a8e96bbc18] kernfs_remove_by_name_ns at ffffffffafe3593e
>> [ffffa7a8e96bbc48] sysfs_unmerge_group at ffffffffafe38922
>> [ffffa7a8e96bbc68] dpm_sysfs_remove at ffffffffb021c96a
>> [ffffa7a8e96bbc80] device_del at ffffffffb0209af8
>> [ffffa7a8e96bbcd0] netdev_unregister_kobject at ffffffffb04a6b0e
>> [ffffa7a8e96bbcf8] unregister_netdevice_many at ffffffffb046d3d9
>> [ffffa7a8e96bbd60] default_device_exit_batch at ffffffffb046d8d1
>> [ffffa7a8e96bbdd0] ops_exit_list at ffffffffb045e21d
>> [ffffa7a8e96bbe00] cleanup_net at ffffffffb045ea46
>> [ffffa7a8e96bbe60] process_one_work at ffffffffafad94bb
>> [ffffa7a8e96bbeb0] worker_thread at ffffffffafad96ad
>> [ffffa7a8e96bbf10] kthread at ffffffffafae132a
>> [ffffa7a8e96bbf50] ret_from_fork at ffffffffafa04b92
>>
>> 290858 PID: 278176  TASK: ffff925deb39a040  CPU: 32  COMMAND: "node-exporter"
>> [ffffa7a8d14dbb80] __schedule at ffffffffb0719898
>> [ffffa7a8d14dbc08] schedule at ffffffffb0719e9e
>> [ffffa7a8d14dbc28] schedule_preempt_disabled at ffffffffb071a24e
>> [ffffa7a8d14dbc38] __mutex_lock at ffffffffb071af28
>> [ffffa7a8d14dbcb8] __mutex_lock_slowpath at ffffffffb071b1a3
>> [ffffa7a8d14dbcc8] mutex_lock at ffffffffb071b1e2
>> [ffffa7a8d14dbce0] rtnl_lock at ffffffffb047f4b5
>> [ffffa7a8d14dbcf0] bonding_show_bonds at ffffffffc079b1a1 [bonding]
>> [ffffa7a8d14dbd20] class_attr_show at ffffffffb02117ce
>> [ffffa7a8d14dbd30] sysfs_kf_seq_show at ffffffffafe37ba1
>> [ffffa7a8d14dbd50] kernfs_seq_show at ffffffffafe35c07
>> [ffffa7a8d14dbd60] seq_read_iter at ffffffffafd9fce0
>> [ffffa7a8d14dbdc0] kernfs_fop_read_iter at ffffffffafe36a10
>> [ffffa7a8d14dbe00] new_sync_read at ffffffffafd6de23
>> [ffffa7a8d14dbe90] vfs_read at ffffffffafd6e64e
>> [ffffa7a8d14dbed0] ksys_read at ffffffffafd70977
>> [ffffa7a8d14dbf10] __x64_sys_read at ffffffffafd70a0a
>> [ffffa7a8d14dbf20] do_syscall_64 at ffffffffb070bf1c
>> [ffffa7a8d14dbf50] entry_SYSCALL_64_after_hwframe at ffffffffb080007c
>> ......
>>
>> Problem description:
>>
>> Thread 210933 holds the rtnl_mutex and tries to acquire the kernfs_rwsem,
>> but there are many readers which hold the kernfs_rwsem, so it has to sleep
>> for a long time to wait the readers release the lock. Thread 278176 and any
>> other threads which call bonding_show_bonds() also need to wait because
>> they try to accuire the rtnl_mutex.
> 
> acquire
> 
>>
>> bonding_show_bonds() uses rtnl_mutex to protect the bond_list traversal.
>> However, the addition and deletion of bond_list are only performed in
>> bond_init()/bond_uninit(), so we can intoduce a separate read-write lock
> 
> introduce
> 
>> to synchronize bond list mutation.
>>
>> What's the benefits of this change?
>>
>> 1) All threads which call bonding_show_bonds() only wait when the
>> registration or unregistration of bond device happens.
>>
>> 2) There are many other users of rtnl_mutex, so bonding_show_bonds()
>> won't compete with them.
>>
>> In a word, this change reduces the lock contention of rtnl_mutex.
>>
> 
> This looks good to me, but please note:
> 
> 1) This is net-next material, please resend next week, because
> net-next is currently closed during the merge window.
> 
> 2) Using a spell checker would point few typos (including in the title
> "boning" -> "bonding")
> 
> Thanks.

Thanks for your review, I 'll send a new patch next week.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] boning: use a read-write lock in bonding_show_bonds()
  2023-11-08  6:46 [PATCH] boning: use a read-write lock in bonding_show_bonds() Haifeng Xu
  2023-11-08 14:19 ` Eric Dumazet
@ 2023-11-09 15:47 ` Jiri Pirko
  2023-11-10  1:59   ` Haifeng Xu
  2023-11-09 17:55 ` Stephen Hemminger
  2 siblings, 1 reply; 16+ messages in thread
From: Jiri Pirko @ 2023-11-09 15:47 UTC (permalink / raw)
  To: Haifeng Xu
  Cc: j.vosburgh, andy, davem, edumazet, kuba, pabeni, netdev, linux-kernel


s/boning/bonding/
?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] boning: use a read-write lock in bonding_show_bonds()
  2023-11-08  6:46 [PATCH] boning: use a read-write lock in bonding_show_bonds() Haifeng Xu
  2023-11-08 14:19 ` Eric Dumazet
  2023-11-09 15:47 ` [PATCH] boning: " Jiri Pirko
@ 2023-11-09 17:55 ` Stephen Hemminger
  2023-11-09 18:03   ` Eric Dumazet
  2023-11-10  2:35   ` Haifeng Xu
  2 siblings, 2 replies; 16+ messages in thread
From: Stephen Hemminger @ 2023-11-09 17:55 UTC (permalink / raw)
  To: Haifeng Xu
  Cc: j.vosburgh, andy, davem, edumazet, kuba, pabeni, netdev, linux-kernel

On Wed,  8 Nov 2023 06:46:41 +0000
Haifeng Xu <haifeng.xu@shopee.com> wrote:

> call stack:
> ......
> PID: 210933  TASK: ffff92424e5ec080  CPU: 13  COMMAND: "kworker/u96:2"
> [ffffa7a8e96bbac0] __schedule at ffffffffb0719898
> [ffffa7a8e96bbb48] schedule at ffffffffb0719e9e
> [ffffa7a8e96bbb68] rwsem_down_write_slowpath at ffffffffafb3167a
> [ffffa7a8e96bbc00] down_write at ffffffffb071bfc1
> [ffffa7a8e96bbc18] kernfs_remove_by_name_ns at ffffffffafe3593e
> [ffffa7a8e96bbc48] sysfs_unmerge_group at ffffffffafe38922
> [ffffa7a8e96bbc68] dpm_sysfs_remove at ffffffffb021c96a
> [ffffa7a8e96bbc80] device_del at ffffffffb0209af8
> [ffffa7a8e96bbcd0] netdev_unregister_kobject at ffffffffb04a6b0e
> [ffffa7a8e96bbcf8] unregister_netdevice_many at ffffffffb046d3d9
> [ffffa7a8e96bbd60] default_device_exit_batch at ffffffffb046d8d1
> [ffffa7a8e96bbdd0] ops_exit_list at ffffffffb045e21d
> [ffffa7a8e96bbe00] cleanup_net at ffffffffb045ea46
> [ffffa7a8e96bbe60] process_one_work at ffffffffafad94bb
> [ffffa7a8e96bbeb0] worker_thread at ffffffffafad96ad
> [ffffa7a8e96bbf10] kthread at ffffffffafae132a
> [ffffa7a8e96bbf50] ret_from_fork at ffffffffafa04b92
> 
> 290858 PID: 278176  TASK: ffff925deb39a040  CPU: 32  COMMAND: "node-exporter"
> [ffffa7a8d14dbb80] __schedule at ffffffffb0719898
> [ffffa7a8d14dbc08] schedule at ffffffffb0719e9e
> [ffffa7a8d14dbc28] schedule_preempt_disabled at ffffffffb071a24e
> [ffffa7a8d14dbc38] __mutex_lock at ffffffffb071af28
> [ffffa7a8d14dbcb8] __mutex_lock_slowpath at ffffffffb071b1a3
> [ffffa7a8d14dbcc8] mutex_lock at ffffffffb071b1e2
> [ffffa7a8d14dbce0] rtnl_lock at ffffffffb047f4b5
> [ffffa7a8d14dbcf0] bonding_show_bonds at ffffffffc079b1a1 [bonding]
> [ffffa7a8d14dbd20] class_attr_show at ffffffffb02117ce
> [ffffa7a8d14dbd30] sysfs_kf_seq_show at ffffffffafe37ba1
> [ffffa7a8d14dbd50] kernfs_seq_show at ffffffffafe35c07
> [ffffa7a8d14dbd60] seq_read_iter at ffffffffafd9fce0
> [ffffa7a8d14dbdc0] kernfs_fop_read_iter at ffffffffafe36a10
> [ffffa7a8d14dbe00] new_sync_read at ffffffffafd6de23
> [ffffa7a8d14dbe90] vfs_read at ffffffffafd6e64e
> [ffffa7a8d14dbed0] ksys_read at ffffffffafd70977
> [ffffa7a8d14dbf10] __x64_sys_read at ffffffffafd70a0a
> [ffffa7a8d14dbf20] do_syscall_64 at ffffffffb070bf1c
> [ffffa7a8d14dbf50] entry_SYSCALL_64_after_hwframe at ffffffffb080007c
> ......
> 
> Problem description:
> 
> Thread 210933 holds the rtnl_mutex and tries to acquire the kernfs_rwsem,
> but there are many readers which hold the kernfs_rwsem, so it has to sleep
> for a long time to wait the readers release the lock. Thread 278176 and any
> other threads which call bonding_show_bonds() also need to wait because
> they try to accuire the rtnl_mutex.
> 
> bonding_show_bonds() uses rtnl_mutex to protect the bond_list traversal.
> However, the addition and deletion of bond_list are only performed in
> bond_init()/bond_uninit(), so we can intoduce a separate read-write lock
> to synchronize bond list mutation.
> 
> What's the benefits of this change?
> 
> 1) All threads which call bonding_show_bonds() only wait when the
> registration or unregistration of bond device happens.
> 
> 2) There are many other users of rtnl_mutex, so bonding_show_bonds()
> won't compete with them.
> 
> In a word, this change reduces the lock contention of rtnl_mutex.
> 
> Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
> ---
>  drivers/net/bonding/bond_main.c  | 4 ++++
>  drivers/net/bonding/bond_sysfs.c | 6 ++++--
>  include/net/bonding.h            | 3 +++
>  3 files changed, 11 insertions(+), 2 deletions(-)

Reader-writer locks are slower than spin locks and should be discouraged.
Would it be possible to use RCU here instead?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] boning: use a read-write lock in bonding_show_bonds()
  2023-11-09 17:55 ` Stephen Hemminger
@ 2023-11-09 18:03   ` Eric Dumazet
  2023-11-10  2:35   ` Haifeng Xu
  1 sibling, 0 replies; 16+ messages in thread
From: Eric Dumazet @ 2023-11-09 18:03 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Haifeng Xu, j.vosburgh, andy, davem, kuba, pabeni, netdev, linux-kernel

On Thu, Nov 9, 2023 at 6:55 PM Stephen Hemminger
<stephen@networkplumber.org> wrote:

> Reader-writer locks are slower than spin locks and should be discouraged.
> Would it be possible to use RCU here instead?

I doubt there is a case for repeatedly reading this file ?

This 'lock' is only for slow paths, it doesn't really matter what it is.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] boning: use a read-write lock in bonding_show_bonds()
  2023-11-09 15:47 ` [PATCH] boning: " Jiri Pirko
@ 2023-11-10  1:59   ` Haifeng Xu
  0 siblings, 0 replies; 16+ messages in thread
From: Haifeng Xu @ 2023-11-10  1:59 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: j.vosburgh, andy, davem, edumazet, kuba, pabeni, netdev, linux-kernel



On 2023/11/9 23:47, Jiri Pirko wrote:
> 
> s/boning/bonding/
> ?

Yes, Eric has pointed out the problem in last email.
Thanks.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] boning: use a read-write lock in bonding_show_bonds()
  2023-11-09 17:55 ` Stephen Hemminger
  2023-11-09 18:03   ` Eric Dumazet
@ 2023-11-10  2:35   ` Haifeng Xu
  1 sibling, 0 replies; 16+ messages in thread
From: Haifeng Xu @ 2023-11-10  2:35 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: j.vosburgh, andy, davem, edumazet, kuba, pabeni, netdev, linux-kernel



On 2023/11/10 01:55, Stephen Hemminger wrote:
> On Wed,  8 Nov 2023 06:46:41 +0000
> Haifeng Xu <haifeng.xu@shopee.com> wrote:
> 
>> call stack:
>> ......
>> PID: 210933  TASK: ffff92424e5ec080  CPU: 13  COMMAND: "kworker/u96:2"
>> [ffffa7a8e96bbac0] __schedule at ffffffffb0719898
>> [ffffa7a8e96bbb48] schedule at ffffffffb0719e9e
>> [ffffa7a8e96bbb68] rwsem_down_write_slowpath at ffffffffafb3167a
>> [ffffa7a8e96bbc00] down_write at ffffffffb071bfc1
>> [ffffa7a8e96bbc18] kernfs_remove_by_name_ns at ffffffffafe3593e
>> [ffffa7a8e96bbc48] sysfs_unmerge_group at ffffffffafe38922
>> [ffffa7a8e96bbc68] dpm_sysfs_remove at ffffffffb021c96a
>> [ffffa7a8e96bbc80] device_del at ffffffffb0209af8
>> [ffffa7a8e96bbcd0] netdev_unregister_kobject at ffffffffb04a6b0e
>> [ffffa7a8e96bbcf8] unregister_netdevice_many at ffffffffb046d3d9
>> [ffffa7a8e96bbd60] default_device_exit_batch at ffffffffb046d8d1
>> [ffffa7a8e96bbdd0] ops_exit_list at ffffffffb045e21d
>> [ffffa7a8e96bbe00] cleanup_net at ffffffffb045ea46
>> [ffffa7a8e96bbe60] process_one_work at ffffffffafad94bb
>> [ffffa7a8e96bbeb0] worker_thread at ffffffffafad96ad
>> [ffffa7a8e96bbf10] kthread at ffffffffafae132a
>> [ffffa7a8e96bbf50] ret_from_fork at ffffffffafa04b92
>>
>> 290858 PID: 278176  TASK: ffff925deb39a040  CPU: 32  COMMAND: "node-exporter"
>> [ffffa7a8d14dbb80] __schedule at ffffffffb0719898
>> [ffffa7a8d14dbc08] schedule at ffffffffb0719e9e
>> [ffffa7a8d14dbc28] schedule_preempt_disabled at ffffffffb071a24e
>> [ffffa7a8d14dbc38] __mutex_lock at ffffffffb071af28
>> [ffffa7a8d14dbcb8] __mutex_lock_slowpath at ffffffffb071b1a3
>> [ffffa7a8d14dbcc8] mutex_lock at ffffffffb071b1e2
>> [ffffa7a8d14dbce0] rtnl_lock at ffffffffb047f4b5
>> [ffffa7a8d14dbcf0] bonding_show_bonds at ffffffffc079b1a1 [bonding]
>> [ffffa7a8d14dbd20] class_attr_show at ffffffffb02117ce
>> [ffffa7a8d14dbd30] sysfs_kf_seq_show at ffffffffafe37ba1
>> [ffffa7a8d14dbd50] kernfs_seq_show at ffffffffafe35c07
>> [ffffa7a8d14dbd60] seq_read_iter at ffffffffafd9fce0
>> [ffffa7a8d14dbdc0] kernfs_fop_read_iter at ffffffffafe36a10
>> [ffffa7a8d14dbe00] new_sync_read at ffffffffafd6de23
>> [ffffa7a8d14dbe90] vfs_read at ffffffffafd6e64e
>> [ffffa7a8d14dbed0] ksys_read at ffffffffafd70977
>> [ffffa7a8d14dbf10] __x64_sys_read at ffffffffafd70a0a
>> [ffffa7a8d14dbf20] do_syscall_64 at ffffffffb070bf1c
>> [ffffa7a8d14dbf50] entry_SYSCALL_64_after_hwframe at ffffffffb080007c
>> ......
>>
>> Problem description:
>>
>> Thread 210933 holds the rtnl_mutex and tries to acquire the kernfs_rwsem,
>> but there are many readers which hold the kernfs_rwsem, so it has to sleep
>> for a long time to wait the readers release the lock. Thread 278176 and any
>> other threads which call bonding_show_bonds() also need to wait because
>> they try to accuire the rtnl_mutex.
>>
>> bonding_show_bonds() uses rtnl_mutex to protect the bond_list traversal.
>> However, the addition and deletion of bond_list are only performed in
>> bond_init()/bond_uninit(), so we can intoduce a separate read-write lock
>> to synchronize bond list mutation.
>>
>> What's the benefits of this change?
>>
>> 1) All threads which call bonding_show_bonds() only wait when the
>> registration or unregistration of bond device happens.
>>
>> 2) There are many other users of rtnl_mutex, so bonding_show_bonds()
>> won't compete with them.
>>
>> In a word, this change reduces the lock contention of rtnl_mutex.
>>
>> Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
>> ---
>>  drivers/net/bonding/bond_main.c  | 4 ++++
>>  drivers/net/bonding/bond_sysfs.c | 6 ++++--
>>  include/net/bonding.h            | 3 +++
>>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> Reader-writer locks are slower than spin locks and should be discouraged> Would it be possible to use RCU here instead?
 
In most cases,there are many threads which want to iterate over the bond_list,
the registration or unregistration of bond device rarely happens.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v2] bonding: use a read-write lock in bonding_show_bonds()
  2023-11-08 14:19 ` Eric Dumazet
  2023-11-09  2:43   ` Haifeng Xu
@ 2023-11-17 10:43   ` Haifeng Xu
  2023-11-17 10:59     ` Eric Dumazet
  1 sibling, 1 reply; 16+ messages in thread
From: Haifeng Xu @ 2023-11-17 10:43 UTC (permalink / raw)
  To: edumazet
  Cc: andy, davem, j.vosburgh, kuba, pabeni, netdev, linux-kernel, Haifeng Xu

Problem description:

Call stack:
......
PID: 210933  TASK: ffff92424e5ec080  CPU: 13  COMMAND: "kworker/u96:2"
[ffffa7a8e96bbac0] __schedule at ffffffffb0719898
[ffffa7a8e96bbb48] schedule at ffffffffb0719e9e
[ffffa7a8e96bbb68] rwsem_down_write_slowpath at ffffffffafb3167a
[ffffa7a8e96bbc00] down_write at ffffffffb071bfc1
[ffffa7a8e96bbc18] kernfs_remove_by_name_ns at ffffffffafe3593e
[ffffa7a8e96bbc48] sysfs_unmerge_group at ffffffffafe38922
[ffffa7a8e96bbc68] dpm_sysfs_remove at ffffffffb021c96a
[ffffa7a8e96bbc80] device_del at ffffffffb0209af8
[ffffa7a8e96bbcd0] netdev_unregister_kobject at ffffffffb04a6b0e
[ffffa7a8e96bbcf8] unregister_netdevice_many at ffffffffb046d3d9
[ffffa7a8e96bbd60] default_device_exit_batch at ffffffffb046d8d1
[ffffa7a8e96bbdd0] ops_exit_list at ffffffffb045e21d
[ffffa7a8e96bbe00] cleanup_net at ffffffffb045ea46
[ffffa7a8e96bbe60] process_one_work at ffffffffafad94bb
[ffffa7a8e96bbeb0] worker_thread at ffffffffafad96ad
[ffffa7a8e96bbf10] kthread at ffffffffafae132a
[ffffa7a8e96bbf50] ret_from_fork at ffffffffafa04b92

290858 PID: 278176  TASK: ffff925deb39a040  CPU: 32  COMMAND: "node-exporter"
[ffffa7a8d14dbb80] __schedule at ffffffffb0719898
[ffffa7a8d14dbc08] schedule at ffffffffb0719e9e
[ffffa7a8d14dbc28] schedule_preempt_disabled at ffffffffb071a24e
[ffffa7a8d14dbc38] __mutex_lock at ffffffffb071af28
[ffffa7a8d14dbcb8] __mutex_lock_slowpath at ffffffffb071b1a3
[ffffa7a8d14dbcc8] mutex_lock at ffffffffb071b1e2
[ffffa7a8d14dbce0] rtnl_lock at ffffffffb047f4b5
[ffffa7a8d14dbcf0] bonding_show_bonds at ffffffffc079b1a1 [bonding]
[ffffa7a8d14dbd20] class_attr_show at ffffffffb02117ce
[ffffa7a8d14dbd30] sysfs_kf_seq_show at ffffffffafe37ba1
[ffffa7a8d14dbd50] kernfs_seq_show at ffffffffafe35c07
[ffffa7a8d14dbd60] seq_read_iter at ffffffffafd9fce0
[ffffa7a8d14dbdc0] kernfs_fop_read_iter at ffffffffafe36a10
[ffffa7a8d14dbe00] new_sync_read at ffffffffafd6de23
[ffffa7a8d14dbe90] vfs_read at ffffffffafd6e64e
[ffffa7a8d14dbed0] ksys_read at ffffffffafd70977
[ffffa7a8d14dbf10] __x64_sys_read at ffffffffafd70a0a
[ffffa7a8d14dbf20] do_syscall_64 at ffffffffb070bf1c
[ffffa7a8d14dbf50] entry_SYSCALL_64_after_hwframe at ffffffffb080007c
......

Thread 210933 holds the rtnl_mutex and tries to acquire the kernfs_rwsem,
but there are many readers which hold the kernfs_rwsem, so it has to sleep
for a long time to wait the readers release the lock. Thread 278176 and any
other threads which call bonding_show_bonds() also need to wait because
they try to acquire the rtnl_mutex.

bonding_show_bonds() uses rtnl_mutex to protect the bond_list traversal.
However, the addition and deletion of bond_list are only performed in
bond_init()/bond_uninit(), so we can introduce a separate read-write lock
to synchronize bond list mutation.

What are the benefits of this change?

1) All threads which call bonding_show_bonds() only wait when the
registration or unregistration of bond device happens.

2) There are many other users of rtnl_mutex, so bonding_show_bonds()
won't compete with them.

In a word, this change reduces the lock contention of rtnl_mutex.

Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
---
v2:
- move the call stack after the description
- fix typos in the changelog
---
 drivers/net/bonding/bond_main.c  | 4 ++++
 drivers/net/bonding/bond_sysfs.c | 6 ++++--
 include/net/bonding.h            | 3 +++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8e6cc0e133b7..db8f1efaab78 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -5957,7 +5957,9 @@ static void bond_uninit(struct net_device *bond_dev)
 
 	bond_set_slave_arr(bond, NULL, NULL);
 
+	write_lock(&bonding_dev_lock);
 	list_del(&bond->bond_list);
+	write_unlock(&bonding_dev_lock);
 
 	bond_debug_unregister(bond);
 }
@@ -6370,7 +6372,9 @@ static int bond_init(struct net_device *bond_dev)
 	spin_lock_init(&bond->stats_lock);
 	netdev_lockdep_set_classes(bond_dev);
 
+	write_lock(&bonding_dev_lock);
 	list_add_tail(&bond->bond_list, &bn->dev_list);
+	write_unlock(&bonding_dev_lock);
 
 	bond_prepare_sysfs_group(bond);
 
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 2805135a7205..e107c1d7a6bf 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -28,6 +28,8 @@
 
 #define to_bond(cd)	((struct bonding *)(netdev_priv(to_net_dev(cd))))
 
+DEFINE_RWLOCK(bonding_dev_lock);
+
 /* "show" function for the bond_masters attribute.
  * The class parameter is ignored.
  */
@@ -40,7 +42,7 @@ static ssize_t bonding_show_bonds(const struct class *cls,
 	int res = 0;
 	struct bonding *bond;
 
-	rtnl_lock();
+	read_lock(&bonding_dev_lock);
 
 	list_for_each_entry(bond, &bn->dev_list, bond_list) {
 		if (res > (PAGE_SIZE - IFNAMSIZ)) {
@@ -55,7 +57,7 @@ static ssize_t bonding_show_bonds(const struct class *cls,
 	if (res)
 		buf[res-1] = '\n'; /* eat the leftover space */
 
-	rtnl_unlock();
+	read_unlock(&bonding_dev_lock);
 	return res;
 }
 
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 5b8b1b644a2d..584ba4b5b8df 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -777,6 +777,9 @@ extern struct rtnl_link_ops bond_link_ops;
 /* exported from bond_sysfs_slave.c */
 extern const struct sysfs_ops slave_sysfs_ops;
 
+/* exported from bond_sysfs.c */
+extern rwlock_t bonding_dev_lock;
+
 /* exported from bond_3ad.c */
 extern const u8 lacpdu_mcast_addr[];
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] bonding: use a read-write lock in bonding_show_bonds()
  2023-11-17 10:43   ` [PATCH v2] bonding: " Haifeng Xu
@ 2023-11-17 10:59     ` Eric Dumazet
  2023-11-17 13:15       ` Haifeng Xu
                         ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Eric Dumazet @ 2023-11-17 10:59 UTC (permalink / raw)
  To: Haifeng Xu; +Cc: andy, davem, j.vosburgh, kuba, pabeni, netdev, linux-kernel

On Fri, Nov 17, 2023 at 11:43 AM Haifeng Xu <haifeng.xu@shopee.com> wrote:
>
> Problem description:
>
> Call stack:
> ......
> PID: 210933  TASK: ffff92424e5ec080  CPU: 13  COMMAND: "kworker/u96:2"
> [ffffa7a8e96bbac0] __schedule at ffffffffb0719898
> [ffffa7a8e96bbb48] schedule at ffffffffb0719e9e
> [ffffa7a8e96bbb68] rwsem_down_write_slowpath at ffffffffafb3167a
> [ffffa7a8e96bbc00] down_write at ffffffffb071bfc1
> [ffffa7a8e96bbc18] kernfs_remove_by_name_ns at ffffffffafe3593e
> [ffffa7a8e96bbc48] sysfs_unmerge_group at ffffffffafe38922
> [ffffa7a8e96bbc68] dpm_sysfs_remove at ffffffffb021c96a
> [ffffa7a8e96bbc80] device_del at ffffffffb0209af8
> [ffffa7a8e96bbcd0] netdev_unregister_kobject at ffffffffb04a6b0e
> [ffffa7a8e96bbcf8] unregister_netdevice_many at ffffffffb046d3d9
> [ffffa7a8e96bbd60] default_device_exit_batch at ffffffffb046d8d1
> [ffffa7a8e96bbdd0] ops_exit_list at ffffffffb045e21d
> [ffffa7a8e96bbe00] cleanup_net at ffffffffb045ea46
> [ffffa7a8e96bbe60] process_one_work at ffffffffafad94bb
> [ffffa7a8e96bbeb0] worker_thread at ffffffffafad96ad
> [ffffa7a8e96bbf10] kthread at ffffffffafae132a
> [ffffa7a8e96bbf50] ret_from_fork at ffffffffafa04b92
>
> 290858 PID: 278176  TASK: ffff925deb39a040  CPU: 32  COMMAND: "node-exporter"
> [ffffa7a8d14dbb80] __schedule at ffffffffb0719898
> [ffffa7a8d14dbc08] schedule at ffffffffb0719e9e
> [ffffa7a8d14dbc28] schedule_preempt_disabled at ffffffffb071a24e
> [ffffa7a8d14dbc38] __mutex_lock at ffffffffb071af28
> [ffffa7a8d14dbcb8] __mutex_lock_slowpath at ffffffffb071b1a3
> [ffffa7a8d14dbcc8] mutex_lock at ffffffffb071b1e2
> [ffffa7a8d14dbce0] rtnl_lock at ffffffffb047f4b5
> [ffffa7a8d14dbcf0] bonding_show_bonds at ffffffffc079b1a1 [bonding]
> [ffffa7a8d14dbd20] class_attr_show at ffffffffb02117ce
> [ffffa7a8d14dbd30] sysfs_kf_seq_show at ffffffffafe37ba1
> [ffffa7a8d14dbd50] kernfs_seq_show at ffffffffafe35c07
> [ffffa7a8d14dbd60] seq_read_iter at ffffffffafd9fce0
> [ffffa7a8d14dbdc0] kernfs_fop_read_iter at ffffffffafe36a10
> [ffffa7a8d14dbe00] new_sync_read at ffffffffafd6de23
> [ffffa7a8d14dbe90] vfs_read at ffffffffafd6e64e
> [ffffa7a8d14dbed0] ksys_read at ffffffffafd70977
> [ffffa7a8d14dbf10] __x64_sys_read at ffffffffafd70a0a
> [ffffa7a8d14dbf20] do_syscall_64 at ffffffffb070bf1c
> [ffffa7a8d14dbf50] entry_SYSCALL_64_after_hwframe at ffffffffb080007c
> ......
>
> Thread 210933 holds the rtnl_mutex and tries to acquire the kernfs_rwsem,
> but there are many readers which hold the kernfs_rwsem, so it has to sleep
> for a long time to wait the readers release the lock. Thread 278176 and any
> other threads which call bonding_show_bonds() also need to wait because
> they try to acquire the rtnl_mutex.
>
> bonding_show_bonds() uses rtnl_mutex to protect the bond_list traversal.
> However, the addition and deletion of bond_list are only performed in
> bond_init()/bond_uninit(), so we can introduce a separate read-write lock
> to synchronize bond list mutation.
>
> What are the benefits of this change?
>
> 1) All threads which call bonding_show_bonds() only wait when the
> registration or unregistration of bond device happens.
>
> 2) There are many other users of rtnl_mutex, so bonding_show_bonds()
> won't compete with them.
>
> In a word, this change reduces the lock contention of rtnl_mutex.
>
> Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
> ---
> v2:
> - move the call stack after the description
> - fix typos in the changelog
> ---
>  drivers/net/bonding/bond_main.c  | 4 ++++
>  drivers/net/bonding/bond_sysfs.c | 6 ++++--
>  include/net/bonding.h            | 3 +++
>  3 files changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 8e6cc0e133b7..db8f1efaab78 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -5957,7 +5957,9 @@ static void bond_uninit(struct net_device *bond_dev)
>
>         bond_set_slave_arr(bond, NULL, NULL);
>
> +       write_lock(&bonding_dev_lock);
>         list_del(&bond->bond_list);
> +       write_unlock(&bonding_dev_lock);
>
>         bond_debug_unregister(bond);
>  }
> @@ -6370,7 +6372,9 @@ static int bond_init(struct net_device *bond_dev)
>         spin_lock_init(&bond->stats_lock);
>         netdev_lockdep_set_classes(bond_dev);
>
> +       write_lock(&bonding_dev_lock);
>         list_add_tail(&bond->bond_list, &bn->dev_list);
> +       write_unlock(&bonding_dev_lock);
>
>         bond_prepare_sysfs_group(bond);
>
> diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
> index 2805135a7205..e107c1d7a6bf 100644
> --- a/drivers/net/bonding/bond_sysfs.c
> +++ b/drivers/net/bonding/bond_sysfs.c
> @@ -28,6 +28,8 @@
>
>  #define to_bond(cd)    ((struct bonding *)(netdev_priv(to_net_dev(cd))))
>
> +DEFINE_RWLOCK(bonding_dev_lock);
> +
>  /* "show" function for the bond_masters attribute.
>   * The class parameter is ignored.
>   */
> @@ -40,7 +42,7 @@ static ssize_t bonding_show_bonds(const struct class *cls,
>         int res = 0;
>         struct bonding *bond;
>
> -       rtnl_lock();
> +       read_lock(&bonding_dev_lock);
>
>         list_for_each_entry(bond, &bn->dev_list, bond_list) {
>                 if (res > (PAGE_SIZE - IFNAMSIZ)) {
> @@ -55,7 +57,7 @@ static ssize_t bonding_show_bonds(const struct class *cls,
>         if (res)
>                 buf[res-1] = '\n'; /* eat the leftover space */
>
> -       rtnl_unlock();
> +       read_unlock(&bonding_dev_lock);
>         return res;
>  }

This unfortunately would race with dev_change_name()

You probably need to read_lock(&devnet_rename_sem); before copying dev->name,
or use netdev_get_name(net, temp_buffer, bond->dev->ifindex)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] bonding: use a read-write lock in bonding_show_bonds()
  2023-11-17 10:59     ` Eric Dumazet
@ 2023-11-17 13:15       ` Haifeng Xu
  2023-11-17 13:40         ` Eric Dumazet
  2023-11-19  9:25       ` [PATCH v3 1/2] bonding: export devnet_rename_sem Haifeng Xu
  2023-11-19  9:25       ` [PATCH v3 2/2] bonding: use a read-write lock in bonding_show_bonds() Haifeng Xu
  2 siblings, 1 reply; 16+ messages in thread
From: Haifeng Xu @ 2023-11-17 13:15 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: andy, davem, j.vosburgh, kuba, pabeni, netdev, linux-kernel



On 2023/11/17 18:59, Eric Dumazet wrote:
> On Fri, Nov 17, 2023 at 11:43 AM Haifeng Xu <haifeng.xu@shopee.com> wrote:
>>
>> Problem description:
>>
>> Call stack:
>> ......
>> PID: 210933  TASK: ffff92424e5ec080  CPU: 13  COMMAND: "kworker/u96:2"
>> [ffffa7a8e96bbac0] __schedule at ffffffffb0719898
>> [ffffa7a8e96bbb48] schedule at ffffffffb0719e9e
>> [ffffa7a8e96bbb68] rwsem_down_write_slowpath at ffffffffafb3167a
>> [ffffa7a8e96bbc00] down_write at ffffffffb071bfc1
>> [ffffa7a8e96bbc18] kernfs_remove_by_name_ns at ffffffffafe3593e
>> [ffffa7a8e96bbc48] sysfs_unmerge_group at ffffffffafe38922
>> [ffffa7a8e96bbc68] dpm_sysfs_remove at ffffffffb021c96a
>> [ffffa7a8e96bbc80] device_del at ffffffffb0209af8
>> [ffffa7a8e96bbcd0] netdev_unregister_kobject at ffffffffb04a6b0e
>> [ffffa7a8e96bbcf8] unregister_netdevice_many at ffffffffb046d3d9
>> [ffffa7a8e96bbd60] default_device_exit_batch at ffffffffb046d8d1
>> [ffffa7a8e96bbdd0] ops_exit_list at ffffffffb045e21d
>> [ffffa7a8e96bbe00] cleanup_net at ffffffffb045ea46
>> [ffffa7a8e96bbe60] process_one_work at ffffffffafad94bb
>> [ffffa7a8e96bbeb0] worker_thread at ffffffffafad96ad
>> [ffffa7a8e96bbf10] kthread at ffffffffafae132a
>> [ffffa7a8e96bbf50] ret_from_fork at ffffffffafa04b92
>>
>> 290858 PID: 278176  TASK: ffff925deb39a040  CPU: 32  COMMAND: "node-exporter"
>> [ffffa7a8d14dbb80] __schedule at ffffffffb0719898
>> [ffffa7a8d14dbc08] schedule at ffffffffb0719e9e
>> [ffffa7a8d14dbc28] schedule_preempt_disabled at ffffffffb071a24e
>> [ffffa7a8d14dbc38] __mutex_lock at ffffffffb071af28
>> [ffffa7a8d14dbcb8] __mutex_lock_slowpath at ffffffffb071b1a3
>> [ffffa7a8d14dbcc8] mutex_lock at ffffffffb071b1e2
>> [ffffa7a8d14dbce0] rtnl_lock at ffffffffb047f4b5
>> [ffffa7a8d14dbcf0] bonding_show_bonds at ffffffffc079b1a1 [bonding]
>> [ffffa7a8d14dbd20] class_attr_show at ffffffffb02117ce
>> [ffffa7a8d14dbd30] sysfs_kf_seq_show at ffffffffafe37ba1
>> [ffffa7a8d14dbd50] kernfs_seq_show at ffffffffafe35c07
>> [ffffa7a8d14dbd60] seq_read_iter at ffffffffafd9fce0
>> [ffffa7a8d14dbdc0] kernfs_fop_read_iter at ffffffffafe36a10
>> [ffffa7a8d14dbe00] new_sync_read at ffffffffafd6de23
>> [ffffa7a8d14dbe90] vfs_read at ffffffffafd6e64e
>> [ffffa7a8d14dbed0] ksys_read at ffffffffafd70977
>> [ffffa7a8d14dbf10] __x64_sys_read at ffffffffafd70a0a
>> [ffffa7a8d14dbf20] do_syscall_64 at ffffffffb070bf1c
>> [ffffa7a8d14dbf50] entry_SYSCALL_64_after_hwframe at ffffffffb080007c
>> ......
>>
>> Thread 210933 holds the rtnl_mutex and tries to acquire the kernfs_rwsem,
>> but there are many readers which hold the kernfs_rwsem, so it has to sleep
>> for a long time to wait the readers release the lock. Thread 278176 and any
>> other threads which call bonding_show_bonds() also need to wait because
>> they try to acquire the rtnl_mutex.
>>
>> bonding_show_bonds() uses rtnl_mutex to protect the bond_list traversal.
>> However, the addition and deletion of bond_list are only performed in
>> bond_init()/bond_uninit(), so we can introduce a separate read-write lock
>> to synchronize bond list mutation.
>>
>> What are the benefits of this change?
>>
>> 1) All threads which call bonding_show_bonds() only wait when the
>> registration or unregistration of bond device happens.
>>
>> 2) There are many other users of rtnl_mutex, so bonding_show_bonds()
>> won't compete with them.
>>
>> In a word, this change reduces the lock contention of rtnl_mutex.
>>
>> Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
>> ---
>> v2:
>> - move the call stack after the description
>> - fix typos in the changelog
>> ---
>>  drivers/net/bonding/bond_main.c  | 4 ++++
>>  drivers/net/bonding/bond_sysfs.c | 6 ++++--
>>  include/net/bonding.h            | 3 +++
>>  3 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>> index 8e6cc0e133b7..db8f1efaab78 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -5957,7 +5957,9 @@ static void bond_uninit(struct net_device *bond_dev)
>>
>>         bond_set_slave_arr(bond, NULL, NULL);
>>
>> +       write_lock(&bonding_dev_lock);
>>         list_del(&bond->bond_list);
>> +       write_unlock(&bonding_dev_lock);
>>
>>         bond_debug_unregister(bond);
>>  }
>> @@ -6370,7 +6372,9 @@ static int bond_init(struct net_device *bond_dev)
>>         spin_lock_init(&bond->stats_lock);
>>         netdev_lockdep_set_classes(bond_dev);
>>
>> +       write_lock(&bonding_dev_lock);
>>         list_add_tail(&bond->bond_list, &bn->dev_list);
>> +       write_unlock(&bonding_dev_lock);
>>
>>         bond_prepare_sysfs_group(bond);
>>
>> diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
>> index 2805135a7205..e107c1d7a6bf 100644
>> --- a/drivers/net/bonding/bond_sysfs.c
>> +++ b/drivers/net/bonding/bond_sysfs.c
>> @@ -28,6 +28,8 @@
>>
>>  #define to_bond(cd)    ((struct bonding *)(netdev_priv(to_net_dev(cd))))
>>
>> +DEFINE_RWLOCK(bonding_dev_lock);
>> +
>>  /* "show" function for the bond_masters attribute.
>>   * The class parameter is ignored.
>>   */
>> @@ -40,7 +42,7 @@ static ssize_t bonding_show_bonds(const struct class *cls,
>>         int res = 0;
>>         struct bonding *bond;
>>
>> -       rtnl_lock();
>> +       read_lock(&bonding_dev_lock);
>>
>>         list_for_each_entry(bond, &bn->dev_list, bond_list) {
>>                 if (res > (PAGE_SIZE - IFNAMSIZ)) {
>> @@ -55,7 +57,7 @@ static ssize_t bonding_show_bonds(const struct class *cls,
>>         if (res)
>>                 buf[res-1] = '\n'; /* eat the leftover space */
>>
>> -       rtnl_unlock();
>> +       read_unlock(&bonding_dev_lock);
>>         return res;
>>  }
> 
> This unfortunately would race with dev_change_name()

dev_change_name()is either used in  dev_ifsioc(case: SIOCSIFNAME) or used in do_setlink(), so 
could these net devices which need to change name be related to bond? I am not quite sure.

> 
> You probably need to read_lock(&devnet_rename_sem); before copying dev->name,
> or use netdev_get_name(net, temp_buffer, bond->dev->ifindex)


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] bonding: use a read-write lock in bonding_show_bonds()
  2023-11-17 13:15       ` Haifeng Xu
@ 2023-11-17 13:40         ` Eric Dumazet
  0 siblings, 0 replies; 16+ messages in thread
From: Eric Dumazet @ 2023-11-17 13:40 UTC (permalink / raw)
  To: Haifeng Xu; +Cc: andy, davem, j.vosburgh, kuba, pabeni, netdev, linux-kernel

On Fri, Nov 17, 2023 at 2:15 PM Haifeng Xu <haifeng.xu@shopee.com> wrote:
>
>
> dev_change_name()is either used in  dev_ifsioc(case: SIOCSIFNAME) or used in do_setlink(), so
> could these net devices which need to change name be related to bond? I am not quite sure.

You can change a netdev name with

ip link set dev OLDNAME name NEWNAME

This is generic.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v3 1/2] bonding: export devnet_rename_sem
  2023-11-17 10:59     ` Eric Dumazet
  2023-11-17 13:15       ` Haifeng Xu
@ 2023-11-19  9:25       ` Haifeng Xu
  2023-11-20  9:57         ` Eric Dumazet
  2023-11-19  9:25       ` [PATCH v3 2/2] bonding: use a read-write lock in bonding_show_bonds() Haifeng Xu
  2 siblings, 1 reply; 16+ messages in thread
From: Haifeng Xu @ 2023-11-19  9:25 UTC (permalink / raw)
  To: edumazet
  Cc: andy, davem, j.vosburgh, kuba, pabeni, netdev, linux-kernel, Haifeng Xu

This patch exports devnet_rename_sem variable, so it can be accessed in the
bonding modulde, not only being limited in net/core/dev.c.

Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
---
 include/net/bonding.h | 3 +++
 net/core/dev.c        | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/net/bonding.h b/include/net/bonding.h
index 5b8b1b644a2d..6c16d778b615 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -780,6 +780,9 @@ extern const struct sysfs_ops slave_sysfs_ops;
 /* exported from bond_3ad.c */
 extern const u8 lacpdu_mcast_addr[];
 
+/* exported from net/core/dev.c */
+extern struct rw_semaphore devnet_rename_sem;
+
 static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb)
 {
 	dev_core_stats_tx_dropped_inc(dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index af53f6d838ce..fdafab617227 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -197,7 +197,8 @@ static DEFINE_SPINLOCK(napi_hash_lock);
 static unsigned int napi_gen_id = NR_CPUS;
 static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
 
-static DECLARE_RWSEM(devnet_rename_sem);
+DECLARE_RWSEM(devnet_rename_sem);
+EXPORT_SYMBOL(devnet_rename_sem);
 
 static inline void dev_base_seq_inc(struct net *net)
 {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v3 2/2] bonding: use a read-write lock in bonding_show_bonds()
  2023-11-17 10:59     ` Eric Dumazet
  2023-11-17 13:15       ` Haifeng Xu
  2023-11-19  9:25       ` [PATCH v3 1/2] bonding: export devnet_rename_sem Haifeng Xu
@ 2023-11-19  9:25       ` Haifeng Xu
  2023-11-20  9:55         ` Eric Dumazet
  2 siblings, 1 reply; 16+ messages in thread
From: Haifeng Xu @ 2023-11-19  9:25 UTC (permalink / raw)
  To: edumazet
  Cc: andy, davem, j.vosburgh, kuba, pabeni, netdev, linux-kernel, Haifeng Xu

Problem description:

Call stack:
......
PID: 210933  TASK: ffff92424e5ec080  CPU: 13  COMMAND: "kworker/u96:2"
[ffffa7a8e96bbac0] __schedule at ffffffffb0719898
[ffffa7a8e96bbb48] schedule at ffffffffb0719e9e
[ffffa7a8e96bbb68] rwsem_down_write_slowpath at ffffffffafb3167a
[ffffa7a8e96bbc00] down_write at ffffffffb071bfc1
[ffffa7a8e96bbc18] kernfs_remove_by_name_ns at ffffffffafe3593e
[ffffa7a8e96bbc48] sysfs_unmerge_group at ffffffffafe38922
[ffffa7a8e96bbc68] dpm_sysfs_remove at ffffffffb021c96a
[ffffa7a8e96bbc80] device_del at ffffffffb0209af8
[ffffa7a8e96bbcd0] netdev_unregister_kobject at ffffffffb04a6b0e
[ffffa7a8e96bbcf8] unregister_netdevice_many at ffffffffb046d3d9
[ffffa7a8e96bbd60] default_device_exit_batch at ffffffffb046d8d1
[ffffa7a8e96bbdd0] ops_exit_list at ffffffffb045e21d
[ffffa7a8e96bbe00] cleanup_net at ffffffffb045ea46
[ffffa7a8e96bbe60] process_one_work at ffffffffafad94bb
[ffffa7a8e96bbeb0] worker_thread at ffffffffafad96ad
[ffffa7a8e96bbf10] kthread at ffffffffafae132a
[ffffa7a8e96bbf50] ret_from_fork at ffffffffafa04b92

290858 PID: 278176  TASK: ffff925deb39a040  CPU: 32  COMMAND: "node-exporter"
[ffffa7a8d14dbb80] __schedule at ffffffffb0719898
[ffffa7a8d14dbc08] schedule at ffffffffb0719e9e
[ffffa7a8d14dbc28] schedule_preempt_disabled at ffffffffb071a24e
[ffffa7a8d14dbc38] __mutex_lock at ffffffffb071af28
[ffffa7a8d14dbcb8] __mutex_lock_slowpath at ffffffffb071b1a3
[ffffa7a8d14dbcc8] mutex_lock at ffffffffb071b1e2
[ffffa7a8d14dbce0] rtnl_lock at ffffffffb047f4b5
[ffffa7a8d14dbcf0] bonding_show_bonds at ffffffffc079b1a1 [bonding]
[ffffa7a8d14dbd20] class_attr_show at ffffffffb02117ce
[ffffa7a8d14dbd30] sysfs_kf_seq_show at ffffffffafe37ba1
[ffffa7a8d14dbd50] kernfs_seq_show at ffffffffafe35c07
[ffffa7a8d14dbd60] seq_read_iter at ffffffffafd9fce0
[ffffa7a8d14dbdc0] kernfs_fop_read_iter at ffffffffafe36a10
[ffffa7a8d14dbe00] new_sync_read at ffffffffafd6de23
[ffffa7a8d14dbe90] vfs_read at ffffffffafd6e64e
[ffffa7a8d14dbed0] ksys_read at ffffffffafd70977
[ffffa7a8d14dbf10] __x64_sys_read at ffffffffafd70a0a
[ffffa7a8d14dbf20] do_syscall_64 at ffffffffb070bf1c
[ffffa7a8d14dbf50] entry_SYSCALL_64_after_hwframe at ffffffffb080007c
......

Thread 210933 holds the rtnl_mutex and tries to acquire the kernfs_rwsem,
but there are many readers which hold the kernfs_rwsem, so it has to sleep
for a long time to wait the readers release the lock. Thread 278176 and any
other threads which call bonding_show_bonds() also need to wait because
they try to acquire the rtnl_mutex.

bonding_show_bonds() uses rtnl_mutex to protect the bond_list traversal.
However, the addition and deletion of bond_list are only performed in
bond_init()/bond_uninit(), so we can introduce a separate read-write lock
to synchronize bond list mutation. In addition, bonding_show_bonds() could
race with dev_change_name(), so we need devnet_rename_sem to protect the
access to dev->name.

What are the benefits of this change?

1) All threads which call bonding_show_bonds() only wait when the
registration or unregistration of bond device happens or the name
of net device changes.

2) There are many other users of rtnl_mutex, so bonding_show_bonds()
won't compete with them.

In a word, this change reduces the lock contention of rtnl_mutex.

Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
---
v2:
- move the call stack after the description
- fix typos in the changelog
v3:
- add devnet_rename_sem in bonding_show_bonds()
- update the changelog
---
 drivers/net/bonding/bond_main.c  | 4 ++++
 drivers/net/bonding/bond_sysfs.c | 8 ++++++--
 include/net/bonding.h            | 3 +++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8e6cc0e133b7..db8f1efaab78 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -5957,7 +5957,9 @@ static void bond_uninit(struct net_device *bond_dev)
 
 	bond_set_slave_arr(bond, NULL, NULL);
 
+	write_lock(&bonding_dev_lock);
 	list_del(&bond->bond_list);
+	write_unlock(&bonding_dev_lock);
 
 	bond_debug_unregister(bond);
 }
@@ -6370,7 +6372,9 @@ static int bond_init(struct net_device *bond_dev)
 	spin_lock_init(&bond->stats_lock);
 	netdev_lockdep_set_classes(bond_dev);
 
+	write_lock(&bonding_dev_lock);
 	list_add_tail(&bond->bond_list, &bn->dev_list);
+	write_unlock(&bonding_dev_lock);
 
 	bond_prepare_sysfs_group(bond);
 
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 2805135a7205..5de71af7c36f 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -28,6 +28,8 @@
 
 #define to_bond(cd)	((struct bonding *)(netdev_priv(to_net_dev(cd))))
 
+DEFINE_RWLOCK(bonding_dev_lock);
+
 /* "show" function for the bond_masters attribute.
  * The class parameter is ignored.
  */
@@ -40,7 +42,8 @@ static ssize_t bonding_show_bonds(const struct class *cls,
 	int res = 0;
 	struct bonding *bond;
 
-	rtnl_lock();
+	down_read(&devnet_rename_sem);
+	read_lock(&bonding_dev_lock);
 
 	list_for_each_entry(bond, &bn->dev_list, bond_list) {
 		if (res > (PAGE_SIZE - IFNAMSIZ)) {
@@ -55,7 +58,8 @@ static ssize_t bonding_show_bonds(const struct class *cls,
 	if (res)
 		buf[res-1] = '\n'; /* eat the leftover space */
 
-	rtnl_unlock();
+	read_unlock(&bonding_dev_lock);
+	up_read(&devnet_rename_sem);
 	return res;
 }
 
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 6c16d778b615..ede4116457e2 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -783,6 +783,9 @@ extern const u8 lacpdu_mcast_addr[];
 /* exported from net/core/dev.c */
 extern struct rw_semaphore devnet_rename_sem;
 
+/* exported from bond_sysfs.c */
+extern rwlock_t bonding_dev_lock;
+
 static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb)
 {
 	dev_core_stats_tx_dropped_inc(dev);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 2/2] bonding: use a read-write lock in bonding_show_bonds()
  2023-11-19  9:25       ` [PATCH v3 2/2] bonding: use a read-write lock in bonding_show_bonds() Haifeng Xu
@ 2023-11-20  9:55         ` Eric Dumazet
  0 siblings, 0 replies; 16+ messages in thread
From: Eric Dumazet @ 2023-11-20  9:55 UTC (permalink / raw)
  To: Haifeng Xu; +Cc: andy, davem, j.vosburgh, kuba, pabeni, netdev, linux-kernel

On Sun, Nov 19, 2023 at 10:25 AM Haifeng Xu <haifeng.xu@shopee.com> wrote:
>
> Problem description:
>
> Call stack:
> ......
> PID: 210933  TASK: ffff92424e5ec080  CPU: 13  COMMAND: "kworker/u96:2"
> [ffffa7a8e96bbac0] __schedule at ffffffffb0719898
> [ffffa7a8e96bbb48] schedule at ffffffffb0719e9e
> [ffffa7a8e96bbb68] rwsem_down_write_slowpath at ffffffffafb3167a
> [ffffa7a8e96bbc00] down_write at ffffffffb071bfc1
> [ffffa7a8e96bbc18] kernfs_remove_by_name_ns at ffffffffafe3593e
> [ffffa7a8e96bbc48] sysfs_unmerge_group at ffffffffafe38922
> [ffffa7a8e96bbc68] dpm_sysfs_remove at ffffffffb021c96a
> [ffffa7a8e96bbc80] device_del at ffffffffb0209af8
> [ffffa7a8e96bbcd0] netdev_unregister_kobject at ffffffffb04a6b0e
> [ffffa7a8e96bbcf8] unregister_netdevice_many at ffffffffb046d3d9
> [ffffa7a8e96bbd60] default_device_exit_batch at ffffffffb046d8d1
> [ffffa7a8e96bbdd0] ops_exit_list at ffffffffb045e21d
> [ffffa7a8e96bbe00] cleanup_net at ffffffffb045ea46
> [ffffa7a8e96bbe60] process_one_work at ffffffffafad94bb
> [ffffa7a8e96bbeb0] worker_thread at ffffffffafad96ad
> [ffffa7a8e96bbf10] kthread at ffffffffafae132a
> [ffffa7a8e96bbf50] ret_from_fork at ffffffffafa04b92
>
> 290858 PID: 278176  TASK: ffff925deb39a040  CPU: 32  COMMAND: "node-exporter"
> [ffffa7a8d14dbb80] __schedule at ffffffffb0719898
> [ffffa7a8d14dbc08] schedule at ffffffffb0719e9e
> [ffffa7a8d14dbc28] schedule_preempt_disabled at ffffffffb071a24e
> [ffffa7a8d14dbc38] __mutex_lock at ffffffffb071af28
> [ffffa7a8d14dbcb8] __mutex_lock_slowpath at ffffffffb071b1a3
> [ffffa7a8d14dbcc8] mutex_lock at ffffffffb071b1e2
> [ffffa7a8d14dbce0] rtnl_lock at ffffffffb047f4b5
> [ffffa7a8d14dbcf0] bonding_show_bonds at ffffffffc079b1a1 [bonding]
> [ffffa7a8d14dbd20] class_attr_show at ffffffffb02117ce
> [ffffa7a8d14dbd30] sysfs_kf_seq_show at ffffffffafe37ba1
> [ffffa7a8d14dbd50] kernfs_seq_show at ffffffffafe35c07
> [ffffa7a8d14dbd60] seq_read_iter at ffffffffafd9fce0
> [ffffa7a8d14dbdc0] kernfs_fop_read_iter at ffffffffafe36a10
> [ffffa7a8d14dbe00] new_sync_read at ffffffffafd6de23
> [ffffa7a8d14dbe90] vfs_read at ffffffffafd6e64e
> [ffffa7a8d14dbed0] ksys_read at ffffffffafd70977
> [ffffa7a8d14dbf10] __x64_sys_read at ffffffffafd70a0a
> [ffffa7a8d14dbf20] do_syscall_64 at ffffffffb070bf1c
> [ffffa7a8d14dbf50] entry_SYSCALL_64_after_hwframe at ffffffffb080007c
> ......
>
> Thread 210933 holds the rtnl_mutex and tries to acquire the kernfs_rwsem,
> but there are many readers which hold the kernfs_rwsem, so it has to sleep
> for a long time to wait the readers release the lock. Thread 278176 and any
> other threads which call bonding_show_bonds() also need to wait because
> they try to acquire the rtnl_mutex.
>
> bonding_show_bonds() uses rtnl_mutex to protect the bond_list traversal.
> However, the addition and deletion of bond_list are only performed in
> bond_init()/bond_uninit(), so we can introduce a separate read-write lock
> to synchronize bond list mutation. In addition, bonding_show_bonds() could
> race with dev_change_name(), so we need devnet_rename_sem to protect the
> access to dev->name.
>
> What are the benefits of this change?
>
> 1) All threads which call bonding_show_bonds() only wait when the
> registration or unregistration of bond device happens or the name
> of net device changes.
>
> 2) There are many other users of rtnl_mutex, so bonding_show_bonds()
> won't compete with them.
>
> In a word, this change reduces the lock contention of rtnl_mutex.
>
> Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
> Suggested-by: Eric Dumazet <edumazet@google.com>
> ---

Reviewed-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/2] bonding: export devnet_rename_sem
  2023-11-19  9:25       ` [PATCH v3 1/2] bonding: export devnet_rename_sem Haifeng Xu
@ 2023-11-20  9:57         ` Eric Dumazet
  0 siblings, 0 replies; 16+ messages in thread
From: Eric Dumazet @ 2023-11-20  9:57 UTC (permalink / raw)
  To: Haifeng Xu; +Cc: andy, davem, j.vosburgh, kuba, pabeni, netdev, linux-kernel

On Sun, Nov 19, 2023 at 10:25 AM Haifeng Xu <haifeng.xu@shopee.com> wrote:
>
> This patch exports devnet_rename_sem variable, so it can be accessed in the
> bonding modulde, not only being limited in net/core/dev.c.
>
> Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
> Suggested-by: Eric Dumazet <edumazet@google.com>
> ---
>  include/net/bonding.h | 3 +++
>  net/core/dev.c        | 3 ++-
>  2 files changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/include/net/bonding.h b/include/net/bonding.h
> index 5b8b1b644a2d..6c16d778b615 100644
> --- a/include/net/bonding.h
> +++ b/include/net/bonding.h
> @@ -780,6 +780,9 @@ extern const struct sysfs_ops slave_sysfs_ops;
>  /* exported from bond_3ad.c */
>  extern const u8 lacpdu_mcast_addr[];
>
> +/* exported from net/core/dev.c */
> +extern struct rw_semaphore devnet_rename_sem;

This probably belongs to include/linux/netdevice.h

Reviewed-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2023-11-20  9:58 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-08  6:46 [PATCH] boning: use a read-write lock in bonding_show_bonds() Haifeng Xu
2023-11-08 14:19 ` Eric Dumazet
2023-11-09  2:43   ` Haifeng Xu
2023-11-17 10:43   ` [PATCH v2] bonding: " Haifeng Xu
2023-11-17 10:59     ` Eric Dumazet
2023-11-17 13:15       ` Haifeng Xu
2023-11-17 13:40         ` Eric Dumazet
2023-11-19  9:25       ` [PATCH v3 1/2] bonding: export devnet_rename_sem Haifeng Xu
2023-11-20  9:57         ` Eric Dumazet
2023-11-19  9:25       ` [PATCH v3 2/2] bonding: use a read-write lock in bonding_show_bonds() Haifeng Xu
2023-11-20  9:55         ` Eric Dumazet
2023-11-09 15:47 ` [PATCH] boning: " Jiri Pirko
2023-11-10  1:59   ` Haifeng Xu
2023-11-09 17:55 ` Stephen Hemminger
2023-11-09 18:03   ` Eric Dumazet
2023-11-10  2:35   ` Haifeng Xu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).