* [PATCH rdma-next] IB/core: Avoid deadlock during netlink message handling
@ 2019-10-15  8:07 Leon Romanovsky
  2019-10-24 13:17 ` Jason Gunthorpe
  0 siblings, 1 reply; 12+ messages in thread
From: Leon Romanovsky @ 2019-10-15  8:07 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Parav Pandit, RDMA mailing list, Jason Gunthorpe, Leon Romanovsky

From: Parav Pandit <parav@mellanox.com>

When the rdmacm module is not loaded and a netlink message is received
to get char device info, it results in a deadlock due to recursive
locking of rdma_nl_mutex in the call sequence below.

[..]
  rdma_nl_rcv()
  mutex_lock()
  rdma_nl_rcv_msg()
      ib_get_client_nl_info()
         request_module()
           iw_cm_init()
             rdma_nl_register()
               mutex_lock(); <- Deadlock, acquiring mutex again

Due to the above call sequence, the following call trace and deadlock
are observed.

kernel: __mutex_lock+0x35e/0x860
kernel: ? __mutex_lock+0x129/0x860
kernel: ? rdma_nl_register+0x1a/0x90 [ib_core]
kernel: rdma_nl_register+0x1a/0x90 [ib_core]
kernel: ? 0xffffffffc029b000
kernel: iw_cm_init+0x34/0x1000 [iw_cm]
kernel: do_one_initcall+0x67/0x2d4
kernel: ? kmem_cache_alloc_trace+0x1ec/0x2a0
kernel: do_init_module+0x5a/0x223
kernel: load_module+0x1998/0x1e10
kernel: ? __symbol_put+0x60/0x60
kernel: __do_sys_finit_module+0x94/0xe0
kernel: do_syscall_64+0x5a/0x270
kernel: entry_SYSCALL_64_after_hwframe+0x49/0xbe

process stack trace:
[<0>] __request_module+0x1c9/0x460
[<0>] ib_get_client_nl_info+0x5e/0xb0 [ib_core]
[<0>] nldev_get_chardev+0x1ac/0x320 [ib_core]
[<0>] rdma_nl_rcv_msg+0xeb/0x1d0 [ib_core]
[<0>] rdma_nl_rcv+0xcd/0x120 [ib_core]
[<0>] netlink_unicast+0x179/0x220
[<0>] netlink_sendmsg+0x2f6/0x3f0
[<0>] sock_sendmsg+0x30/0x40
[<0>] ___sys_sendmsg+0x27a/0x290
[<0>] __sys_sendmsg+0x58/0xa0
[<0>] do_syscall_64+0x5a/0x270
[<0>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
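
The problematic pattern reduces to roughly the following sketch. This is
illustrative only, with simplified names; it is not the actual ib_core code,
but it shows how request_module() under the handler's mutex recurses into
the same mutex via the loaded module's init:

#include <linux/mutex.h>
#include <linux/kmod.h>
#include <rdma/rdma_netlink.h>

static DEFINE_MUTEX(nl_mutex);
static const struct rdma_nl_cbs *cb_table[RDMA_NL_NUM_CLIENTS];

static void handle_netlink_msg(unsigned int index)
{
	mutex_lock(&nl_mutex);
	if (!cb_table[index])
		/* The loaded module's init ends up in register_client() */
		request_module("rdma-netlink-subsys-%d", index);
	/* ... dispatch to cb_table[index] ... */
	mutex_unlock(&nl_mutex);
}

void register_client(unsigned int index, const struct rdma_nl_cbs *cbs)
{
	mutex_lock(&nl_mutex);	/* deadlock: nl_mutex is already held above */
	cb_table[index] = cbs;
	mutex_unlock(&nl_mutex);
}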

To overcome this deadlock and to allow multiple netlink messages to
progress in parallel, the following scheme is implemented.

1. Block netlink table unregistration of a client until all callers
finish executing the callback for that client.

2. Netlink clients shouldn't register a table multiple times for a given
index. Such a basic requirement from the two non-ib_core modules eliminates
the mutex usage for table registration (a minimal sketch of the resulting
scheme follows).
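
A rough sketch of the scheme, assuming simplified names; the real code is in
the diff below. Each index has its own srcu_struct (initialized elsewhere
with init_srcu_struct()), readers run callbacks inside an SRCU read-side
section, and unregistration waits out all in-flight readers:

#include <linux/srcu.h>
#include <rdma/rdma_netlink.h>

struct nl_client {
	const struct rdma_nl_cbs __rcu *cb_table;
	struct srcu_struct unreg_srcu;
};

/* Registration publishes the table with no lock held, so the
 * request_module() path can no longer recurse into a held mutex.
 */
static void register_client(struct nl_client *c, const struct rdma_nl_cbs *cbs)
{
	rcu_assign_pointer(c->cb_table, cbs);
}

/* Message handling: run the callback inside an SRCU read-side section */
static int run_doit(struct nl_client *c, unsigned int op, struct sk_buff *skb,
		    struct nlmsghdr *nlh, struct netlink_ext_ack *extack)
{
	const struct rdma_nl_cbs *cbs;
	int idx, err = -EINVAL;

	idx = srcu_read_lock(&c->unreg_srcu);
	cbs = srcu_dereference(c->cb_table, &c->unreg_srcu);
	if (cbs && cbs[op].doit)
		err = cbs[op].doit(skb, nlh, extack);
	srcu_read_unlock(&c->unreg_srcu, idx);
	return err;
}

/* Unregistration: hide the table, then wait for all in-flight readers */
static void unregister_client(struct nl_client *c)
{
	rcu_assign_pointer(c->cb_table, NULL);
	synchronize_srcu(&c->unreg_srcu);
}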

Fixes: 0e2d00eb6fd45 ("RDMA: Add NLDEV_GET_CHARDEV to allow char dev discovery and autoload")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/core_priv.h |  1 +
 drivers/infiniband/core/device.c    |  2 +
 drivers/infiniband/core/netlink.c   | 93 ++++++++++++++++-------------
 3 files changed, 54 insertions(+), 42 deletions(-)

diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 3a8b0911c3bc..9d07378b5b42 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -199,6 +199,7 @@ void ib_mad_cleanup(void);
 int ib_sa_init(void);
 void ib_sa_cleanup(void);
 
+void rdma_nl_init(void);
 void rdma_nl_exit(void);
 
 int ib_nl_handle_resolve_resp(struct sk_buff *skb,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 2e53aa25f0c7..2f89c4d64b73 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2716,6 +2716,8 @@ static int __init ib_core_init(void)
 		goto err_comp_unbound;
 	}
 
+	rdma_nl_init();
+
 	ret = addr_init();
 	if (ret) {
 		pr_warn("Could't init IB address resolution\n");
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 81dbd5f41bed..a3507b8be569 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -42,9 +42,12 @@
 #include <linux/module.h>
 #include "core_priv.h"
 
-static DEFINE_MUTEX(rdma_nl_mutex);
 static struct {
-	const struct rdma_nl_cbs   *cb_table;
+	const struct rdma_nl_cbs __rcu *cb_table;
+	/* Synchronizes between ongoing netlink commands and netlink client
+	 * unregistration.
+	 */
+	struct srcu_struct unreg_srcu;
 } rdma_nl_types[RDMA_NL_NUM_CLIENTS];
 
 bool rdma_nl_chk_listeners(unsigned int group)
@@ -78,8 +81,6 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op)
 static bool
 is_nl_valid(const struct sk_buff *skb, unsigned int type, unsigned int op)
 {
-	const struct rdma_nl_cbs *cb_table;
-
 	if (!is_nl_msg_valid(type, op))
 		return false;
 
@@ -90,23 +91,12 @@ is_nl_valid(const struct sk_buff *skb, unsigned int type, unsigned int op)
 	if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV)
 		return false;
 
-	if (!rdma_nl_types[type].cb_table) {
-		mutex_unlock(&rdma_nl_mutex);
-		request_module("rdma-netlink-subsys-%d", type);
-		mutex_lock(&rdma_nl_mutex);
-	}
-
-	cb_table = rdma_nl_types[type].cb_table;
-
-	if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
-		return false;
 	return true;
 }
 
 void rdma_nl_register(unsigned int index,
 		      const struct rdma_nl_cbs cb_table[])
 {
-	mutex_lock(&rdma_nl_mutex);
 	if (!is_nl_msg_valid(index, 0)) {
 		/*
 		 * All clients are not interesting in success/failure of
@@ -114,31 +104,21 @@ void rdma_nl_register(unsigned int index,
 		 * continue their initialization. Print warning for them,
 		 * because it is programmer's error to be here.
 		 */
-		mutex_unlock(&rdma_nl_mutex);
 		WARN(true,
 		     "The not-valid %u index was supplied to RDMA netlink\n",
 		     index);
 		return;
 	}
 
-	if (rdma_nl_types[index].cb_table) {
-		mutex_unlock(&rdma_nl_mutex);
-		WARN(true,
-		     "The %u index is already registered in RDMA netlink\n",
-		     index);
-		return;
-	}
-
-	rdma_nl_types[index].cb_table = cb_table;
-	mutex_unlock(&rdma_nl_mutex);
+	/* Publish now that this table entry can be accessed */
+	rcu_assign_pointer(rdma_nl_types[index].cb_table, cb_table);
 }
 EXPORT_SYMBOL(rdma_nl_register);
 
 void rdma_nl_unregister(unsigned int index)
 {
-	mutex_lock(&rdma_nl_mutex);
-	rdma_nl_types[index].cb_table = NULL;
-	mutex_unlock(&rdma_nl_mutex);
+	rcu_assign_pointer(rdma_nl_types[index].cb_table, NULL);
+	synchronize_srcu(&rdma_nl_types[index].unreg_srcu);
 }
 EXPORT_SYMBOL(rdma_nl_unregister);
 
@@ -170,15 +150,35 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	unsigned int index = RDMA_NL_GET_CLIENT(type);
 	unsigned int op = RDMA_NL_GET_OP(type);
 	const struct rdma_nl_cbs *cb_table;
+	int srcu_idx;
+	int err = -EINVAL;
 
 	if (!is_nl_valid(skb, index, op))
 		return -EINVAL;
 
-	cb_table = rdma_nl_types[index].cb_table;
+	srcu_idx = srcu_read_lock(&rdma_nl_types[index].unreg_srcu);
+	cb_table = srcu_dereference(rdma_nl_types[index].cb_table,
+				    &rdma_nl_types[index].unreg_srcu);
+	if (!cb_table) {
+		/* Didn't get valid reference of the table;
+		 * attempt module load once.
+		 */
+		srcu_read_unlock(&rdma_nl_types[index].unreg_srcu, srcu_idx);
+
+		request_module("rdma-netlink-subsys-%d", index);
+
+		srcu_idx = srcu_read_lock(&rdma_nl_types[index].unreg_srcu);
+		cb_table = srcu_dereference(rdma_nl_types[index].cb_table,
+					    &rdma_nl_types[index].unreg_srcu);
+	}
+	if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
+		goto done;
 
 	if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) &&
-	    !netlink_capable(skb, CAP_NET_ADMIN))
-		return -EPERM;
+	    !netlink_capable(skb, CAP_NET_ADMIN)) {
+		err = -EPERM;
+		goto done;
+	}
 
 	/*
 	 * LS responses overload the 0x100 (NLM_F_ROOT) flag.  Don't
@@ -186,8 +186,8 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	 */
 	if (index == RDMA_NL_LS) {
 		if (cb_table[op].doit)
-			return cb_table[op].doit(skb, nlh, extack);
-		return -EINVAL;
+			err = cb_table[op].doit(skb, nlh, extack);
+		goto done;
 	}
 	/* FIXME: Convert IWCM to properly handle doit callbacks */
 	if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) {
@@ -195,14 +195,15 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 			.dump = cb_table[op].dump,
 		};
 		if (c.dump)
-			return netlink_dump_start(skb->sk, skb, nlh, &c);
-		return -EINVAL;
+			err = netlink_dump_start(skb->sk, skb, nlh, &c);
+		goto done;
 	}
 
 	if (cb_table[op].doit)
-		return cb_table[op].doit(skb, nlh, extack);
-
-	return 0;
+		err = cb_table[op].doit(skb, nlh, extack);
+done:
+	srcu_read_unlock(&rdma_nl_types[index].unreg_srcu, srcu_idx);
+	return err;
 }
 
 /*
@@ -263,9 +264,7 @@ static int rdma_nl_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 
 static void rdma_nl_rcv(struct sk_buff *skb)
 {
-	mutex_lock(&rdma_nl_mutex);
 	rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg);
-	mutex_unlock(&rdma_nl_mutex);
 }
 
 int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid)
@@ -297,14 +296,24 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(rdma_nl_multicast);
 
-void rdma_nl_exit(void)
+void rdma_nl_init(void)
 {
 	int idx;
 
 	for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
+		init_srcu_struct(&rdma_nl_types[idx].unreg_srcu);
+}
+
+void rdma_nl_exit(void)
+{
+	int idx;
+
+	for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++) {
+		cleanup_srcu_struct(&rdma_nl_types[idx].unreg_srcu);
 		WARN(rdma_nl_types[idx].cb_table,
 		     "Netlink client %d wasn't released prior to unloading %s\n",
 		     idx, KBUILD_MODNAME);
+	}
 }
 
 int rdma_nl_net_init(struct rdma_dev_net *rnet)
-- 
2.20.1



* Re: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message handling
  2019-10-15  8:07 [PATCH rdma-next] IB/core: Avoid deadlock during netlink message handling Leon Romanovsky
@ 2019-10-24 13:17 ` Jason Gunthorpe
  2019-10-24 13:26   ` Leon Romanovsky
  0 siblings, 1 reply; 12+ messages in thread
From: Jason Gunthorpe @ 2019-10-24 13:17 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Doug Ledford, Parav Pandit, RDMA mailing list, Leon Romanovsky

On Tue, Oct 15, 2019 at 11:07:33AM +0300, Leon Romanovsky wrote:

> diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
> index 81dbd5f41bed..a3507b8be569 100644
> +++ b/drivers/infiniband/core/netlink.c
> @@ -42,9 +42,12 @@
>  #include <linux/module.h>
>  #include "core_priv.h"
>  
> -static DEFINE_MUTEX(rdma_nl_mutex);
>  static struct {
> -	const struct rdma_nl_cbs   *cb_table;
> +	const struct rdma_nl_cbs __rcu *cb_table;
> +	/* Synchronizes between ongoing netlink commands and netlink client
> +	 * unregistration.
> +	 */
> +	struct srcu_struct unreg_srcu;

A srcu in every index is serious overkill for this. Let's just use an
rwsem:

From 3ab38b889ab6b85b8689a8cd9bccbf2b28711677 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 15 Oct 2019 11:07:33 +0300
Subject: [PATCH] IB/core: Avoid deadlock during netlink message handling

When the rdmacm module is not loaded and a netlink message is received to
get char device info, it results in a deadlock due to recursive locking
of rdma_nl_mutex, as shown in the call sequence below.

[..]
  rdma_nl_rcv()
  mutex_lock()
   [..]
   rdma_nl_rcv_msg()
      ib_get_client_nl_info()
         request_module()
           iw_cm_init()
             rdma_nl_register()
               mutex_lock(); <- Deadlock, acquiring mutex again

Due to the above call sequence, the following call trace and deadlock are observed.

  kernel: __mutex_lock+0x35e/0x860
  kernel: ? __mutex_lock+0x129/0x860
  kernel: ? rdma_nl_register+0x1a/0x90 [ib_core]
  kernel: rdma_nl_register+0x1a/0x90 [ib_core]
  kernel: ? 0xffffffffc029b000
  kernel: iw_cm_init+0x34/0x1000 [iw_cm]
  kernel: do_one_initcall+0x67/0x2d4
  kernel: ? kmem_cache_alloc_trace+0x1ec/0x2a0
  kernel: do_init_module+0x5a/0x223
  kernel: load_module+0x1998/0x1e10
  kernel: ? __symbol_put+0x60/0x60
  kernel: __do_sys_finit_module+0x94/0xe0
  kernel: do_syscall_64+0x5a/0x270
  kernel: entry_SYSCALL_64_after_hwframe+0x49/0xbe

  process stack trace:
  [<0>] __request_module+0x1c9/0x460
  [<0>] ib_get_client_nl_info+0x5e/0xb0 [ib_core]
  [<0>] nldev_get_chardev+0x1ac/0x320 [ib_core]
  [<0>] rdma_nl_rcv_msg+0xeb/0x1d0 [ib_core]
  [<0>] rdma_nl_rcv+0xcd/0x120 [ib_core]
  [<0>] netlink_unicast+0x179/0x220
  [<0>] netlink_sendmsg+0x2f6/0x3f0
  [<0>] sock_sendmsg+0x30/0x40
  [<0>] ___sys_sendmsg+0x27a/0x290
  [<0>] __sys_sendmsg+0x58/0xa0
  [<0>] do_syscall_64+0x5a/0x270
  [<0>] entry_SYSCALL_64_after_hwframe+0x49/0xbe

To overcome this deadlock and to allow multiple netlink messages to
progress in parallel, the following scheme is implemented.

1. Split the lock protecting the cb_table into a per-index lock, and make
   it an rwsem. This lock is used to ensure no callbacks are running after
   unregistration returns. Since a module will not be registered once it
   is already running callbacks, this avoids the deadlock.

2. Use smp_store_release() to update the cb_table during registration so
   that no lock is required. This avoids lockdep problems with thinking
   all the rwsems are the same lock class (a rough sketch of this shape
   follows).
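
Sketched roughly with simplified names (illustrative only; the actual patch
follows, and each per-index sem is assumed to be initialized with
init_rwsem() at startup):

#include <linux/rwsem.h>
#include <rdma/rdma_netlink.h>

struct nl_client {
	const struct rdma_nl_cbs *cb_table;
	struct rw_semaphore sem;
};

/* Registration publishes the table with no lock held */
static void register_client(struct nl_client *c, const struct rdma_nl_cbs *cbs)
{
	smp_store_release(&c->cb_table, cbs); /* pairs with READ_ONCE() below */
}

/* Message handling holds the per-index lock for reading across the callback */
static int run_doit(struct nl_client *c, unsigned int op, struct sk_buff *skb,
		    struct nlmsghdr *nlh, struct netlink_ext_ack *extack)
{
	const struct rdma_nl_cbs *cbs;
	int err = -EINVAL;

	down_read(&c->sem);
	cbs = READ_ONCE(c->cb_table);
	if (cbs && cbs[op].doit)
		err = cbs[op].doit(skb, nlh, extack);
	up_read(&c->sem);
	return err;
}

/* Unregistration excludes all readers, so no callback can still be running */
static void unregister_client(struct nl_client *c)
{
	down_write(&c->sem);
	c->cb_table = NULL;
	up_write(&c->sem);
}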

Fixes: 0e2d00eb6fd45 ("RDMA: Add NLDEV_GET_CHARDEV to allow char dev discovery and autoload")
Link: https://lore.kernel.org/r/20191015080733.18625-1-leon@kernel.org
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/core_priv.h |   1 +
 drivers/infiniband/core/device.c    |   2 +
 drivers/infiniband/core/netlink.c   | 107 ++++++++++++++--------------
 3 files changed, 56 insertions(+), 54 deletions(-)

diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 3a8b0911c3bc16..9d07378b5b4230 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -199,6 +199,7 @@ void ib_mad_cleanup(void);
 int ib_sa_init(void);
 void ib_sa_cleanup(void);
 
+void rdma_nl_init(void);
 void rdma_nl_exit(void);
 
 int ib_nl_handle_resolve_resp(struct sk_buff *skb,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 2dd2cfe9b56136..50a92442c4f7c1 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2716,6 +2716,8 @@ static int __init ib_core_init(void)
 		goto err_comp_unbound;
 	}
 
+	rdma_nl_init();
+
 	ret = addr_init();
 	if (ret) {
 		pr_warn("Could't init IB address resolution\n");
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 81dbd5f41beda2..8cd31ef25eff20 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -42,9 +42,12 @@
 #include <linux/module.h>
 #include "core_priv.h"
 
-static DEFINE_MUTEX(rdma_nl_mutex);
 static struct {
-	const struct rdma_nl_cbs   *cb_table;
+	const struct rdma_nl_cbs *cb_table;
+	/* Synchronizes between ongoing netlink commands and netlink client
+	 * unregistration.
+	 */
+	struct rw_semaphore sem;
 } rdma_nl_types[RDMA_NL_NUM_CLIENTS];
 
 bool rdma_nl_chk_listeners(unsigned int group)
@@ -75,70 +78,53 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op)
 	return (op < max_num_ops[type]) ? true : false;
 }
 
-static bool
-is_nl_valid(const struct sk_buff *skb, unsigned int type, unsigned int op)
+static const struct rdma_nl_cbs *
+get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op)
 {
 	const struct rdma_nl_cbs *cb_table;
 
-	if (!is_nl_msg_valid(type, op))
-		return false;
-
 	/*
 	 * Currently only NLDEV client is supporting netlink commands in
 	 * non init_net net namespace.
 	 */
 	if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV)
-		return false;
+		return NULL;
 
-	if (!rdma_nl_types[type].cb_table) {
-		mutex_unlock(&rdma_nl_mutex);
-		request_module("rdma-netlink-subsys-%d", type);
-		mutex_lock(&rdma_nl_mutex);
-	}
+	cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
+	if (!cb_table) {
+		/*
+		 * Didn't get valid reference of the table, attempt module
+		 * load once.
+		 */
+		up_read(&rdma_nl_types[type].sem);
 
-	cb_table = rdma_nl_types[type].cb_table;
+		request_module("rdma-netlink-subsys-%d", type);
 
+		down_read(&rdma_nl_types[type].sem);
+		cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
+	}
 	if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
-		return false;
-	return true;
+		return NULL;
+	return cb_table;
 }
 
 void rdma_nl_register(unsigned int index,
 		      const struct rdma_nl_cbs cb_table[])
 {
-	mutex_lock(&rdma_nl_mutex);
-	if (!is_nl_msg_valid(index, 0)) {
-		/*
-		 * All clients are not interesting in success/failure of
-		 * this call. They want to see the print to error log and
-		 * continue their initialization. Print warning for them,
-		 * because it is programmer's error to be here.
-		 */
-		mutex_unlock(&rdma_nl_mutex);
-		WARN(true,
-		     "The not-valid %u index was supplied to RDMA netlink\n",
-		     index);
+	if (WARN_ON(!is_nl_msg_valid(index, 0)) ||
+	    WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table)))
 		return;
-	}
-
-	if (rdma_nl_types[index].cb_table) {
-		mutex_unlock(&rdma_nl_mutex);
-		WARN(true,
-		     "The %u index is already registered in RDMA netlink\n",
-		     index);
-		return;
-	}
 
-	rdma_nl_types[index].cb_table = cb_table;
-	mutex_unlock(&rdma_nl_mutex);
+	/* Pairs with the READ_ONCE in is_nl_valid() */
+	smp_store_release(&rdma_nl_types[index].cb_table, cb_table);
 }
 EXPORT_SYMBOL(rdma_nl_register);
 
 void rdma_nl_unregister(unsigned int index)
 {
-	mutex_lock(&rdma_nl_mutex);
+	down_write(&rdma_nl_types[index].sem);
 	rdma_nl_types[index].cb_table = NULL;
-	mutex_unlock(&rdma_nl_mutex);
+	up_write(&rdma_nl_types[index].sem);
 }
 EXPORT_SYMBOL(rdma_nl_unregister);
 
@@ -170,15 +156,21 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	unsigned int index = RDMA_NL_GET_CLIENT(type);
 	unsigned int op = RDMA_NL_GET_OP(type);
 	const struct rdma_nl_cbs *cb_table;
+	int err = -EINVAL;
 
-	if (!is_nl_valid(skb, index, op))
+	if (!is_nl_msg_valid(index, op))
 		return -EINVAL;
 
-	cb_table = rdma_nl_types[index].cb_table;
+	down_read(&rdma_nl_types[index].sem);
+	cb_table = get_cb_table(skb, index, op);
+	if (!cb_table)
+		goto done;
 
 	if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) &&
-	    !netlink_capable(skb, CAP_NET_ADMIN))
-		return -EPERM;
+	    !netlink_capable(skb, CAP_NET_ADMIN)) {
+		err = -EPERM;
+		goto done;
+	}
 
 	/*
 	 * LS responses overload the 0x100 (NLM_F_ROOT) flag.  Don't
@@ -186,8 +178,8 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	 */
 	if (index == RDMA_NL_LS) {
 		if (cb_table[op].doit)
-			return cb_table[op].doit(skb, nlh, extack);
-		return -EINVAL;
+			err = cb_table[op].doit(skb, nlh, extack);
+		goto done;
 	}
 	/* FIXME: Convert IWCM to properly handle doit callbacks */
 	if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) {
@@ -195,14 +187,15 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 			.dump = cb_table[op].dump,
 		};
 		if (c.dump)
-			return netlink_dump_start(skb->sk, skb, nlh, &c);
-		return -EINVAL;
+			err = netlink_dump_start(skb->sk, skb, nlh, &c);
+		goto done;
 	}
 
 	if (cb_table[op].doit)
-		return cb_table[op].doit(skb, nlh, extack);
-
-	return 0;
+		err = cb_table[op].doit(skb, nlh, extack);
+done:
+	up_read(&rdma_nl_types[index].sem);
+	return err;
 }
 
 /*
@@ -263,9 +256,7 @@ static int rdma_nl_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 
 static void rdma_nl_rcv(struct sk_buff *skb)
 {
-	mutex_lock(&rdma_nl_mutex);
 	rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg);
-	mutex_unlock(&rdma_nl_mutex);
 }
 
 int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid)
@@ -297,6 +288,14 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(rdma_nl_multicast);
 
+void rdma_nl_init(void)
+{
+	int idx;
+
+	for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
+		init_rwsem(&rdma_nl_types[idx].sem);
+}
+
 void rdma_nl_exit(void)
 {
 	int idx;
-- 
2.23.0



* Re: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message handling
  2019-10-24 13:17 ` Jason Gunthorpe
@ 2019-10-24 13:26   ` Leon Romanovsky
  2019-10-24 13:50     ` Jason Gunthorpe
  0 siblings, 1 reply; 12+ messages in thread
From: Leon Romanovsky @ 2019-10-24 13:26 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, Parav Pandit, RDMA mailing list

On Thu, Oct 24, 2019 at 10:17:43AM -0300, Jason Gunthorpe wrote:
> On Tue, Oct 15, 2019 at 11:07:33AM +0300, Leon Romanovsky wrote:
>
> > diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
> > index 81dbd5f41bed..a3507b8be569 100644
> > +++ b/drivers/infiniband/core/netlink.c
> > @@ -42,9 +42,12 @@
> >  #include <linux/module.h>
> >  #include "core_priv.h"
> >
> > -static DEFINE_MUTEX(rdma_nl_mutex);
> >  static struct {
> > -	const struct rdma_nl_cbs   *cb_table;
> > +	const struct rdma_nl_cbs __rcu *cb_table;
> > +	/* Synchronizes between ongoing netlink commands and netlink client
> > +	 * unregistration.
> > +	 */
> > +	struct srcu_struct unreg_srcu;
>
> A srcu in every index is serious overkill for this. Lets just us a
> rwsem:

I liked the previous variant more than the rwsem, but it is Parav's patch.

Thanks


* Re: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message handling
  2019-10-24 13:26   ` Leon Romanovsky
@ 2019-10-24 13:50     ` Jason Gunthorpe
  2019-10-24 16:02       ` Leon Romanovsky
  0 siblings, 1 reply; 12+ messages in thread
From: Jason Gunthorpe @ 2019-10-24 13:50 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: Doug Ledford, Parav Pandit, RDMA mailing list

On Thu, Oct 24, 2019 at 04:26:07PM +0300, Leon Romanovsky wrote:
> On Thu, Oct 24, 2019 at 10:17:43AM -0300, Jason Gunthorpe wrote:
> > On Tue, Oct 15, 2019 at 11:07:33AM +0300, Leon Romanovsky wrote:
> >
> > > diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
> > > index 81dbd5f41bed..a3507b8be569 100644
> > > +++ b/drivers/infiniband/core/netlink.c
> > > @@ -42,9 +42,12 @@
> > >  #include <linux/module.h>
> > >  #include "core_priv.h"
> > >
> > > -static DEFINE_MUTEX(rdma_nl_mutex);
> > >  static struct {
> > > -	const struct rdma_nl_cbs   *cb_table;
> > > +	const struct rdma_nl_cbs __rcu *cb_table;
> > > +	/* Synchronizes between ongoing netlink commands and netlink client
> > > +	 * unregistration.
> > > +	 */
> > > +	struct srcu_struct unreg_srcu;
> >
> > A srcu in every index is serious overkill for this. Lets just us a
> > rwsem:
> 
> I liked previous variant more than rwsem, but it is Parav's patch.

Why? srcu is a huge data structure and slow on unregister

Jason


* Re: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message handling
  2019-10-24 13:50     ` Jason Gunthorpe
@ 2019-10-24 16:02       ` Leon Romanovsky
  2019-10-24 16:08         ` Jason Gunthorpe
  0 siblings, 1 reply; 12+ messages in thread
From: Leon Romanovsky @ 2019-10-24 16:02 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, Parav Pandit, RDMA mailing list

On Thu, Oct 24, 2019 at 10:50:17AM -0300, Jason Gunthorpe wrote:
> On Thu, Oct 24, 2019 at 04:26:07PM +0300, Leon Romanovsky wrote:
> > On Thu, Oct 24, 2019 at 10:17:43AM -0300, Jason Gunthorpe wrote:
> > > On Tue, Oct 15, 2019 at 11:07:33AM +0300, Leon Romanovsky wrote:
> > >
> > > > diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
> > > > index 81dbd5f41bed..a3507b8be569 100644
> > > > +++ b/drivers/infiniband/core/netlink.c
> > > > @@ -42,9 +42,12 @@
> > > >  #include <linux/module.h>
> > > >  #include "core_priv.h"
> > > >
> > > > -static DEFINE_MUTEX(rdma_nl_mutex);
> > > >  static struct {
> > > > -	const struct rdma_nl_cbs   *cb_table;
> > > > +	const struct rdma_nl_cbs __rcu *cb_table;
> > > > +	/* Synchronizes between ongoing netlink commands and netlink client
> > > > +	 * unregistration.
> > > > +	 */
> > > > +	struct srcu_struct unreg_srcu;
> > >
> > > A srcu in every index is serious overkill for this. Lets just us a
> > > rwsem:
> >
> > I liked previous variant more than rwsem, but it is Parav's patch.
>
> Why? srcu is a huge data structure and slow on unregister

The unregister time is not so important for those IB/core modules.
I liked SRCU because it doesn't have *_ONCE() macros and smp_* calls.

Maybe I'm wrong here, but the extra advantage of SRCU is that we are already
using that mechanism in uverbs, and my assumption is that SRCU will greatly
benefit from a shared grace period.

Thanks

>
> Jason


* Re: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message handling
  2019-10-24 16:02       ` Leon Romanovsky
@ 2019-10-24 16:08         ` Jason Gunthorpe
  2019-10-24 16:13           ` Leon Romanovsky
  0 siblings, 1 reply; 12+ messages in thread
From: Jason Gunthorpe @ 2019-10-24 16:08 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: Doug Ledford, Parav Pandit, RDMA mailing list

On Thu, Oct 24, 2019 at 07:02:52PM +0300, Leon Romanovsky wrote:
> On Thu, Oct 24, 2019 at 10:50:17AM -0300, Jason Gunthorpe wrote:
> > On Thu, Oct 24, 2019 at 04:26:07PM +0300, Leon Romanovsky wrote:
> > > On Thu, Oct 24, 2019 at 10:17:43AM -0300, Jason Gunthorpe wrote:
> > > > On Tue, Oct 15, 2019 at 11:07:33AM +0300, Leon Romanovsky wrote:
> > > >
> > > > > diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
> > > > > index 81dbd5f41bed..a3507b8be569 100644
> > > > > +++ b/drivers/infiniband/core/netlink.c
> > > > > @@ -42,9 +42,12 @@
> > > > >  #include <linux/module.h>
> > > > >  #include "core_priv.h"
> > > > >
> > > > > -static DEFINE_MUTEX(rdma_nl_mutex);
> > > > >  static struct {
> > > > > -	const struct rdma_nl_cbs   *cb_table;
> > > > > +	const struct rdma_nl_cbs __rcu *cb_table;
> > > > > +	/* Synchronizes between ongoing netlink commands and netlink client
> > > > > +	 * unregistration.
> > > > > +	 */
> > > > > +	struct srcu_struct unreg_srcu;
> > > >
> > > > A srcu in every index is serious overkill for this. Lets just us a
> > > > rwsem:
> > >
> > > I liked previous variant more than rwsem, but it is Parav's patch.
> >
> > Why? srcu is a huge data structure and slow on unregister
> 
> The unregister time is not so important for those IB/core modules.
> I liked SRCU because it doesn't have *_ONCE() macros and smb_* calls.

It does, they are just hidden under other macros.

> Maybe wrong here, but the extra advantage of SRCU is that we are already
> using that mechanism in uverbs and my assumption that SRCU will greatly
> enjoy shared grace period.

Hm, I'm not sure it works like that, the grace periods would be consecutive

Jason


* Re: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message handling
  2019-10-24 16:08         ` Jason Gunthorpe
@ 2019-10-24 16:13           ` Leon Romanovsky
  2019-10-24 18:28             ` Parav Pandit
  0 siblings, 1 reply; 12+ messages in thread
From: Leon Romanovsky @ 2019-10-24 16:13 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, Parav Pandit, RDMA mailing list

On Thu, Oct 24, 2019 at 01:08:10PM -0300, Jason Gunthorpe wrote:
> On Thu, Oct 24, 2019 at 07:02:52PM +0300, Leon Romanovsky wrote:
> > On Thu, Oct 24, 2019 at 10:50:17AM -0300, Jason Gunthorpe wrote:
> > > On Thu, Oct 24, 2019 at 04:26:07PM +0300, Leon Romanovsky wrote:
> > > > On Thu, Oct 24, 2019 at 10:17:43AM -0300, Jason Gunthorpe wrote:
> > > > > On Tue, Oct 15, 2019 at 11:07:33AM +0300, Leon Romanovsky wrote:
> > > > >
> > > > > > diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
> > > > > > index 81dbd5f41bed..a3507b8be569 100644
> > > > > > +++ b/drivers/infiniband/core/netlink.c
> > > > > > @@ -42,9 +42,12 @@
> > > > > >  #include <linux/module.h>
> > > > > >  #include "core_priv.h"
> > > > > >
> > > > > > -static DEFINE_MUTEX(rdma_nl_mutex);
> > > > > >  static struct {
> > > > > > -	const struct rdma_nl_cbs   *cb_table;
> > > > > > +	const struct rdma_nl_cbs __rcu *cb_table;
> > > > > > +	/* Synchronizes between ongoing netlink commands and netlink client
> > > > > > +	 * unregistration.
> > > > > > +	 */
> > > > > > +	struct srcu_struct unreg_srcu;
> > > > >
> > > > > A srcu in every index is serious overkill for this. Lets just us a
> > > > > rwsem:
> > > >
> > > > I liked previous variant more than rwsem, but it is Parav's patch.
> > >
> > > Why? srcu is a huge data structure and slow on unregister
> >
> > The unregister time is not so important for those IB/core modules.
> > I liked SRCU because it doesn't have *_ONCE() macros and smb_* calls.
>
> It does, they are just hidden under other macros..
>
> > Maybe wrong here, but the extra advantage of SRCU is that we are already
> > using that mechanism in uverbs and my assumption that SRCU will greatly
> > enjoy shared grace period.
>
> Hm, I'm not sure it works like that, the grace periods would be consecutive

Whatever, the most important part of Parav's patch that we removed
global lock from RDMA netlink interface.

Thanks

>
> Jason


* RE: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message handling
  2019-10-24 16:13           ` Leon Romanovsky
@ 2019-10-24 18:28             ` Parav Pandit
  2019-10-24 18:36               ` Jason Gunthorpe
  0 siblings, 1 reply; 12+ messages in thread
From: Parav Pandit @ 2019-10-24 18:28 UTC (permalink / raw)
  To: Leon Romanovsky, Jason Gunthorpe; +Cc: Doug Ledford, RDMA mailing list



> -----Original Message-----
> From: Leon Romanovsky <leon@kernel.org>
> Sent: Thursday, October 24, 2019 11:13 AM
> To: Jason Gunthorpe <jgg@ziepe.ca>
> Cc: Doug Ledford <dledford@redhat.com>; Parav Pandit
> <parav@mellanox.com>; RDMA mailing list <linux-rdma@vger.kernel.org>
> Subject: Re: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message
> handling
> 
> On Thu, Oct 24, 2019 at 01:08:10PM -0300, Jason Gunthorpe wrote:
> > On Thu, Oct 24, 2019 at 07:02:52PM +0300, Leon Romanovsky wrote:
> > > On Thu, Oct 24, 2019 at 10:50:17AM -0300, Jason Gunthorpe wrote:
> > > > On Thu, Oct 24, 2019 at 04:26:07PM +0300, Leon Romanovsky wrote:
> > > > > On Thu, Oct 24, 2019 at 10:17:43AM -0300, Jason Gunthorpe wrote:
> > > > > > On Tue, Oct 15, 2019 at 11:07:33AM +0300, Leon Romanovsky wrote:
> > > > > >
> > > > > > > diff --git a/drivers/infiniband/core/netlink.c
> > > > > > > b/drivers/infiniband/core/netlink.c
> > > > > > > index 81dbd5f41bed..a3507b8be569 100644
> > > > > > > +++ b/drivers/infiniband/core/netlink.c
> > > > > > > @@ -42,9 +42,12 @@
> > > > > > >  #include <linux/module.h>
> > > > > > >  #include "core_priv.h"
> > > > > > >
> > > > > > > -static DEFINE_MUTEX(rdma_nl_mutex);  static struct {
> > > > > > > -	const struct rdma_nl_cbs   *cb_table;
> > > > > > > +	const struct rdma_nl_cbs __rcu *cb_table;
> > > > > > > +	/* Synchronizes between ongoing netlink commands and
> netlink client
> > > > > > > +	 * unregistration.
> > > > > > > +	 */
> > > > > > > +	struct srcu_struct unreg_srcu;
> > > > > >
> > > > > > A srcu in every index is serious overkill for this. Lets just
> > > > > > us a
> > > > > > rwsem:
> > > > >
> > > > > I liked previous variant more than rwsem, but it is Parav's patch.
> > > >
> > > > Why? srcu is a huge data structure and slow on unregister
> > >
> > > The unregister time is not so important for those IB/core modules.
> > > I liked SRCU because it doesn't have *_ONCE() macros and smb_* calls.
> >
> > It does, they are just hidden under other macros..
> >
It's better that they are hidden, so that we don't need to open-code them.
Also, with SRCU we don't need lock annotations in get_cb_table(), which releases and re-acquires the semaphore.
Additionally, the lock nesting makes it all more complex.
Given that there are only 3 indices, out of which only 2 are outside of the ib_core module and unlikely to be unloaded, I also prefer the SRCU version.


> > > Maybe wrong here, but the extra advantage of SRCU is that we are
> > > already using that mechanism in uverbs and my assumption that SRCU
> > > will greatly enjoy shared grace period.
> >
> > Hm, I'm not sure it works like that, the grace periods would be
> > consecutive
> 
> Whatever, the most important part of Parav's patch that we removed global
> lock from RDMA netlink interface.
> 
> Thanks
> 
> >
> > Jason


* Re: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message handling
  2019-10-24 18:28             ` Parav Pandit
@ 2019-10-24 18:36               ` Jason Gunthorpe
  2019-10-24 19:19                 ` Leon Romanovsky
  0 siblings, 1 reply; 12+ messages in thread
From: Jason Gunthorpe @ 2019-10-24 18:36 UTC (permalink / raw)
  To: Parav Pandit; +Cc: Leon Romanovsky, Doug Ledford, RDMA mailing list

On Thu, Oct 24, 2019 at 06:28:35PM +0000, Parav Pandit wrote:
> 
> 
> > From: Leon Romanovsky <leon@kernel.org>
> > Sent: Thursday, October 24, 2019 11:13 AM
> > To: Jason Gunthorpe <jgg@ziepe.ca>
> > Cc: Doug Ledford <dledford@redhat.com>; Parav Pandit
> > <parav@mellanox.com>; RDMA mailing list <linux-rdma@vger.kernel.org>
> > Subject: Re: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message
> > handling
> > 
> > On Thu, Oct 24, 2019 at 01:08:10PM -0300, Jason Gunthorpe wrote:
> > > On Thu, Oct 24, 2019 at 07:02:52PM +0300, Leon Romanovsky wrote:
> > > > On Thu, Oct 24, 2019 at 10:50:17AM -0300, Jason Gunthorpe wrote:
> > > > > On Thu, Oct 24, 2019 at 04:26:07PM +0300, Leon Romanovsky wrote:
> > > > > > On Thu, Oct 24, 2019 at 10:17:43AM -0300, Jason Gunthorpe wrote:
> > > > > > > On Tue, Oct 15, 2019 at 11:07:33AM +0300, Leon Romanovsky wrote:
> > > > > > >
> > > > > > > > diff --git a/drivers/infiniband/core/netlink.c
> > > > > > > > b/drivers/infiniband/core/netlink.c
> > > > > > > > index 81dbd5f41bed..a3507b8be569 100644
> > > > > > > > +++ b/drivers/infiniband/core/netlink.c
> > > > > > > > @@ -42,9 +42,12 @@
> > > > > > > >  #include <linux/module.h>
> > > > > > > >  #include "core_priv.h"
> > > > > > > >
> > > > > > > > -static DEFINE_MUTEX(rdma_nl_mutex);  static struct {
> > > > > > > > -	const struct rdma_nl_cbs   *cb_table;
> > > > > > > > +	const struct rdma_nl_cbs __rcu *cb_table;
> > > > > > > > +	/* Synchronizes between ongoing netlink commands and
> > netlink client
> > > > > > > > +	 * unregistration.
> > > > > > > > +	 */
> > > > > > > > +	struct srcu_struct unreg_srcu;
> > > > > > >
> > > > > > > A srcu in every index is serious overkill for this. Lets just
> > > > > > > us a
> > > > > > > rwsem:
> > > > > >
> > > > > > I liked previous variant more than rwsem, but it is Parav's patch.
> > > > >
> > > > > Why? srcu is a huge data structure and slow on unregister
> > > >
> > > > The unregister time is not so important for those IB/core modules.
> > > > I liked SRCU because it doesn't have *_ONCE() macros and smb_* calls.
> > >
> > > It does, they are just hidden under other macros..
 
> Its better that they are hidden. So that we don't need open code
> them.

I wouldn't call swapping one function call for another 'open coding'

> Also with srcu, we don't need lock annotations in get_cb_table()
> which releases and acquires semaphore.

You don't need lock annotations for that.

> Additionally lock nesting makes overall more complex.

SRCU nesting is just as complicated! Don't think SRCU magically hides
that issue, it is still proposing to nest SRCU read side sections.

> Given that there are only 3 indices, out of which only 2 are outside
> of the ib_core module and unlikely to be unloaded, I also prefer
> srcu version.

Why? It isn't faster, it uses more memory, it still has the same
complex concurrency arrangement.

Jason


* Re: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message handling
  2019-10-24 18:36               ` Jason Gunthorpe
@ 2019-10-24 19:19                 ` Leon Romanovsky
  2019-10-24 19:53                   ` Parav Pandit
  0 siblings, 1 reply; 12+ messages in thread
From: Leon Romanovsky @ 2019-10-24 19:19 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Parav Pandit, Doug Ledford, RDMA mailing list

On Thu, Oct 24, 2019 at 03:36:39PM -0300, Jason Gunthorpe wrote:
> On Thu, Oct 24, 2019 at 06:28:35PM +0000, Parav Pandit wrote:
> >
> >
> > > From: Leon Romanovsky <leon@kernel.org>
> > > Sent: Thursday, October 24, 2019 11:13 AM
> > > To: Jason Gunthorpe <jgg@ziepe.ca>
> > > Cc: Doug Ledford <dledford@redhat.com>; Parav Pandit
> > > <parav@mellanox.com>; RDMA mailing list <linux-rdma@vger.kernel.org>
> > > Subject: Re: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message
> > > handling
> > >
> > > On Thu, Oct 24, 2019 at 01:08:10PM -0300, Jason Gunthorpe wrote:
> > > > On Thu, Oct 24, 2019 at 07:02:52PM +0300, Leon Romanovsky wrote:
> > > > > On Thu, Oct 24, 2019 at 10:50:17AM -0300, Jason Gunthorpe wrote:
> > > > > > On Thu, Oct 24, 2019 at 04:26:07PM +0300, Leon Romanovsky wrote:
> > > > > > > On Thu, Oct 24, 2019 at 10:17:43AM -0300, Jason Gunthorpe wrote:
> > > > > > > > On Tue, Oct 15, 2019 at 11:07:33AM +0300, Leon Romanovsky wrote:
> > > > > > > >
> > > > > > > > > diff --git a/drivers/infiniband/core/netlink.c
> > > > > > > > > b/drivers/infiniband/core/netlink.c
> > > > > > > > > index 81dbd5f41bed..a3507b8be569 100644
> > > > > > > > > +++ b/drivers/infiniband/core/netlink.c
> > > > > > > > > @@ -42,9 +42,12 @@
> > > > > > > > >  #include <linux/module.h>
> > > > > > > > >  #include "core_priv.h"
> > > > > > > > >
> > > > > > > > > -static DEFINE_MUTEX(rdma_nl_mutex);  static struct {
> > > > > > > > > -	const struct rdma_nl_cbs   *cb_table;
> > > > > > > > > +	const struct rdma_nl_cbs __rcu *cb_table;
> > > > > > > > > +	/* Synchronizes between ongoing netlink commands and
> > > netlink client
> > > > > > > > > +	 * unregistration.
> > > > > > > > > +	 */
> > > > > > > > > +	struct srcu_struct unreg_srcu;
> > > > > > > >
> > > > > > > > A srcu in every index is serious overkill for this. Lets just
> > > > > > > > us a
> > > > > > > > rwsem:
> > > > > > >
> > > > > > > I liked previous variant more than rwsem, but it is Parav's patch.
> > > > > >
> > > > > > Why? srcu is a huge data structure and slow on unregister
> > > > >
> > > > > The unregister time is not so important for those IB/core modules.
> > > > > I liked SRCU because it doesn't have *_ONCE() macros and smb_* calls.
> > > >
> > > > It does, they are just hidden under other macros..
>
> > Its better that they are hidden. So that we don't need open code
> > them.
>
> I wouldn't call swapping one function call for another 'open coding'
>
> > Also with srcu, we don't need lock annotations in get_cb_table()
> > which releases and acquires semaphore.
>
> You don't need lock annoations for that.
>
> > Additionally lock nesting makes overall more complex.
>
> SRCU nesting is just as complicated! Don't think SRCU magically hides
> that issue, it is still proposing to nest SRCU read side sections.
>
> > Given that there are only 3 indices, out of which only 2 are outside
> > of the ib_core module and unlikely to be unloaded, I also prefer
> > srcu version.
>
> Why? It isn't faster, it uses more memory, it still has the same
> complex concurrency arrangement..

Jason,

It isn't worth arguing: both Parav and I prefer the SRCU variant, you
prefer the rwsem, so go for it, take the rwsem, it is not important.

Thanks

>
> Jason


* RE: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message handling
  2019-10-24 19:19                 ` Leon Romanovsky
@ 2019-10-24 19:53                   ` Parav Pandit
  2019-10-24 23:53                     ` Jason Gunthorpe
  0 siblings, 1 reply; 12+ messages in thread
From: Parav Pandit @ 2019-10-24 19:53 UTC (permalink / raw)
  To: Leon Romanovsky, Jason Gunthorpe; +Cc: Doug Ledford, RDMA mailing list



> -----Original Message-----
> From: Leon Romanovsky <leon@kernel.org>
> Sent: Thursday, October 24, 2019 2:20 PM
> To: Jason Gunthorpe <jgg@ziepe.ca>
> Cc: Parav Pandit <parav@mellanox.com>; Doug Ledford
> <dledford@redhat.com>; RDMA mailing list <linux-rdma@vger.kernel.org>
> Subject: Re: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message
> handling
> 
> On Thu, Oct 24, 2019 at 03:36:39PM -0300, Jason Gunthorpe wrote:
> > On Thu, Oct 24, 2019 at 06:28:35PM +0000, Parav Pandit wrote:
> > >
> > >
> > > > From: Leon Romanovsky <leon@kernel.org>
> > > > Sent: Thursday, October 24, 2019 11:13 AM
> > > > To: Jason Gunthorpe <jgg@ziepe.ca>
> > > > Cc: Doug Ledford <dledford@redhat.com>; Parav Pandit
> > > > <parav@mellanox.com>; RDMA mailing list
> > > > <linux-rdma@vger.kernel.org>
> > > > Subject: Re: [PATCH rdma-next] IB/core: Avoid deadlock during
> > > > netlink message handling
> > > >
> > > > On Thu, Oct 24, 2019 at 01:08:10PM -0300, Jason Gunthorpe wrote:
> > > > > On Thu, Oct 24, 2019 at 07:02:52PM +0300, Leon Romanovsky wrote:
> > > > > > On Thu, Oct 24, 2019 at 10:50:17AM -0300, Jason Gunthorpe wrote:
> > > > > > > On Thu, Oct 24, 2019 at 04:26:07PM +0300, Leon Romanovsky
> wrote:
> > > > > > > > On Thu, Oct 24, 2019 at 10:17:43AM -0300, Jason Gunthorpe
> wrote:
> > > > > > > > > On Tue, Oct 15, 2019 at 11:07:33AM +0300, Leon Romanovsky
> wrote:
> > > > > > > > >
> > > > > > > > > > diff --git a/drivers/infiniband/core/netlink.c
> > > > > > > > > > b/drivers/infiniband/core/netlink.c
> > > > > > > > > > index 81dbd5f41bed..a3507b8be569 100644
> > > > > > > > > > +++ b/drivers/infiniband/core/netlink.c
> > > > > > > > > > @@ -42,9 +42,12 @@
> > > > > > > > > >  #include <linux/module.h>  #include "core_priv.h"
> > > > > > > > > >
> > > > > > > > > > -static DEFINE_MUTEX(rdma_nl_mutex);  static struct {
> > > > > > > > > > -	const struct rdma_nl_cbs   *cb_table;
> > > > > > > > > > +	const struct rdma_nl_cbs __rcu *cb_table;
> > > > > > > > > > +	/* Synchronizes between ongoing netlink commands
> and
> > > > netlink client
> > > > > > > > > > +	 * unregistration.
> > > > > > > > > > +	 */
> > > > > > > > > > +	struct srcu_struct unreg_srcu;
> > > > > > > > >
> > > > > > > > > A srcu in every index is serious overkill for this. Lets
> > > > > > > > > just us a
> > > > > > > > > rwsem:
> > > > > > > >
> > > > > > > > I liked previous variant more than rwsem, but it is Parav's patch.
> > > > > > >
> > > > > > > Why? srcu is a huge data structure and slow on unregister
> > > > > >
> > > > > > The unregister time is not so important for those IB/core modules.
> > > > > > I liked SRCU because it doesn't have *_ONCE() macros and smb_*
> calls.
> > > > >
> > > > > It does, they are just hidden under other macros..
> >
> > > Its better that they are hidden. So that we don't need open code
> > > them.
> >
> > I wouldn't call swapping one function call for another 'open coding'
> >
> > > Also with srcu, we don't need lock annotations in get_cb_table()
> > > which releases and acquires semaphore.
> >
> > You don't need lock annoations for that.
> >
> > > Additionally lock nesting makes overall more complex.
> >
> > SRCU nesting is just as complicated! Don't think SRCU magically hides
> > that issue, it is still proposing to nest SRCU read side sections.
> >
> > > Given that there are only 3 indices, out of which only 2 are outside
> > > of the ib_core module and unlikely to be unloaded, I also prefer
> > > srcu version.
> >
> > Why? It isn't faster, it uses more memory, it still has the same
> > complex concurrency arrangement..
> 
> Jason,
> 
> It doesn't worth arguing, both Parav and I prefer SRCU variant, you prefer
> rwsem, so go for it, take rwsem, it is not important.
> 
Jason's memory size point made me curious about the srcu_struct size.
On my x86 5.x kernel I see srcu_struct costs 70+ Kbytes! Likely due to some debug info in my kernel.
That is probably a good reason in this case to shift to the rwsem (rwsem is 80 bytes).
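
A quick way to sanity-check such numbers is a throwaway debug module that
prints the static structure sizes (hypothetical snippet, not part of the
patch; note it does not count the per-CPU data SRCU also allocates):

#include <linux/module.h>
#include <linux/srcu.h>
#include <linux/rwsem.h>

static int __init sizecheck_init(void)
{
	pr_info("srcu_struct: %zu bytes, rw_semaphore: %zu bytes\n",
		sizeof(struct srcu_struct), sizeof(struct rw_semaphore));
	return 0;
}
module_init(sizecheck_init);

static void __exit sizecheck_exit(void)
{
}
module_exit(sizecheck_exit);

MODULE_LICENSE("GPL");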

One small comment correction is needed:

-	rdma_nl_types[index].cb_table = cb_table;
-	mutex_unlock(&rdma_nl_mutex);
+	/* Pairs with the READ_ONCE in is_nl_valid() */
+	smp_store_release(&rdma_nl_types[index].cb_table, cb_table);

It should be "Pairs with the READ_ONCE in get_cb_table()".

> Thanks
> 
> >
> > Jason


* Re: [PATCH rdma-next] IB/core: Avoid deadlock during netlink message handling
  2019-10-24 19:53                   ` Parav Pandit
@ 2019-10-24 23:53                     ` Jason Gunthorpe
  0 siblings, 0 replies; 12+ messages in thread
From: Jason Gunthorpe @ 2019-10-24 23:53 UTC (permalink / raw)
  To: Parav Pandit; +Cc: Leon Romanovsky, Doug Ledford, RDMA mailing list

On Thu, Oct 24, 2019 at 07:53:56PM +0000, Parav Pandit wrote:

> Jason's memory size point made be curious about the srcu_struct
> size.  On my x86 5.x kernel I see srcu_struct costs 70+Kbytes!
> Likely due to some debug info in my kernel.  Which is probably a
> good reason in this case to shift to rwsem. (rwsem is 80 bytes).

Yikes, I knew it was bad, but I didn't think it was that bad. So
unbelievable that I checked here, and I got +4752 bytes using SRCU on my
much less debug-heavy kernel. Crazy.

> One small comment correction needed is,
> 
> -	rdma_nl_types[index].cb_table = cb_table;
> -	mutex_unlock(&rdma_nl_mutex);
> +	/* Pairs with the READ_ONCE in is_nl_valid() */
> +	smp_store_release(&rdma_nl_types[index].cb_table, cb_table);
> 
> It should be "Pairs with the READ_ONE in get_cb_table() */

Done, applied to for-rc, thanks

Jason


Thread overview: 12+ messages
2019-10-15  8:07 [PATCH rdma-next] IB/core: Avoid deadlock during netlink message handling Leon Romanovsky
2019-10-24 13:17 ` Jason Gunthorpe
2019-10-24 13:26   ` Leon Romanovsky
2019-10-24 13:50     ` Jason Gunthorpe
2019-10-24 16:02       ` Leon Romanovsky
2019-10-24 16:08         ` Jason Gunthorpe
2019-10-24 16:13           ` Leon Romanovsky
2019-10-24 18:28             ` Parav Pandit
2019-10-24 18:36               ` Jason Gunthorpe
2019-10-24 19:19                 ` Leon Romanovsky
2019-10-24 19:53                   ` Parav Pandit
2019-10-24 23:53                     ` Jason Gunthorpe
