All of lore.kernel.org
 help / color / mirror / Atom feed
From: Parav Pandit <parav@nvidia.com>
To: <netdev@vger.kernel.org>, <linux-rdma@vger.kernel.org>,
	<gregkh@linuxfoundation.org>
Cc: <jiri@nvidia.com>, <jgg@nvidia.com>, <dledford@redhat.com>,
	<leonro@nvidia.com>, <saeedm@nvidia.com>, <kuba@kernel.org>,
	<davem@davemloft.net>, Parav Pandit <parav@nvidia.com>
Subject: [PATCH net-next 06/13] devlink: Introduce devlink refcount to reduce scope of global devlink_mutex
Date: Thu, 12 Nov 2020 21:24:16 +0200	[thread overview]
Message-ID: <20201112192424.2742-7-parav@nvidia.com> (raw)
In-Reply-To: <20201112192424.2742-1-parav@nvidia.com>

Currently global devlink_mutex is held while a doit() operation is
progress. This brings a limitation.

A Driver cannot perform devlink_register()/unregister() calls
during devlink doit() callback functions.
This is typically required when a port state change described in
RFC [1] callback wants to delete an active SF port or wants to
activate a SF port that results into unregistering or registering a
devlink instance on different bus such as ancillary bus.

An example flow:

devlink_predoit()
  mutex_lock(&devlink_mutex); <- First lock acquire
  devlink_reload()
    driver->reload_down(inactive)
        adev->remove();
           mlx5_adev_remove(ancillary_dev);
             devlink_unregister(ancillary_dev->devlink_instance);
               mutex_lock(&devlink_mutex); <- Second lock acquire

This patch is preparation patch to enable drivers to achieve this.

It achieves this by maintaining a per devlink instance refcount to
prevent devlink device unregistration while user command are in progress
or while devlink device is migration to init_net net namespace.

devlink_nl_family continue to remain registered with parallel_ops
disabled. So even after removing devlink_mutex during doit commands,
it doesn't enable userspace to run multiple devlink commands for one
or multiple devlink instance.

[1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
 include/net/devlink.h |  5 +++
 net/core/devlink.c    | 84 +++++++++++++++++++++++++++++++------------
 2 files changed, 67 insertions(+), 22 deletions(-)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index ef487b8ed17b..c8eab814c234 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -53,6 +53,11 @@ struct devlink {
 			    * port, sb, dpipe, resource, params, region, traps and more.
 			    */
 	struct mutex reload_lock; /* Protects reload operation */
+	struct list_head reload_list;
+	refcount_t refcount; /* Serializes user doit commands and netns command
+			      * with device unregistration.
+			      */
+	struct completion unregister_complete;
 	u8 reload_failed:1,
 	   reload_enabled:1,
 	   registered:1;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index c7c6f274d392..84f3ec12b3e8 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -96,9 +96,8 @@ static LIST_HEAD(devlink_list);
 
 /* devlink_mutex
  *
- * An overall lock guarding every operation coming from userspace.
- * It also guards devlink devices list and it is taken when
- * driver registers/unregisters it.
+ * An overall lock guarding devlink devices list during operations coming from
+ * userspace and when driver registers/unregisters devlink device.
  */
 static DEFINE_MUTEX(devlink_mutex);
 
@@ -121,6 +120,18 @@ void devlink_net_set(struct devlink *devlink, struct net *net)
 }
 EXPORT_SYMBOL_GPL(devlink_net_set);
 
+static inline bool
+devlink_try_get(struct devlink *devlink)
+{
+	return refcount_inc_not_zero(&devlink->refcount);
+}
+
+static void devlink_put(struct devlink *devlink)
+{
+	if (refcount_dec_and_test(&devlink->refcount))
+		complete(&devlink->unregister_complete);
+}
+
 static struct devlink *devlink_get_from_attrs(struct net *net,
 					      struct nlattr **attrs)
 {
@@ -139,7 +150,7 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
 	list_for_each_entry(devlink, &devlink_list, list) {
 		if (strcmp(devlink->dev->bus->name, busname) == 0 &&
 		    strcmp(dev_name(devlink->dev), devname) == 0 &&
-		    net_eq(devlink_net(devlink), net))
+		    net_eq(devlink_net(devlink), net) && devlink_try_get(devlink))
 			return devlink;
 	}
 
@@ -411,7 +422,7 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
 
 /* The per devlink instance lock is taken by default in the pre-doit
  * operation, yet several commands do not require this. The global
- * devlink lock is taken and protects from disruption by user-calls.
+ * devlink lock is taken and protects from disruption by dumpit user-calls.
  */
 #define DEVLINK_NL_FLAG_NO_LOCK			BIT(2)
 
@@ -424,10 +435,10 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
 
 	mutex_lock(&devlink_mutex);
 	devlink = devlink_get_from_info(info);
-	if (IS_ERR(devlink)) {
-		mutex_unlock(&devlink_mutex);
+	mutex_unlock(&devlink_mutex);
+
+	if (IS_ERR(devlink))
 		return PTR_ERR(devlink);
-	}
 	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
 		mutex_lock(&devlink->lock);
 	info->user_ptr[0] = devlink;
@@ -448,7 +459,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
 unlock:
 	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
 		mutex_unlock(&devlink->lock);
-	mutex_unlock(&devlink_mutex);
+	devlink_put(devlink);
 	return err;
 }
 
@@ -460,7 +471,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
 	devlink = info->user_ptr[0];
 	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
 		mutex_unlock(&devlink->lock);
-	mutex_unlock(&devlink_mutex);
+	devlink_put(devlink);
 }
 
 static struct genl_family devlink_nl_family;
@@ -8122,6 +8133,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	mutex_init(&devlink->lock);
 	mutex_init(&devlink->reporters_lock);
 	mutex_init(&devlink->reload_lock);
+	init_completion(&devlink->unregister_complete);
 	return devlink;
 }
 EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -8136,6 +8148,7 @@ int devlink_register(struct devlink *devlink, struct device *dev)
 {
 	devlink->dev = dev;
 	devlink->registered = true;
+	refcount_set(&devlink->refcount, 1);
 	mutex_lock(&devlink_mutex);
 	list_add_tail(&devlink->list, &devlink_list);
 	devlink_notify(devlink, DEVLINK_CMD_NEW);
@@ -8151,12 +8164,23 @@ EXPORT_SYMBOL_GPL(devlink_register);
  */
 void devlink_unregister(struct devlink *devlink)
 {
+	/* Remove from the list first, so that no new users can get it */
 	mutex_lock(&devlink_mutex);
-	WARN_ON(devlink_reload_supported(devlink->ops) &&
-		devlink->reload_enabled);
 	devlink_notify(devlink, DEVLINK_CMD_DEL);
 	list_del(&devlink->list);
 	mutex_unlock(&devlink_mutex);
+
+	/* Balances with refcount_set in devlink_register(). */
+	devlink_put(devlink);
+	/* Wait for any existing users to stop using the devlink device */
+	wait_for_completion(&devlink->unregister_complete);
+
+	/* At this point there are no active users working on the devlink instance;
+	 * also net ns exit operation (if any) is also completed.
+	 * devlink is out of global list, hence no users can acquire reference to this devlink
+	 * instance anymore. Hence, it is safe to proceed with unregistration.
+	 */
+	WARN_ON(devlink_reload_supported(devlink->ops) && devlink->reload_enabled);
 }
 EXPORT_SYMBOL_GPL(devlink_unregister);
 
@@ -10472,6 +10496,8 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 {
 	struct devlink *devlink;
 	u32 actions_performed;
+	LIST_HEAD(local_list);
+	struct devlink *tmp;
 	int err;
 
 	/* In case network namespace is getting destroyed, reload
@@ -10479,18 +10505,32 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 	 */
 	mutex_lock(&devlink_mutex);
 	list_for_each_entry(devlink, &devlink_list, list) {
-		if (net_eq(devlink_net(devlink), net)) {
-			if (WARN_ON(!devlink_reload_supported(devlink->ops)))
-				continue;
-			err = devlink_reload(devlink, &init_net,
-					     DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
-					     DEVLINK_RELOAD_LIMIT_UNSPEC,
-					     &actions_performed, NULL);
-			if (err && err != -EOPNOTSUPP)
-				pr_warn("Failed to reload devlink instance into init_net\n");
-		}
+		if (!net_eq(devlink_net(devlink), net))
+			continue;
+
+		if (WARN_ON(!devlink_reload_supported(devlink->ops)))
+			continue;
+
+		/* Hold the reference to devlink instance so that it doesn't get unregistered
+		 * once global devlink_mutex is unlocked.
+		 * Store the devlink to a shadow list so that if devlink unregistration is
+		 * started, it can be still found in the shadow list.
+		 */
+		if (devlink_try_get(devlink))
+			list_add_tail(&devlink->reload_list, &local_list);
 	}
 	mutex_unlock(&devlink_mutex);
+
+	list_for_each_entry_safe(devlink, tmp, &local_list, reload_list) {
+		list_del_init(&devlink->reload_list);
+		err = devlink_reload(devlink, &init_net,
+				     DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+				     DEVLINK_RELOAD_LIMIT_UNSPEC,
+				     &actions_performed, NULL);
+		if (err && err != -EOPNOTSUPP)
+			pr_warn("Failed to reload devlink instance into init_net\n");
+		devlink_put(devlink);
+	}
 }
 
 static struct pernet_operations devlink_pernet_ops __net_initdata = {
-- 
2.26.2


  parent reply	other threads:[~2020-11-12 19:25 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-12 19:24 [PATCH net-next 00/13] Add mlx5 subfunction support Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 01/13] devlink: Prepare code to fill multiple port function attributes Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 02/13] devlink: Introduce PCI SF port flavour and port attribute Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 03/13] devlink: Support add and delete devlink port Parav Pandit
2020-11-18 16:21   ` David Ahern
2020-11-18 17:02     ` Parav Pandit
2020-11-18 18:03       ` David Ahern
2020-11-18 18:38         ` Jason Gunthorpe
2020-11-18 19:36           ` David Ahern
2020-11-18 20:42             ` Jason Gunthorpe
2020-11-18 19:22         ` Parav Pandit
2020-11-19  0:41           ` Jacob Keller
2020-11-19  1:17             ` David Ahern
2020-11-19  1:56               ` Samudrala, Sridhar
2020-11-19  0:52       ` Jacob Keller
2020-11-12 19:24 ` [PATCH net-next 04/13] devlink: Support get and set state of port function Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 05/13] devlink: Avoid global devlink mutex, use per instance reload lock Parav Pandit
2020-11-12 19:24 ` Parav Pandit [this message]
2020-11-12 19:24 ` [PATCH net-next 07/13] net/mlx5: SF, Add auxiliary device support Parav Pandit
2020-12-07  2:48   ` David Ahern
2020-12-07  4:53     ` Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 08/13] net/mlx5: SF, Add auxiliary device driver Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 09/13] net/mlx5: E-switch, Prepare eswitch to handle SF vport Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 10/13] net/mlx5: E-switch, Add eswitch helpers for " Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 11/13] net/mlx5: SF, Add SF configuration hardware commands Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 12/13] net/mlx5: SF, Add port add delete functionality Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 13/13] net/mlx5: SF, Port function state change support Parav Pandit
2020-11-16 22:52 ` [PATCH net-next 00/13] Add mlx5 subfunction support Jakub Kicinski
2020-11-17  0:06   ` Saeed Mahameed
2020-11-17  1:58     ` Jakub Kicinski
2020-11-17  4:08       ` Parav Pandit
2020-11-17 17:11         ` Jakub Kicinski
2020-11-17 18:49           ` Jason Gunthorpe
2020-11-19  2:14             ` Jakub Kicinski
2020-11-19  4:35               ` David Ahern
2020-11-19  5:57                 ` Saeed Mahameed
2020-11-20  1:31                   ` Jakub Kicinski
2020-11-25  5:33                   ` David Ahern
2020-11-25  6:00                     ` Parav Pandit
2020-11-25 14:37                       ` David Ahern
2020-11-20  1:29                 ` Jakub Kicinski
2020-11-20 17:58                   ` Alexander Duyck
2020-11-20 19:04                     ` Samudrala, Sridhar
2020-11-23 21:51                       ` Saeed Mahameed
2020-11-24  7:01                       ` Jason Wang
2020-11-24  7:05                         ` Jason Wang
2020-11-19  6:12               ` Saeed Mahameed
2020-11-19  8:25                 ` Parav Pandit
2020-11-20  1:35                 ` Jakub Kicinski
2020-11-20  3:34                   ` Parav Pandit
2020-11-17 18:50           ` Parav Pandit
2020-11-19  2:23             ` Jakub Kicinski
2020-11-19  6:22               ` Saeed Mahameed
2020-11-19 14:00                 ` Jason Gunthorpe
2020-11-20  3:35                   ` Jakub Kicinski
2020-11-20  3:50                     ` Parav Pandit
2020-11-20 16:16                     ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201112192424.2742-7-parav@nvidia.com \
    --to=parav@nvidia.com \
    --cc=davem@davemloft.net \
    --cc=dledford@redhat.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=jgg@nvidia.com \
    --cc=jiri@nvidia.com \
    --cc=kuba@kernel.org \
    --cc=leonro@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=saeedm@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.