netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Parav Pandit <parav@nvidia.com>
To: <netdev@vger.kernel.org>, <linux-rdma@vger.kernel.org>,
	<gregkh@linuxfoundation.org>
Cc: <jiri@nvidia.com>, <jgg@nvidia.com>, <dledford@redhat.com>,
	<leonro@nvidia.com>, <saeedm@nvidia.com>, <kuba@kernel.org>,
	<davem@davemloft.net>, Parav Pandit <parav@nvidia.com>
Subject: [PATCH net-next 06/13] devlink: Introduce devlink refcount to reduce scope of global devlink_mutex
Date: Thu, 12 Nov 2020 21:24:16 +0200	[thread overview]
Message-ID: <20201112192424.2742-7-parav@nvidia.com> (raw)
In-Reply-To: <20201112192424.2742-1-parav@nvidia.com>

Currently the global devlink_mutex is held while a doit() operation is
in progress. This brings a limitation.

A driver cannot perform devlink_register()/unregister() calls
during devlink doit() callback functions.
This is typically required when a port state change callback, described
in RFC [1], wants to delete an active SF port or wants to activate an
SF port, which results in unregistering or registering a devlink
instance on a different bus such as the ancillary bus.

An example flow:

devlink_predoit()
  mutex_lock(&devlink_mutex); <- First lock acquire
  devlink_reload()
    driver->reload_down(inactive)
        adev->remove();
           mlx5_adev_remove(ancillary_dev);
             devlink_unregister(ancillary_dev->devlink_instance);
               mutex_lock(&devlink_mutex); <- Second lock acquire

This patch is a preparation patch to enable drivers to achieve this.

It achieves this by maintaining a per devlink instance refcount to
prevent devlink device unregistration while user commands are in progress
or while the devlink device is migrating to the init_net net namespace.

devlink_nl_family continues to remain registered with parallel_ops
disabled. So even after removing devlink_mutex during doit commands,
userspace still cannot run multiple devlink commands in parallel for
one or multiple devlink instances.

[1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
 include/net/devlink.h |  5 +++
 net/core/devlink.c    | 84 +++++++++++++++++++++++++++++++------------
 2 files changed, 67 insertions(+), 22 deletions(-)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index ef487b8ed17b..c8eab814c234 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -53,6 +53,11 @@ struct devlink {
 			    * port, sb, dpipe, resource, params, region, traps and more.
 			    */
 	struct mutex reload_lock; /* Protects reload operation */
+	struct list_head reload_list;
+	refcount_t refcount; /* Serializes user doit commands and netns command
+			      * with device unregistration.
+			      */
+	struct completion unregister_complete;
 	u8 reload_failed:1,
 	   reload_enabled:1,
 	   registered:1;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index c7c6f274d392..84f3ec12b3e8 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -96,9 +96,8 @@ static LIST_HEAD(devlink_list);
 
 /* devlink_mutex
  *
- * An overall lock guarding every operation coming from userspace.
- * It also guards devlink devices list and it is taken when
- * driver registers/unregisters it.
+ * An overall lock guarding devlink devices list during operations coming from
+ * userspace and when driver registers/unregisters devlink device.
  */
 static DEFINE_MUTEX(devlink_mutex);
 
@@ -121,6 +120,18 @@ void devlink_net_set(struct devlink *devlink, struct net *net)
 }
 EXPORT_SYMBOL_GPL(devlink_net_set);
 
+static inline bool
+devlink_try_get(struct devlink *devlink)
+{
+	return refcount_inc_not_zero(&devlink->refcount);
+}
+
+static void devlink_put(struct devlink *devlink)
+{
+	if (refcount_dec_and_test(&devlink->refcount))
+		complete(&devlink->unregister_complete);
+}
+
 static struct devlink *devlink_get_from_attrs(struct net *net,
 					      struct nlattr **attrs)
 {
@@ -139,7 +150,7 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
 	list_for_each_entry(devlink, &devlink_list, list) {
 		if (strcmp(devlink->dev->bus->name, busname) == 0 &&
 		    strcmp(dev_name(devlink->dev), devname) == 0 &&
-		    net_eq(devlink_net(devlink), net))
+		    net_eq(devlink_net(devlink), net) && devlink_try_get(devlink))
 			return devlink;
 	}
 
@@ -411,7 +422,7 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
 
 /* The per devlink instance lock is taken by default in the pre-doit
  * operation, yet several commands do not require this. The global
- * devlink lock is taken and protects from disruption by user-calls.
+ * devlink lock is taken and protects from disruption by dumpit user-calls.
  */
 #define DEVLINK_NL_FLAG_NO_LOCK			BIT(2)
 
@@ -424,10 +435,10 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
 
 	mutex_lock(&devlink_mutex);
 	devlink = devlink_get_from_info(info);
-	if (IS_ERR(devlink)) {
-		mutex_unlock(&devlink_mutex);
+	mutex_unlock(&devlink_mutex);
+
+	if (IS_ERR(devlink))
 		return PTR_ERR(devlink);
-	}
 	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
 		mutex_lock(&devlink->lock);
 	info->user_ptr[0] = devlink;
@@ -448,7 +459,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
 unlock:
 	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
 		mutex_unlock(&devlink->lock);
-	mutex_unlock(&devlink_mutex);
+	devlink_put(devlink);
 	return err;
 }
 
@@ -460,7 +471,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
 	devlink = info->user_ptr[0];
 	if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
 		mutex_unlock(&devlink->lock);
-	mutex_unlock(&devlink_mutex);
+	devlink_put(devlink);
 }
 
 static struct genl_family devlink_nl_family;
@@ -8122,6 +8133,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	mutex_init(&devlink->lock);
 	mutex_init(&devlink->reporters_lock);
 	mutex_init(&devlink->reload_lock);
+	init_completion(&devlink->unregister_complete);
 	return devlink;
 }
 EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -8136,6 +8148,7 @@ int devlink_register(struct devlink *devlink, struct device *dev)
 {
 	devlink->dev = dev;
 	devlink->registered = true;
+	refcount_set(&devlink->refcount, 1);
 	mutex_lock(&devlink_mutex);
 	list_add_tail(&devlink->list, &devlink_list);
 	devlink_notify(devlink, DEVLINK_CMD_NEW);
@@ -8151,12 +8164,23 @@ EXPORT_SYMBOL_GPL(devlink_register);
  */
 void devlink_unregister(struct devlink *devlink)
 {
+	/* Remove from the list first, so that no new users can get it */
 	mutex_lock(&devlink_mutex);
-	WARN_ON(devlink_reload_supported(devlink->ops) &&
-		devlink->reload_enabled);
 	devlink_notify(devlink, DEVLINK_CMD_DEL);
 	list_del(&devlink->list);
 	mutex_unlock(&devlink_mutex);
+
+	/* Balances with refcount_set in devlink_register(). */
+	devlink_put(devlink);
+	/* Wait for any existing users to stop using the devlink device */
+	wait_for_completion(&devlink->unregister_complete);
+
+	/* At this point there are no active users working on the devlink instance;
+	 * also net ns exit operation (if any) is also completed.
+	 * devlink is out of global list, hence no users can acquire reference to this devlink
+	 * instance anymore. Hence, it is safe to proceed with unregistration.
+	 */
+	WARN_ON(devlink_reload_supported(devlink->ops) && devlink->reload_enabled);
 }
 EXPORT_SYMBOL_GPL(devlink_unregister);
 
@@ -10472,6 +10496,8 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 {
 	struct devlink *devlink;
 	u32 actions_performed;
+	LIST_HEAD(local_list);
+	struct devlink *tmp;
 	int err;
 
 	/* In case network namespace is getting destroyed, reload
@@ -10479,18 +10505,32 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 	 */
 	mutex_lock(&devlink_mutex);
 	list_for_each_entry(devlink, &devlink_list, list) {
-		if (net_eq(devlink_net(devlink), net)) {
-			if (WARN_ON(!devlink_reload_supported(devlink->ops)))
-				continue;
-			err = devlink_reload(devlink, &init_net,
-					     DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
-					     DEVLINK_RELOAD_LIMIT_UNSPEC,
-					     &actions_performed, NULL);
-			if (err && err != -EOPNOTSUPP)
-				pr_warn("Failed to reload devlink instance into init_net\n");
-		}
+		if (!net_eq(devlink_net(devlink), net))
+			continue;
+
+		if (WARN_ON(!devlink_reload_supported(devlink->ops)))
+			continue;
+
+		/* Hold the reference to devlink instance so that it doesn't get unregistered
+		 * once global devlink_mutex is unlocked.
+		 * Store the devlink to a shadow list so that if devlink unregistration is
+		 * started, it can be still found in the shadow list.
+		 */
+		if (devlink_try_get(devlink))
+			list_add_tail(&devlink->reload_list, &local_list);
 	}
 	mutex_unlock(&devlink_mutex);
+
+	list_for_each_entry_safe(devlink, tmp, &local_list, reload_list) {
+		list_del_init(&devlink->reload_list);
+		err = devlink_reload(devlink, &init_net,
+				     DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+				     DEVLINK_RELOAD_LIMIT_UNSPEC,
+				     &actions_performed, NULL);
+		if (err && err != -EOPNOTSUPP)
+			pr_warn("Failed to reload devlink instance into init_net\n");
+		devlink_put(devlink);
+	}
 }
 
 static struct pernet_operations devlink_pernet_ops __net_initdata = {
-- 
2.26.2


  parent reply	other threads:[~2020-11-12 19:25 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-12 19:24 [PATCH net-next 00/13] Add mlx5 subfunction support Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 01/13] devlink: Prepare code to fill multiple port function attributes Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 02/13] devlink: Introduce PCI SF port flavour and port attribute Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 03/13] devlink: Support add and delete devlink port Parav Pandit
2020-11-18 16:21   ` David Ahern
2020-11-18 17:02     ` Parav Pandit
2020-11-18 18:03       ` David Ahern
2020-11-18 18:38         ` Jason Gunthorpe
2020-11-18 19:36           ` David Ahern
2020-11-18 20:42             ` Jason Gunthorpe
2020-11-18 19:22         ` Parav Pandit
2020-11-19  0:41           ` Jacob Keller
2020-11-19  1:17             ` David Ahern
2020-11-19  1:56               ` Samudrala, Sridhar
2020-11-19  0:52       ` Jacob Keller
2020-11-12 19:24 ` [PATCH net-next 04/13] devlink: Support get and set state of port function Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 05/13] devlink: Avoid global devlink mutex, use per instance reload lock Parav Pandit
2020-11-12 19:24 ` Parav Pandit [this message]
2020-11-12 19:24 ` [PATCH net-next 07/13] net/mlx5: SF, Add auxiliary device support Parav Pandit
2020-12-07  2:48   ` David Ahern
2020-12-07  4:53     ` Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 08/13] net/mlx5: SF, Add auxiliary device driver Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 09/13] net/mlx5: E-switch, Prepare eswitch to handle SF vport Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 10/13] net/mlx5: E-switch, Add eswitch helpers for " Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 11/13] net/mlx5: SF, Add SF configuration hardware commands Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 12/13] net/mlx5: SF, Add port add delete functionality Parav Pandit
2020-11-12 19:24 ` [PATCH net-next 13/13] net/mlx5: SF, Port function state change support Parav Pandit
2020-11-16 22:52 ` [PATCH net-next 00/13] Add mlx5 subfunction support Jakub Kicinski
2020-11-17  0:06   ` Saeed Mahameed
2020-11-17  1:58     ` Jakub Kicinski
2020-11-17  4:08       ` Parav Pandit
2020-11-17 17:11         ` Jakub Kicinski
2020-11-17 18:49           ` Jason Gunthorpe
2020-11-19  2:14             ` Jakub Kicinski
2020-11-19  4:35               ` David Ahern
2020-11-19  5:57                 ` Saeed Mahameed
2020-11-20  1:31                   ` Jakub Kicinski
2020-11-25  5:33                   ` David Ahern
2020-11-25  6:00                     ` Parav Pandit
2020-11-25 14:37                       ` David Ahern
2020-11-20  1:29                 ` Jakub Kicinski
2020-11-20 17:58                   ` Alexander Duyck
2020-11-20 19:04                     ` Samudrala, Sridhar
2020-11-23 21:51                       ` Saeed Mahameed
2020-11-24  7:01                       ` Jason Wang
2020-11-24  7:05                         ` Jason Wang
2020-11-19  6:12               ` Saeed Mahameed
2020-11-19  8:25                 ` Parav Pandit
2020-11-20  1:35                 ` Jakub Kicinski
2020-11-20  3:34                   ` Parav Pandit
2020-11-17 18:50           ` Parav Pandit
2020-11-19  2:23             ` Jakub Kicinski
2020-11-19  6:22               ` Saeed Mahameed
2020-11-19 14:00                 ` Jason Gunthorpe
2020-11-20  3:35                   ` Jakub Kicinski
2020-11-20  3:50                     ` Parav Pandit
2020-11-20 16:16                     ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201112192424.2742-7-parav@nvidia.com \
    --to=parav@nvidia.com \
    --cc=davem@davemloft.net \
    --cc=dledford@redhat.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=jgg@nvidia.com \
    --cc=jiri@nvidia.com \
    --cc=kuba@kernel.org \
    --cc=leonro@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=saeedm@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).