All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Monjalon <thomas@monjalon.net>
To: dev@dpdk.org
Cc: matan@mellanox.com, arybchenko@solarflare.com,
	stephen@networkplumber.org, ferruh.yigit@intel.com
Subject: [PATCH v3 10/11] net/failsafe: fix sub-device ownership race
Date: Fri, 11 May 2018 01:58:35 +0200	[thread overview]
Message-ID: <20180510235836.1099-11-thomas@monjalon.net> (raw)
In-Reply-To: <20180510235836.1099-1-thomas@monjalon.net>

From: Matan Azrad <matan@mellanox.com>

There is time between the sub-device port probing by the sub-device PMD
to the sub-device port ownership taking by a fail-safe port.

In this time, the port is available for the application usage. For
example, the port will be exposed to the applications which use
RTE_ETH_FOREACH_DEV iterator.

Thus, ownership unaware applications may manage the port in this time
what may cause a lot of problematic behaviors in the fail-safe
sub-device initialization.

Register to the ethdev NEW event to take the sub-device port ownership
before it becomes exposed to the application.

Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>
Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
---
 drivers/net/failsafe/failsafe.c         | 20 ++++++++++--
 drivers/net/failsafe/failsafe_eal.c     | 56 ++++++++++++++++++++++-----------
 drivers/net/failsafe/failsafe_ether.c   | 23 ++++++++++++++
 drivers/net/failsafe/failsafe_private.h |  3 ++
 4 files changed, 80 insertions(+), 22 deletions(-)

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index b35471dd1..eafbb75df 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -204,16 +204,25 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
 	}
 	snprintf(priv->my_owner.name, sizeof(priv->my_owner.name),
 		 FAILSAFE_OWNER_NAME);
+	DEBUG("Failsafe port %u owner info: %s_%016"PRIX64, dev->data->port_id,
+	      priv->my_owner.name, priv->my_owner.id);
+	ret = rte_eth_dev_callback_register(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
+					    failsafe_eth_new_event_callback,
+					    dev);
+	if (ret) {
+		ERROR("Failed to register NEW callback");
+		goto free_args;
+	}
 	ret = failsafe_eal_init(dev);
 	if (ret)
-		goto free_args;
+		goto unregister_new_callback;
 	ret = fs_mutex_init(priv);
 	if (ret)
-		goto free_args;
+		goto unregister_new_callback;
 	ret = failsafe_hotplug_alarm_install(dev);
 	if (ret) {
 		ERROR("Could not set up plug-in event detection");
-		goto free_args;
+		goto unregister_new_callback;
 	}
 	mac = &dev->data->mac_addrs[0];
 	if (mac_from_arg) {
@@ -263,6 +272,9 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
 	return 0;
 cancel_alarm:
 	failsafe_hotplug_alarm_cancel(dev);
+unregister_new_callback:
+	rte_eth_dev_callback_unregister(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
+					failsafe_eth_new_event_callback, dev);
 free_args:
 	failsafe_args_free(dev);
 free_subs:
@@ -282,6 +294,8 @@ fs_rte_eth_free(const char *name)
 	dev = rte_eth_dev_allocated(name);
 	if (dev == NULL)
 		return -ENODEV;
+	rte_eth_dev_callback_unregister(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
+					failsafe_eth_new_event_callback, dev);
 	ret = failsafe_eal_uninit(dev);
 	if (ret)
 		ERROR("Error while uninitializing sub-EAL");
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index ce767703f..5672f3961 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -18,8 +18,9 @@ fs_ethdev_portid_get(const char *name, uint16_t *port_id)
 		return -EINVAL;
 	}
 	len = strlen(name);
-	RTE_ETH_FOREACH_DEV(pid) {
-		if (!strncmp(name, rte_eth_devices[pid].device->name, len)) {
+	for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) {
+		if (rte_eth_dev_is_valid_port(pid) &&
+		    !strncmp(name, rte_eth_devices[pid].device->name, len)) {
 			*port_id = pid;
 			return 0;
 		}
@@ -41,6 +42,8 @@ fs_bus_init(struct rte_eth_dev *dev)
 			continue;
 		da = &sdev->devargs;
 		if (fs_ethdev_portid_get(da->name, &pid) != 0) {
+			struct rte_eth_dev_owner pid_owner;
+
 			ret = rte_eal_hotplug_add(da->bus->name,
 						  da->name,
 						  da->args);
@@ -55,12 +58,26 @@ fs_bus_init(struct rte_eth_dev *dev)
 				ERROR("sub_device %d init went wrong", i);
 				return -ENODEV;
 			}
+			/*
+			 * The NEW callback tried to take ownership, check
+			 * whether it succeed or didn't.
+			 */
+			rte_eth_dev_owner_get(pid, &pid_owner);
+			if (pid_owner.id != PRIV(dev)->my_owner.id) {
+				INFO("sub_device %d owner(%s_%016"PRIX64") is not my,"
+				     " owner(%s_%016"PRIX64"), will try again later",
+				     i, pid_owner.name, pid_owner.id,
+				     PRIV(dev)->my_owner.name,
+				     PRIV(dev)->my_owner.id);
+				continue;
+			}
 		} else {
+			/* The sub-device port was found. */
 			char devstr[DEVARGS_MAXLEN] = "";
 			struct rte_devargs *probed_da =
 					rte_eth_devices[pid].device->devargs;
 
-			/* Take control of device probed by EAL options. */
+			/* Take control of probed device. */
 			free(da->args);
 			memset(da, 0, sizeof(*da));
 			if (probed_da != NULL)
@@ -77,22 +94,23 @@ fs_bus_init(struct rte_eth_dev *dev)
 			}
 			INFO("Taking control of a probed sub device"
 			      " %d named %s", i, da->name);
-		}
-		ret = rte_eth_dev_owner_set(pid, &PRIV(dev)->my_owner);
-		if (ret < 0) {
-			INFO("sub_device %d owner set failed (%s),"
-			     " will try again later", i, strerror(-ret));
-			continue;
-		} else if (strncmp(rte_eth_devices[pid].device->name, da->name,
-			   strlen(da->name)) != 0) {
-			/*
-			 * The device probably was removed and its port id was
-			 * reallocated before ownership set.
-			 */
-			rte_eth_dev_owner_unset(pid, PRIV(dev)->my_owner.id);
-			INFO("sub_device %d was probably removed before taking"
-			     " ownership, will try again later", i);
-			continue;
+			ret = rte_eth_dev_owner_set(pid, &PRIV(dev)->my_owner);
+			if (ret < 0) {
+				INFO("sub_device %d owner set failed (%s), "
+				     "will try again later", i, strerror(-ret));
+				continue;
+			} else if (strncmp(rte_eth_devices[pid].device->name,
+				   da->name, strlen(da->name)) != 0) {
+				/*
+				 * The device probably was removed and its port
+				 * id was reallocated before ownership set.
+				 */
+				rte_eth_dev_owner_unset(pid,
+							PRIV(dev)->my_owner.id);
+				INFO("sub_device %d was removed before taking"
+				     " ownership, will try again later", i);
+				continue;
+			}
 		}
 		ETH(sdev) = &rte_eth_devices[pid];
 		SUB_ID(sdev) = i;
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index b414a7884..733e95df6 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -463,3 +463,26 @@ failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused,
 	else
 		return 0;
 }
+
+/* Take sub-device ownership before it becomes exposed to the application. */
+int
+failsafe_eth_new_event_callback(uint16_t port_id,
+				enum rte_eth_event_type event __rte_unused,
+				void *cb_arg, void *out __rte_unused)
+{
+	struct rte_eth_dev *fs_dev = cb_arg;
+	struct sub_device *sdev;
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	uint8_t i;
+
+	FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) {
+		if (sdev->state >= DEV_PROBED)
+			continue;
+		if (strcmp(sdev->devargs.name, dev->device->name) != 0)
+			continue;
+		rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner);
+		/* The actual owner will be checked after the port probing. */
+		break;
+	}
+	return 0;
+}
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index b54f8e336..7e6a3f82a 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -220,6 +220,9 @@ int failsafe_eth_rmv_event_callback(uint16_t port_id,
 int failsafe_eth_lsc_event_callback(uint16_t port_id,
 				    enum rte_eth_event_type event,
 				    void *cb_arg, void *out);
+int failsafe_eth_new_event_callback(uint16_t port_id,
+				    enum rte_eth_event_type event,
+				    void *cb_arg, void *out);
 
 /* GLOBALS */
 
-- 
2.16.2

  parent reply	other threads:[~2018-05-10 23:58 UTC|newest]

Thread overview: 89+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-09  9:43 [PATCH 00/11] ethdev: fix race conditions in iterator and notifications Thomas Monjalon
2018-05-09  9:43 ` [PATCH 01/11] ethdev: fix debug log of owner id Thomas Monjalon
2018-05-09 17:53   ` [dpdk-stable] " Ferruh Yigit
2018-05-09  9:43 ` [PATCH 02/11] net/failsafe: fix sub-device visibility Thomas Monjalon
2018-05-09 12:13   ` Gaëtan Rivet
2018-05-09 12:21     ` Thomas Monjalon
2018-05-09  9:43 ` [PATCH 03/11] ethdev: add doxygen comments for each state Thomas Monjalon
2018-05-09 17:54   ` Ferruh Yigit
2018-05-09  9:43 ` [PATCH 04/11] drivers/net: use higher level of probing helper for PCI Thomas Monjalon
2018-05-09 17:54   ` Ferruh Yigit
2018-05-09  9:43 ` [PATCH 05/11] ethdev: add probing finish function Thomas Monjalon
2018-05-10 20:18   ` Stephen Hemminger
2018-05-10 21:49     ` Thomas Monjalon
2018-05-09  9:43 ` [PATCH 06/11] ethdev: allow ownership operations on unused port Thomas Monjalon
2018-05-09 18:00   ` Ferruh Yigit
2018-05-09 19:05     ` Thomas Monjalon
2018-05-10 20:26   ` Stephen Hemminger
2018-05-10 21:53     ` Thomas Monjalon
2018-05-09  9:43 ` [PATCH 07/11] ethdev: add lock to port allocation check Thomas Monjalon
2018-05-09 12:21   ` Gaëtan Rivet
2018-05-09 12:25     ` Thomas Monjalon
2018-05-10 20:35     ` Stephen Hemminger
2018-05-10 22:13       ` Thomas Monjalon
2018-05-10 23:47         ` Thomas Monjalon
2018-05-10 20:33   ` Stephen Hemminger
2018-05-10 22:10     ` Thomas Monjalon
2018-05-10 22:29       ` Stephen Hemminger
2018-05-09  9:43 ` [PATCH 08/11] ethdev: fix port visibility before initialization Thomas Monjalon
2018-05-09 18:03   ` [dpdk-stable] " Ferruh Yigit
2018-05-09 19:08     ` Thomas Monjalon
2018-05-10 20:40   ` Stephen Hemminger
2018-05-10 22:18     ` Thomas Monjalon
2018-05-09  9:43 ` [PATCH 09/11] ethdev: fix port probing notification Thomas Monjalon
2018-05-09 18:07   ` [dpdk-stable] " Ferruh Yigit
2018-05-09 19:13     ` Thomas Monjalon
2018-05-09  9:43 ` [PATCH 10/11] net/failsafe: fix sub-device ownership race Thomas Monjalon
2018-05-09 12:41   ` Gaëtan Rivet
2018-05-09 13:01     ` Matan Azrad
2018-05-09 13:30       ` Gaëtan Rivet
2018-05-09 13:43         ` Thomas Monjalon
2018-05-09 14:03           ` Gaëtan Rivet
2018-05-09 21:59             ` [dpdk-stable] " Thomas Monjalon
2018-05-09 13:26     ` Thomas Monjalon
2018-05-09  9:43 ` [PATCH 11/11] ethdev: fix port removal notification timing Thomas Monjalon
2018-05-09 18:07   ` [dpdk-stable] " Ferruh Yigit
2018-05-09 22:43 ` [PATCH v2 00/11] ethdev: fix race conditions in iterator and notifications Thomas Monjalon
2018-05-09 22:43   ` [PATCH v2 01/11] ethdev: fix debug log of owner id Thomas Monjalon
2018-05-10  9:49     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 02/11] net/failsafe: fix sub-device visibility Thomas Monjalon
2018-05-10  9:55     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 03/11] ethdev: add doxygen comments for each state Thomas Monjalon
2018-05-10  9:56     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 04/11] drivers/net: use higher level of probing helper for PCI Thomas Monjalon
2018-05-10 10:03     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 05/11] ethdev: add probing finish function Thomas Monjalon
2018-05-10 10:37     ` Andrew Rybchenko
2018-05-10 10:54       ` Thomas Monjalon
2018-05-10 11:24         ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 06/11] ethdev: allow ownership operations on unused port Thomas Monjalon
2018-05-10 10:41     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 07/11] ethdev: add lock to port allocation check Thomas Monjalon
2018-05-10 10:44     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 08/11] ethdev: fix port visibility before initialization Thomas Monjalon
2018-05-10 10:52     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 09/11] ethdev: fix port probing notification Thomas Monjalon
2018-05-10 10:53     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 10/11] net/failsafe: fix sub-device ownership race Thomas Monjalon
2018-05-10 11:15     ` Andrew Rybchenko
2018-05-10 12:03       ` Matan Azrad
2018-05-09 22:43   ` [PATCH v2 11/11] ethdev: fix port removal notification timing Thomas Monjalon
2018-05-10 11:17     ` Andrew Rybchenko
2018-05-10 11:19   ` [PATCH v2 00/11] ethdev: fix race conditions in iterator and notifications Andrew Rybchenko
2018-05-10 16:23   ` Gaëtan Rivet
2018-05-10 22:27 ` [PATCH " Stephen Hemminger
2018-05-10 23:58 ` [PATCH v3 " Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 01/11] ethdev: fix debug log of owner id Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 02/11] net/failsafe: fix sub-device visibility Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 03/11] ethdev: add doxygen comments for each state Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 04/11] drivers/net: use higher level of probing helper for PCI Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 05/11] ethdev: add probing finish function Thomas Monjalon
2018-05-23 10:09     ` Ferruh Yigit
2018-05-23 10:14       ` Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 06/11] ethdev: allow ownership operations on unused port Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 07/11] ethdev: add lock to port allocation check Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 08/11] ethdev: fix port visibility before initialization Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 09/11] ethdev: fix port probing notification Thomas Monjalon
2018-05-10 23:58   ` Thomas Monjalon [this message]
2018-05-10 23:58   ` [PATCH v3 11/11] ethdev: fix port removal notification timing Thomas Monjalon
2018-05-11  1:14   ` [PATCH v3 00/11] ethdev: fix race conditions in iterator and notifications Ferruh Yigit

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180510235836.1099-11-thomas@monjalon.net \
    --to=thomas@monjalon.net \
    --cc=arybchenko@solarflare.com \
    --cc=dev@dpdk.org \
    --cc=ferruh.yigit@intel.com \
    --cc=matan@mellanox.com \
    --cc=stephen@networkplumber.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.