All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Rybchenko <arybchenko@solarflare.com>
To: Thomas Monjalon <thomas@monjalon.net>, <dev@dpdk.org>
Cc: Matan Azrad <matan@mellanox.com>
Subject: Re: [PATCH v2 10/11] net/failsafe: fix sub-device ownership race
Date: Thu, 10 May 2018 14:15:44 +0300	[thread overview]
Message-ID: <6fd5308e-9566-b220-7ec9-175d82a4aded@solarflare.com> (raw)
In-Reply-To: <20180509224313.27289-11-thomas@monjalon.net>

On 05/10/2018 01:43 AM, Thomas Monjalon wrote:
> From: Matan Azrad <matan@mellanox.com>
>
> There is time between the sub-device port probing by the sub-device PMD
> to the sub-device port ownership taking by a fail-safe port.
>
> In this time, the port is available for the application usage. For
> example, the port will be exposed to the applications which use
> RTE_ETH_FOREACH_DEV iterator.
>
> Thus, ownership unaware applications may manage the port in this time
> what may cause a lot of problematic behaviors in the fail-safe
> sub-device initialization.
>
> Register to the ethdev NEW event to take the sub-device port ownership
> before it becomes exposed to the application.
>
> Fixes: a46f8d584eb8 ("net/failsafe: add fail-safe PMD")
> Cc: stable@dpdk.org
>
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>
> ---
>   drivers/net/failsafe/failsafe.c         | 22 ++++++++++---
>   drivers/net/failsafe/failsafe_eal.c     | 56 ++++++++++++++++++++++-----------
>   drivers/net/failsafe/failsafe_ether.c   | 23 ++++++++++++++
>   drivers/net/failsafe/failsafe_private.h |  4 +++
>   4 files changed, 82 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
> index fc989c4f5..c9d128de3 100644
> --- a/drivers/net/failsafe/failsafe.c
> +++ b/drivers/net/failsafe/failsafe.c
> @@ -204,16 +204,25 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
>   	}
>   	snprintf(priv->my_owner.name, sizeof(priv->my_owner.name),
>   		 FAILSAFE_OWNER_NAME);
> +	DEBUG("Failsafe port %u owner info: %s_%016"PRIX64, dev->data->port_id,
> +	      priv->my_owner.name, priv->my_owner.id);
> +	ret = rte_eth_dev_callback_register(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
> +					    failsafe_eth_new_event_callback,
> +					    dev);
> +	if (ret) {
> +		ERROR("Failed to register NEW callback");
> +		goto free_args;
> +	}
>   	ret = failsafe_eal_init(dev);
>   	if (ret)
> -		goto free_args;
> +		goto unregister_new_callback;
>   	ret = fs_mutex_init(priv);
>   	if (ret)
> -		goto free_args;
> +		goto unregister_new_callback;
>   	ret = failsafe_hotplug_alarm_install(dev);
>   	if (ret) {
>   		ERROR("Could not set up plug-in event detection");
> -		goto free_args;
> +		goto unregister_new_callback;
>   	}
>   	mac = &dev->data->mac_addrs[0];
>   	if (mac_from_arg) {
> @@ -226,7 +235,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
>   							       mac);
>   			if (ret) {
>   				ERROR("Failed to set default MAC address");
> -				goto free_args;
> +				goto unregister_new_callback;

It will fail to apply on next-net since there is cancel_alarm there now.
So, this hunk should be simply skipped.

>   			}
>   		}
>   	} else {
> @@ -261,6 +270,9 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
>   	};
>   	rte_eth_dev_probing_finish(dev);
>   	return 0;
> +unregister_new_callback:
> +	rte_eth_dev_callback_unregister(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
> +					failsafe_eth_new_event_callback, dev);

There is cancel_alarm here in next-net.

>   free_args:
>   	failsafe_args_free(dev);
>   free_subs:
> @@ -280,6 +292,8 @@ fs_rte_eth_free(const char *name)
>   	dev = rte_eth_dev_allocated(name);
>   	if (dev == NULL)
>   		return -ENODEV;
> +	rte_eth_dev_callback_unregister(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
> +					failsafe_eth_new_event_callback, dev);
>   	ret = failsafe_eal_uninit(dev);
>   	if (ret)
>   		ERROR("Error while uninitializing sub-EAL");
> diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
> index ce767703f..5672f3961 100644
> --- a/drivers/net/failsafe/failsafe_eal.c
> +++ b/drivers/net/failsafe/failsafe_eal.c
> @@ -18,8 +18,9 @@ fs_ethdev_portid_get(const char *name, uint16_t *port_id)
>   		return -EINVAL;
>   	}
>   	len = strlen(name);
> -	RTE_ETH_FOREACH_DEV(pid) {
> -		if (!strncmp(name, rte_eth_devices[pid].device->name, len)) {
> +	for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) {
> +		if (rte_eth_dev_is_valid_port(pid) &&
> +		    !strncmp(name, rte_eth_devices[pid].device->name, len)) {
>   			*port_id = pid;
>   			return 0;
>   		}
> @@ -41,6 +42,8 @@ fs_bus_init(struct rte_eth_dev *dev)
>   			continue;
>   		da = &sdev->devargs;
>   		if (fs_ethdev_portid_get(da->name, &pid) != 0) {
> +			struct rte_eth_dev_owner pid_owner;
> +
>   			ret = rte_eal_hotplug_add(da->bus->name,
>   						  da->name,
>   						  da->args);
> @@ -55,12 +58,26 @@ fs_bus_init(struct rte_eth_dev *dev)
>   				ERROR("sub_device %d init went wrong", i);
>   				return -ENODEV;
>   			}
> +			/*
> +			 * The NEW callback tried to take ownership, check
> +			 * whether it succeed or didn't.
> +			 */
> +			rte_eth_dev_owner_get(pid, &pid_owner);
> +			if (pid_owner.id != PRIV(dev)->my_owner.id) {
> +				INFO("sub_device %d owner(%s_%016"PRIX64") is not my,"
> +				     " owner(%s_%016"PRIX64"), will try again later",

Frankly speaking I don't understand what and why will change later.

> +				     i, pid_owner.name, pid_owner.id,
> +				     PRIV(dev)->my_owner.name,
> +				     PRIV(dev)->my_owner.id);
> +				continue;
> +			}
>   		} else {
> +			/* The sub-device port was found. */
>   			char devstr[DEVARGS_MAXLEN] = "";
>   			struct rte_devargs *probed_da =
>   					rte_eth_devices[pid].device->devargs;
>   
> -			/* Take control of device probed by EAL options. */
> +			/* Take control of probed device. */
>   			free(da->args);
>   			memset(da, 0, sizeof(*da));
>   			if (probed_da != NULL)
> @@ -77,22 +94,23 @@ fs_bus_init(struct rte_eth_dev *dev)
>   			}
>   			INFO("Taking control of a probed sub device"
>   			      " %d named %s", i, da->name);
> -		}
> -		ret = rte_eth_dev_owner_set(pid, &PRIV(dev)->my_owner);
> -		if (ret < 0) {
> -			INFO("sub_device %d owner set failed (%s),"
> -			     " will try again later", i, strerror(-ret));
> -			continue;
> -		} else if (strncmp(rte_eth_devices[pid].device->name, da->name,
> -			   strlen(da->name)) != 0) {
> -			/*
> -			 * The device probably was removed and its port id was
> -			 * reallocated before ownership set.
> -			 */
> -			rte_eth_dev_owner_unset(pid, PRIV(dev)->my_owner.id);
> -			INFO("sub_device %d was probably removed before taking"
> -			     " ownership, will try again later", i);
> -			continue;
> +			ret = rte_eth_dev_owner_set(pid, &PRIV(dev)->my_owner);
> +			if (ret < 0) {
> +				INFO("sub_device %d owner set failed (%s), "
> +				     "will try again later", i, strerror(-ret));
> +				continue;
> +			} else if (strncmp(rte_eth_devices[pid].device->name,
> +				   da->name, strlen(da->name)) != 0) {
> +				/*
> +				 * The device probably was removed and its port
> +				 * id was reallocated before ownership set.
> +				 */
> +				rte_eth_dev_owner_unset(pid,
> +							PRIV(dev)->my_owner.id);
> +				INFO("sub_device %d was removed before taking"
> +				     " ownership, will try again later", i);
> +				continue;
> +			}
>   		}
>   		ETH(sdev) = &rte_eth_devices[pid];
>   		SUB_ID(sdev) = i;
> diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
> index b414a7884..ebce87841 100644
> --- a/drivers/net/failsafe/failsafe_ether.c
> +++ b/drivers/net/failsafe/failsafe_ether.c
> @@ -463,3 +463,26 @@ failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused,
>   	else
>   		return 0;
>   }
> +
> +/* Take sub-device ownership before it becomes exposed to the application. */
> +int
> +failsafe_eth_new_event_callback(uint16_t port_id,
> +				enum rte_eth_event_type event __rte_unused,
> +				void *cb_arg, void *out __rte_unused)
> +{
> +	struct rte_eth_dev *fs_dev = cb_arg;
> +	struct sub_device *sdev;
> +	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
> +	size_t len = strlen(dev->device->name);
> +	uint8_t i;
> +
> +	FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) {
> +		if (sdev->state >= DEV_PROBED)
> +			continue;
> +		if (strncmp(sdev->devargs.name, dev->device->name, len) != 0)

Shouldn't we ensure that name is exact match instead of checking
sdev->devargs.name prefix only.

> +			continue;
> +		rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner);

I think return value should be processed here.
If we really ignore it, it should be explained why.

> +		break;
> +	}
> +	return 0;
> +}
> diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
> index b54f8e336..cd8e0b8c9 100644
> --- a/drivers/net/failsafe/failsafe_private.h
> +++ b/drivers/net/failsafe/failsafe_private.h
> @@ -220,6 +220,10 @@ int failsafe_eth_rmv_event_callback(uint16_t port_id,
>   int failsafe_eth_lsc_event_callback(uint16_t port_id,
>   				    enum rte_eth_event_type event,
>   				    void *cb_arg, void *out);
> +int
> +failsafe_eth_new_event_callback(uint16_t port_id,
> +				enum rte_eth_event_type event,
> +				void *cb_arg, void *out);

Return value type and function name should be on the same line in
accordance with style used in this file. Just to be consistent.

[...]

  reply	other threads:[~2018-05-10 11:15 UTC|newest]

Thread overview: 89+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-09  9:43 [PATCH 00/11] ethdev: fix race conditions in iterator and notifications Thomas Monjalon
2018-05-09  9:43 ` [PATCH 01/11] ethdev: fix debug log of owner id Thomas Monjalon
2018-05-09 17:53   ` [dpdk-stable] " Ferruh Yigit
2018-05-09  9:43 ` [PATCH 02/11] net/failsafe: fix sub-device visibility Thomas Monjalon
2018-05-09 12:13   ` Gaëtan Rivet
2018-05-09 12:21     ` Thomas Monjalon
2018-05-09  9:43 ` [PATCH 03/11] ethdev: add doxygen comments for each state Thomas Monjalon
2018-05-09 17:54   ` Ferruh Yigit
2018-05-09  9:43 ` [PATCH 04/11] drivers/net: use higher level of probing helper for PCI Thomas Monjalon
2018-05-09 17:54   ` Ferruh Yigit
2018-05-09  9:43 ` [PATCH 05/11] ethdev: add probing finish function Thomas Monjalon
2018-05-10 20:18   ` Stephen Hemminger
2018-05-10 21:49     ` Thomas Monjalon
2018-05-09  9:43 ` [PATCH 06/11] ethdev: allow ownership operations on unused port Thomas Monjalon
2018-05-09 18:00   ` Ferruh Yigit
2018-05-09 19:05     ` Thomas Monjalon
2018-05-10 20:26   ` Stephen Hemminger
2018-05-10 21:53     ` Thomas Monjalon
2018-05-09  9:43 ` [PATCH 07/11] ethdev: add lock to port allocation check Thomas Monjalon
2018-05-09 12:21   ` Gaëtan Rivet
2018-05-09 12:25     ` Thomas Monjalon
2018-05-10 20:35     ` Stephen Hemminger
2018-05-10 22:13       ` Thomas Monjalon
2018-05-10 23:47         ` Thomas Monjalon
2018-05-10 20:33   ` Stephen Hemminger
2018-05-10 22:10     ` Thomas Monjalon
2018-05-10 22:29       ` Stephen Hemminger
2018-05-09  9:43 ` [PATCH 08/11] ethdev: fix port visibility before initialization Thomas Monjalon
2018-05-09 18:03   ` [dpdk-stable] " Ferruh Yigit
2018-05-09 19:08     ` Thomas Monjalon
2018-05-10 20:40   ` Stephen Hemminger
2018-05-10 22:18     ` Thomas Monjalon
2018-05-09  9:43 ` [PATCH 09/11] ethdev: fix port probing notification Thomas Monjalon
2018-05-09 18:07   ` [dpdk-stable] " Ferruh Yigit
2018-05-09 19:13     ` Thomas Monjalon
2018-05-09  9:43 ` [PATCH 10/11] net/failsafe: fix sub-device ownership race Thomas Monjalon
2018-05-09 12:41   ` Gaëtan Rivet
2018-05-09 13:01     ` Matan Azrad
2018-05-09 13:30       ` Gaëtan Rivet
2018-05-09 13:43         ` Thomas Monjalon
2018-05-09 14:03           ` Gaëtan Rivet
2018-05-09 21:59             ` [dpdk-stable] " Thomas Monjalon
2018-05-09 13:26     ` Thomas Monjalon
2018-05-09  9:43 ` [PATCH 11/11] ethdev: fix port removal notification timing Thomas Monjalon
2018-05-09 18:07   ` [dpdk-stable] " Ferruh Yigit
2018-05-09 22:43 ` [PATCH v2 00/11] ethdev: fix race conditions in iterator and notifications Thomas Monjalon
2018-05-09 22:43   ` [PATCH v2 01/11] ethdev: fix debug log of owner id Thomas Monjalon
2018-05-10  9:49     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 02/11] net/failsafe: fix sub-device visibility Thomas Monjalon
2018-05-10  9:55     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 03/11] ethdev: add doxygen comments for each state Thomas Monjalon
2018-05-10  9:56     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 04/11] drivers/net: use higher level of probing helper for PCI Thomas Monjalon
2018-05-10 10:03     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 05/11] ethdev: add probing finish function Thomas Monjalon
2018-05-10 10:37     ` Andrew Rybchenko
2018-05-10 10:54       ` Thomas Monjalon
2018-05-10 11:24         ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 06/11] ethdev: allow ownership operations on unused port Thomas Monjalon
2018-05-10 10:41     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 07/11] ethdev: add lock to port allocation check Thomas Monjalon
2018-05-10 10:44     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 08/11] ethdev: fix port visibility before initialization Thomas Monjalon
2018-05-10 10:52     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 09/11] ethdev: fix port probing notification Thomas Monjalon
2018-05-10 10:53     ` Andrew Rybchenko
2018-05-09 22:43   ` [PATCH v2 10/11] net/failsafe: fix sub-device ownership race Thomas Monjalon
2018-05-10 11:15     ` Andrew Rybchenko [this message]
2018-05-10 12:03       ` Matan Azrad
2018-05-09 22:43   ` [PATCH v2 11/11] ethdev: fix port removal notification timing Thomas Monjalon
2018-05-10 11:17     ` Andrew Rybchenko
2018-05-10 11:19   ` [PATCH v2 00/11] ethdev: fix race conditions in iterator and notifications Andrew Rybchenko
2018-05-10 16:23   ` Gaëtan Rivet
2018-05-10 22:27 ` [PATCH " Stephen Hemminger
2018-05-10 23:58 ` [PATCH v3 " Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 01/11] ethdev: fix debug log of owner id Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 02/11] net/failsafe: fix sub-device visibility Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 03/11] ethdev: add doxygen comments for each state Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 04/11] drivers/net: use higher level of probing helper for PCI Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 05/11] ethdev: add probing finish function Thomas Monjalon
2018-05-23 10:09     ` Ferruh Yigit
2018-05-23 10:14       ` Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 06/11] ethdev: allow ownership operations on unused port Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 07/11] ethdev: add lock to port allocation check Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 08/11] ethdev: fix port visibility before initialization Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 09/11] ethdev: fix port probing notification Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 10/11] net/failsafe: fix sub-device ownership race Thomas Monjalon
2018-05-10 23:58   ` [PATCH v3 11/11] ethdev: fix port removal notification timing Thomas Monjalon
2018-05-11  1:14   ` [PATCH v3 00/11] ethdev: fix race conditions in iterator and notifications Ferruh Yigit

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6fd5308e-9566-b220-7ec9-175d82a4aded@solarflare.com \
    --to=arybchenko@solarflare.com \
    --cc=dev@dpdk.org \
    --cc=matan@mellanox.com \
    --cc=thomas@monjalon.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.