All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/4] net/failsafe: replace local device with shared data
@ 2019-03-05  9:52 Raslan Darawsheh
  2019-03-05  9:52 ` [PATCH v2 3/4] net/failsafe: replace local sub-device " Raslan Darawsheh
                   ` (4 more replies)
  0 siblings, 5 replies; 26+ messages in thread
From: Raslan Darawsheh @ 2019-03-05  9:52 UTC (permalink / raw)
  To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen

In multiprocess context, the private structure is shared between
processes. The back reference from private to generic data was using
a pointer to a per process eth_dev. It's now changed to a reference of
the shared data.

Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
---
 drivers/net/failsafe/failsafe.c         |  2 +-
 drivers/net/failsafe/failsafe_intr.c    | 10 +++++-----
 drivers/net/failsafe/failsafe_ops.c     |  4 ++--
 drivers/net/failsafe/failsafe_private.h |  6 +++++-
 drivers/net/failsafe/failsafe_rxtx.c    |  4 ++--
 5 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 06e859e..68926ca 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -181,7 +181,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
 		return -1;
 	}
 	priv = PRIV(dev);
-	priv->dev = dev;
+	priv->data = dev->data;
 	dev->dev_ops = &failsafe_ops;
 	dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0];
 	dev->data->dev_link = eth_link;
diff --git a/drivers/net/failsafe/failsafe_intr.c b/drivers/net/failsafe/failsafe_intr.c
index 1c2cb71..09aa3c6 100644
--- a/drivers/net/failsafe/failsafe_intr.c
+++ b/drivers/net/failsafe/failsafe_intr.c
@@ -133,8 +133,8 @@ fs_rx_event_proxy_service_install(struct fs_priv *priv)
 	/* prepare service info */
 	memset(&service, 0, sizeof(struct rte_service_spec));
 	snprintf(service.name, sizeof(service.name), "%s_Rx_service",
-		 priv->dev->data->name);
-	service.socket_id = priv->dev->data->numa_node;
+		 priv->data->name);
+	service.socket_id = priv->data->numa_node;
 	service.callback = fs_rx_event_proxy_routine;
 	service.callback_userdata = priv;
 
@@ -437,7 +437,7 @@ fs_rx_intr_vec_install(struct fs_priv *priv)
 	unsigned int count;
 	struct rte_intr_handle *intr_handle;
 
-	rxqs_n = priv->dev->data->nb_rx_queues;
+	rxqs_n = priv->data->nb_rx_queues;
 	n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
 	count = 0;
 	intr_handle = &priv->intr_handle;
@@ -452,7 +452,7 @@ fs_rx_intr_vec_install(struct fs_priv *priv)
 		return -rte_errno;
 	}
 	for (i = 0; i < n; i++) {
-		struct rxq *rxq = priv->dev->data->rx_queues[i];
+		struct rxq *rxq = priv->data->rx_queues[i];
 
 		/* Skip queues that cannot request interrupts. */
 		if (rxq == NULL || rxq->event_fd < 0) {
@@ -521,7 +521,7 @@ failsafe_rx_intr_install(struct rte_eth_dev *dev)
 {
 	struct fs_priv *priv = PRIV(dev);
 	const struct rte_intr_conf *const intr_conf =
-			&priv->dev->data->dev_conf.intr_conf;
+			&priv->data->dev_conf.intr_conf;
 
 	if (intr_conf->rxq == 0 || dev->intr_handle != NULL)
 		return 0;
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index e3add40..65957a2 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -452,7 +452,7 @@ fs_rx_queue_release(void *queue)
 	if (queue == NULL)
 		return;
 	rxq = queue;
-	dev = rxq->priv->dev;
+	dev = &rte_eth_devices[rxq->priv->data->port_id];
 	fs_lock(dev, 0);
 	if (rxq->event_fd > 0)
 		close(rxq->event_fd);
@@ -636,7 +636,7 @@ fs_tx_queue_release(void *queue)
 	if (queue == NULL)
 		return;
 	txq = queue;
-	dev = txq->priv->dev;
+	dev = &rte_eth_devices[txq->priv->data->port_id];
 	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		if (ETH(sdev)->data->tx_queues != NULL &&
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 0dfea65..29dfd40 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -128,8 +128,12 @@ struct sub_device {
 	unsigned int lsc_callback:1;
 };
 
+/*
+ * This is referenced by eth_dev->data->dev_private
+ * This is shared between processes.
+ */
 struct fs_priv {
-	struct rte_eth_dev *dev;
+	struct rte_eth_dev_data *data; /* backreference to shared data. */
 	/*
 	 * Set of sub_devices.
 	 * subs[0] is the preferred device
diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
index 034f47b..231c832 100644
--- a/drivers/net/failsafe/failsafe_rxtx.c
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -126,7 +126,7 @@ failsafe_tx_burst(void *queue,
 	uint16_t nb_tx;
 
 	txq = queue;
-	sdev = TX_SUBDEV(txq->priv->dev);
+	sdev = TX_SUBDEV(&rte_eth_devices[txq->priv->data->port_id]);
 	if (unlikely(fs_tx_unsafe(sdev)))
 		return 0;
 	sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
@@ -147,7 +147,7 @@ failsafe_tx_burst_fast(void *queue,
 	uint16_t nb_tx;
 
 	txq = queue;
-	sdev = TX_SUBDEV(txq->priv->dev);
+	sdev = TX_SUBDEV(&rte_eth_devices[txq->priv->data->port_id]);
 	RTE_ASSERT(!fs_tx_unsafe(sdev));
 	sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
 	FS_ATOMIC_P(txq->refcnt[sdev->sid]);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH v2 2/4] net/failsafe: change back-reference from sub-device
  2019-03-05  9:52 [PATCH v2 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh
  2019-03-05  9:52 ` [PATCH v2 3/4] net/failsafe: replace local sub-device " Raslan Darawsheh
@ 2019-03-05  9:52 ` Raslan Darawsheh
  2019-03-05 16:48   ` Gaëtan Rivet
  2019-03-05  9:52 ` [PATCH v2 4/4] net/failsafe: support secondary process Raslan Darawsheh
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 26+ messages in thread
From: Raslan Darawsheh @ 2019-03-05  9:52 UTC (permalink / raw)
  To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen

In multiprocess context, the sub-device structure is shared
between processes. The reference to the failsafe device was
a per process pointer. It's changed to port id which is the
same for all processes.

Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
---
v2: changed macro to an inline function
---
 drivers/net/failsafe/failsafe_eal.c     |  2 +-
 drivers/net/failsafe/failsafe_ether.c   | 15 ++++++++-------
 drivers/net/failsafe/failsafe_intr.c    | 10 +++++-----
 drivers/net/failsafe/failsafe_private.h | 13 ++++++++++---
 4 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 8a888b1..56d1669 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -114,7 +114,7 @@ fs_bus_init(struct rte_eth_dev *dev)
 		}
 		ETH(sdev) = &rte_eth_devices[pid];
 		SUB_ID(sdev) = i;
-		sdev->fs_dev = dev;
+		sdev->fs_port_id = dev->data->port_id;
 		sdev->dev = ETH(sdev)->device;
 		sdev->state = DEV_PROBED;
 	}
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 1783165..d5b1488 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -298,7 +298,7 @@ fs_dev_remove(struct sub_device *sdev)
 		break;
 	}
 	sdev->remove = 0;
-	failsafe_hotplug_alarm_install(sdev->fs_dev);
+	failsafe_hotplug_alarm_install(fsdev_from_subdev(sdev));
 }
 
 static void
@@ -318,8 +318,9 @@ fs_dev_stats_save(struct sub_device *sdev)
 			WARN("Using latest snapshot taken before %"PRIu64" seconds.\n",
 				 (rte_rdtsc() - timestamp) / rte_get_tsc_hz());
 	}
-	failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator,
-			err ? &sdev->stats_snapshot.stats : &stats);
+	failsafe_stats_increment
+		(&PRIV(fsdev_from_subdev(sdev))->stats_accumulator,
+		err ? &sdev->stats_snapshot.stats : &stats);
 	memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
 }
 
@@ -566,17 +567,17 @@ failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
 {
 	struct sub_device *sdev = cb_arg;
 
-	fs_lock(sdev->fs_dev, 0);
+	fs_lock(fsdev_from_subdev(sdev), 0);
 	/* Switch as soon as possible tx_dev. */
-	fs_switch_dev(sdev->fs_dev, sdev);
+	fs_switch_dev(fsdev_from_subdev(sdev), sdev);
 	/* Use safe bursts in any case. */
-	failsafe_set_burst_fn(sdev->fs_dev, 1);
+	failsafe_set_burst_fn(fsdev_from_subdev(sdev), 1);
 	/*
 	 * Async removal, the sub-PMD will try to unregister
 	 * the callback at the source of the current thread context.
 	 */
 	sdev->remove = 1;
-	fs_unlock(sdev->fs_dev, 0);
+	fs_unlock(fsdev_from_subdev(sdev), 0);
 	return 0;
 }
 
diff --git a/drivers/net/failsafe/failsafe_intr.c b/drivers/net/failsafe/failsafe_intr.c
index 09aa3c6..d372d09 100644
--- a/drivers/net/failsafe/failsafe_intr.c
+++ b/drivers/net/failsafe/failsafe_intr.c
@@ -274,14 +274,14 @@ failsafe_eth_rx_intr_ctl_subdevice(struct sub_device *sdev, int op)
 	int rc;
 	int ret = 0;
 
+	fsdev = fsdev_from_subdev(sdev);
 	if (sdev == NULL || (ETH(sdev) == NULL) ||
-	    sdev->fs_dev == NULL || (PRIV(sdev->fs_dev) == NULL)) {
+		fsdev == NULL || (PRIV(fsdev) == NULL)) {
 		ERROR("Called with invalid arguments");
 		return -EINVAL;
 	}
 	dev = ETH(sdev);
-	fsdev = sdev->fs_dev;
-	epfd = PRIV(sdev->fs_dev)->rxp.efd;
+	epfd = PRIV(fsdev)->rxp.efd;
 	pid = PORT_ID(sdev);
 
 	if (epfd <= 0) {
@@ -330,7 +330,7 @@ int failsafe_rx_intr_install_subdevice(struct sub_device *sdev)
 	const struct rte_intr_conf *const intr_conf =
 				&ETH(sdev)->data->dev_conf.intr_conf;
 
-	fsdev = sdev->fs_dev;
+	fsdev = fsdev_from_subdev(sdev);
 	rxq = (struct rxq **)fsdev->data->rx_queues;
 	if (intr_conf->rxq == 0)
 		return 0;
@@ -368,7 +368,7 @@ void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev)
 	struct rte_eth_dev *fsdev;
 	struct rxq *fsrxq;
 
-	fsdev = sdev->fs_dev;
+	fsdev = fsdev_from_subdev(sdev);
 	for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) {
 		if (qid < fsdev->data->nb_rx_queues) {
 			fsrxq = fsdev->data->rx_queues[qid];
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 29dfd40..84e847f 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -117,7 +117,7 @@ struct sub_device {
 	/* Others are retrieved through a file descriptor */
 	char *fd_str;
 	/* fail-safe device backreference */
-	struct rte_eth_dev *fs_dev;
+	uint16_t fs_port_id; /* shared between processes*/
 	/* flag calling for recollection */
 	volatile unsigned int remove:1;
 	/* flow isolation state */
@@ -324,7 +324,8 @@ extern int failsafe_mac_from_arg;
  */
 #define FS_ATOMIC_RX(s, i) \
 	rte_atomic64_read( \
-	 &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \
+	 &((struct rxq *) \
+	 (fsdev_from_subdev(s)->data->rx_queues[i]))->refcnt[(s)->sid] \
 	)
 /**
  * s: (struct sub_device *)
@@ -332,7 +333,8 @@ extern int failsafe_mac_from_arg;
  */
 #define FS_ATOMIC_TX(s, i) \
 	rte_atomic64_read( \
-	 &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \
+	 &((struct txq *) \
+	 (fsdev_from_subdev(s)->data->tx_queues[i]))->refcnt[(s)->sid] \
 	)
 
 #ifdef RTE_EXEC_ENV_BSDAPP
@@ -379,6 +381,11 @@ fs_find_next(struct rte_eth_dev *dev,
 	return &subs[sid];
 }
 
+static inline struct rte_eth_dev *
+fsdev_from_subdev(struct sub_device *sdev) {
+	return &rte_eth_devices[sdev->fs_port_id];
+}
+
 /*
  * Lock hot-plug mutex.
  * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data
  2019-03-05  9:52 [PATCH v2 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh
@ 2019-03-05  9:52 ` Raslan Darawsheh
  2019-03-05  9:59   ` Thomas Monjalon
  2019-03-05 17:38   ` Gaëtan Rivet
  2019-03-05  9:52 ` [PATCH v2 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 26+ messages in thread
From: Raslan Darawsheh @ 2019-03-05  9:52 UTC (permalink / raw)
  To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen

In multiprocess context, the pointer to sub-device is shared between
processes. Previously, it was a pointer to per process eth_dev so
it's needed to replace this dependency.

Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
---
v2: - moved comment in fs_sdev about subs to this commit
    - added parenthesis around macro arguments.
---
 drivers/net/failsafe/failsafe_eal.c     |  2 +-
 drivers/net/failsafe/failsafe_ether.c   |  7 ++++---
 drivers/net/failsafe/failsafe_private.h | 13 ++++++++-----
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 56d1669..6fac4b6 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -112,7 +112,7 @@ fs_bus_init(struct rte_eth_dev *dev)
 				continue;
 			}
 		}
-		ETH(sdev) = &rte_eth_devices[pid];
+		sdev->data = rte_eth_devices[pid].data;
 		SUB_ID(sdev) = i;
 		sdev->fs_port_id = dev->data->port_id;
 		sdev->dev = ETH(sdev)->device;
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index d5b1488..e1fff59 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -267,18 +267,19 @@ static void
 fs_dev_remove(struct sub_device *sdev)
 {
 	int ret;
+	struct rte_eth_dev *edev = ETH(sdev);
 
 	if (sdev == NULL)
 		return;
 	switch (sdev->state) {
 	case DEV_STARTED:
 		failsafe_rx_intr_uninstall_subdevice(sdev);
-		rte_eth_dev_stop(PORT_ID(sdev));
+		rte_eth_dev_stop(edev->data->port_id);
 		sdev->state = DEV_ACTIVE;
 		/* fallthrough */
 	case DEV_ACTIVE:
 		failsafe_eth_dev_unregister_callbacks(sdev);
-		rte_eth_dev_close(PORT_ID(sdev));
+		rte_eth_dev_close(edev->data->port_id);
 		sdev->state = DEV_PROBED;
 		/* fallthrough */
 	case DEV_PROBED:
@@ -287,7 +288,7 @@ fs_dev_remove(struct sub_device *sdev)
 			ERROR("Bus detach failed for sub_device %u",
 			      SUB_ID(sdev));
 		} else {
-			rte_eth_dev_release_port(ETH(sdev));
+			rte_eth_dev_release_port(edev);
 		}
 		sdev->state = DEV_PARSED;
 		/* fallthrough */
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 84e847f..1e2ad2d 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -100,13 +100,16 @@ struct fs_stats {
 	uint64_t timestamp;
 };
 
+/*
+ * Allocated in shared memory.
+ */
 struct sub_device {
 	/* Exhaustive DPDK device description */
 	struct sub_device *next;
 	struct rte_devargs devargs;
-	struct rte_bus *bus;
-	struct rte_device *dev;
-	struct rte_eth_dev *edev;
+	struct rte_bus *bus; /* per process. */
+	struct rte_device *dev; /* per process. */
+	struct rte_eth_dev_data *data; /* shared between processes */
 	uint8_t sid;
 	/* Device state machine */
 	enum dev_state state;
@@ -139,7 +142,7 @@ struct fs_priv {
 	 * subs[0] is the preferred device
 	 * any other is just another slave
 	 */
-	struct sub_device *subs;
+	struct sub_device *subs;  /* shared between processes */
 	uint8_t subs_head; /* if head == tail, no subs */
 	uint8_t subs_tail; /* first invalid */
 	uint8_t subs_tx; /* current emitting device */
@@ -254,7 +257,7 @@ extern int failsafe_mac_from_arg;
 
 /* sdev: (struct sub_device *) */
 #define ETH(sdev) \
-	((sdev)->edev)
+	((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data->port_id])
 
 /* sdev: (struct sub_device *) */
 #define PORT_ID(sdev) \
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH v2 4/4] net/failsafe: support secondary process
  2019-03-05  9:52 [PATCH v2 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh
  2019-03-05  9:52 ` [PATCH v2 3/4] net/failsafe: replace local sub-device " Raslan Darawsheh
  2019-03-05  9:52 ` [PATCH v2 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh
@ 2019-03-05  9:52 ` Raslan Darawsheh
  2019-03-05 16:43 ` [PATCH v2 1/4] net/failsafe: replace local device with shared data Gaëtan Rivet
  2019-03-18 16:05 ` [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh
  4 siblings, 0 replies; 26+ messages in thread
From: Raslan Darawsheh @ 2019-03-05  9:52 UTC (permalink / raw)
  To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen

Add implementation for probe in secondary.

Failsafe will attempt to attach all the sub-devices in
secondary process.

Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
---
v2: changed devargs_alread_listed return value to be bool.
---
 drivers/net/failsafe/failsafe.c | 45 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 68926ca..c3f568b 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -3,6 +3,8 @@
  * Copyright 2017 Mellanox Technologies, Ltd
  */
 
+#include <stdbool.h>
+
 #include <rte_alarm.h>
 #include <rte_malloc.h>
 #include <rte_ethdev_driver.h>
@@ -312,11 +314,28 @@ fs_rte_eth_free(const char *name)
 	return ret;
 }
 
+static bool
+devargs_already_listed(struct rte_devargs *devargs)
+{
+	struct rte_devargs *list_da;
+
+	RTE_EAL_DEVARGS_FOREACH(devargs->bus->name, list_da) {
+		if (strcmp(list_da->name, devargs->name) == 0)
+			/* devargs already in the list */
+			return true;
+	}
+	return false;
+}
+
 static int
 rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
 {
 	const char *name;
 	struct rte_eth_dev *eth_dev;
+	struct sub_device  *sdev;
+	struct rte_devargs devargs;
+	uint8_t i;
+	int ret;
 
 	name = rte_vdev_device_name(vdev);
 	INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s",
@@ -329,9 +348,33 @@ rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
 			ERROR("Failed to probe %s", name);
 			return -1;
 		}
-		/* TODO: request info from primary to set up Rx and Tx */
 		eth_dev->dev_ops = &failsafe_ops;
 		eth_dev->device = &vdev->device;
+		eth_dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
+		eth_dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
+		/*
+		 * Failsafe will attempt to probe all of its sub-devices.
+		 * Any failure in sub-devices is not a fatal error.
+		 * A sub-device can be plugged later.
+		 */
+		FOREACH_SUBDEV(sdev, i, eth_dev) {
+			/* rebuild devargs to be able to get the bus name. */
+			ret = rte_devargs_parse(&devargs,
+						sdev->devargs.name);
+			if (ret != 0) {
+				ERROR("Failed to parse devargs %s",
+					devargs.name);
+				continue;
+			}
+			if (!devargs_already_listed(&devargs)) {
+				ret = rte_dev_probe(devargs.name);
+				if (ret != 0) {
+					ERROR("Failed to probe devargs %s",
+					      devargs.name);
+					continue;
+				}
+			}
+		}
 		rte_eth_dev_probing_finish(eth_dev);
 		return 0;
 	}
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data
  2019-03-05  9:52 ` [PATCH v2 3/4] net/failsafe: replace local sub-device " Raslan Darawsheh
@ 2019-03-05  9:59   ` Thomas Monjalon
  2019-03-05 17:38   ` Gaëtan Rivet
  1 sibling, 0 replies; 26+ messages in thread
From: Thomas Monjalon @ 2019-03-05  9:59 UTC (permalink / raw)
  To: Raslan Darawsheh; +Cc: gaetan.rivet, dev, stephen

05/03/2019 10:52, Raslan Darawsheh:
> +/*
> + * Allocated in shared memory.
> + */
>  struct sub_device {
>  	/* Exhaustive DPDK device description */
>  	struct sub_device *next;
>  	struct rte_devargs devargs;
> -	struct rte_bus *bus;
> -	struct rte_device *dev;
> -	struct rte_eth_dev *edev;
> +	struct rte_bus *bus; /* per process. */
> +	struct rte_device *dev; /* per process. */

Thinking again about these comments.
Given it is in a shared struct, it would be more precise to say
"only for primary process".

> +	struct rte_eth_dev_data *data; /* shared between processes */

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 1/4] net/failsafe: replace local device with shared data
  2019-03-05  9:52 [PATCH v2 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh
                   ` (2 preceding siblings ...)
  2019-03-05  9:52 ` [PATCH v2 4/4] net/failsafe: support secondary process Raslan Darawsheh
@ 2019-03-05 16:43 ` Gaëtan Rivet
  2019-03-05 17:40   ` Gaëtan Rivet
  2019-03-18 16:05 ` [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh
  4 siblings, 1 reply; 26+ messages in thread
From: Gaëtan Rivet @ 2019-03-05 16:43 UTC (permalink / raw)
  To: Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen

Hello Raslan,

Sorry for the delay.

I have had a little trouble reading the patches. I think the 3 first
should be squashed into a single one, it would be more coherent.

I think I have seen a few points where doing so would have prevented
some unnecessary changes for example, simplifying the series. (thinking
about at least two PORT_ID() and a few ETH() removal that might have
been prevented, I will try to point them all to you.)

On Tue, Mar 05, 2019 at 09:52:04AM +0000, Raslan Darawsheh wrote:
> In multiprocess context, the private structure is shared between
> processes. The back reference from private to generic data was using
> a pointer to a per process eth_dev. It's now changed to a reference of
> the shared data.
> 
> Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
> Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> ---
>  drivers/net/failsafe/failsafe.c         |  2 +-
>  drivers/net/failsafe/failsafe_intr.c    | 10 +++++-----
>  drivers/net/failsafe/failsafe_ops.c     |  4 ++--
>  drivers/net/failsafe/failsafe_private.h |  6 +++++-
>  drivers/net/failsafe/failsafe_rxtx.c    |  4 ++--
>  5 files changed, 15 insertions(+), 11 deletions(-)
> 

Beside the squashing, this patch seems ok.

-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 2/4] net/failsafe: change back-reference from sub-device
  2019-03-05  9:52 ` [PATCH v2 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh
@ 2019-03-05 16:48   ` Gaëtan Rivet
  2019-03-07  9:01     ` Raslan Darawsheh
  0 siblings, 1 reply; 26+ messages in thread
From: Gaëtan Rivet @ 2019-03-05 16:48 UTC (permalink / raw)
  To: Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen

Beside the squash referenced in p1,

On Tue, Mar 05, 2019 at 09:52:05AM +0000, Raslan Darawsheh wrote:
> In multiprocess context, the sub-device structure is shared
> between processes. The reference to the failsafe device was
> a per process pointer. It's changed to port id which is the
> same for all processes.
> 
> Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
> Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> ---
> v2: changed macro to an inline function
> ---
>  drivers/net/failsafe/failsafe_eal.c     |  2 +-
>  drivers/net/failsafe/failsafe_ether.c   | 15 ++++++++-------
>  drivers/net/failsafe/failsafe_intr.c    | 10 +++++-----
>  drivers/net/failsafe/failsafe_private.h | 13 ++++++++++---
>  4 files changed, 24 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
> index 8a888b1..56d1669 100644
> --- a/drivers/net/failsafe/failsafe_eal.c
> +++ b/drivers/net/failsafe/failsafe_eal.c
> @@ -114,7 +114,7 @@ fs_bus_init(struct rte_eth_dev *dev)
>  		}
>  		ETH(sdev) = &rte_eth_devices[pid];
>  		SUB_ID(sdev) = i;
> -		sdev->fs_dev = dev;
> +		sdev->fs_port_id = dev->data->port_id;
>  		sdev->dev = ETH(sdev)->device;
>  		sdev->state = DEV_PROBED;
>  	}
> diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
> index 1783165..d5b1488 100644
> --- a/drivers/net/failsafe/failsafe_ether.c
> +++ b/drivers/net/failsafe/failsafe_ether.c
> @@ -298,7 +298,7 @@ fs_dev_remove(struct sub_device *sdev)
>  		break;
>  	}
>  	sdev->remove = 0;
> -	failsafe_hotplug_alarm_install(sdev->fs_dev);
> +	failsafe_hotplug_alarm_install(fsdev_from_subdev(sdev));
>  }
>  
>  static void
> @@ -318,8 +318,9 @@ fs_dev_stats_save(struct sub_device *sdev)
>  			WARN("Using latest snapshot taken before %"PRIu64" seconds.\n",
>  				 (rte_rdtsc() - timestamp) / rte_get_tsc_hz());
>  	}
> -	failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator,
> -			err ? &sdev->stats_snapshot.stats : &stats);
> +	failsafe_stats_increment
> +		(&PRIV(fsdev_from_subdev(sdev))->stats_accumulator,
> +		err ? &sdev->stats_snapshot.stats : &stats);
>  	memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
>  }
>  
> @@ -566,17 +567,17 @@ failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
>  {
>  	struct sub_device *sdev = cb_arg;
>  
> -	fs_lock(sdev->fs_dev, 0);
> +	fs_lock(fsdev_from_subdev(sdev), 0);
>  	/* Switch as soon as possible tx_dev. */
> -	fs_switch_dev(sdev->fs_dev, sdev);
> +	fs_switch_dev(fsdev_from_subdev(sdev), sdev);
>  	/* Use safe bursts in any case. */
> -	failsafe_set_burst_fn(sdev->fs_dev, 1);
> +	failsafe_set_burst_fn(fsdev_from_subdev(sdev), 1);
>  	/*
>  	 * Async removal, the sub-PMD will try to unregister
>  	 * the callback at the source of the current thread context.
>  	 */
>  	sdev->remove = 1;
> -	fs_unlock(sdev->fs_dev, 0);
> +	fs_unlock(fsdev_from_subdev(sdev), 0);
>  	return 0;
>  }
>  
> diff --git a/drivers/net/failsafe/failsafe_intr.c b/drivers/net/failsafe/failsafe_intr.c
> index 09aa3c6..d372d09 100644
> --- a/drivers/net/failsafe/failsafe_intr.c
> +++ b/drivers/net/failsafe/failsafe_intr.c
> @@ -274,14 +274,14 @@ failsafe_eth_rx_intr_ctl_subdevice(struct sub_device *sdev, int op)
>  	int rc;
>  	int ret = 0;
>  
> +	fsdev = fsdev_from_subdev(sdev);
>  	if (sdev == NULL || (ETH(sdev) == NULL) ||
> -	    sdev->fs_dev == NULL || (PRIV(sdev->fs_dev) == NULL)) {
> +		fsdev == NULL || (PRIV(fsdev) == NULL)) {
>  		ERROR("Called with invalid arguments");
>  		return -EINVAL;
>  	}
>  	dev = ETH(sdev);
> -	fsdev = sdev->fs_dev;
> -	epfd = PRIV(sdev->fs_dev)->rxp.efd;
> +	epfd = PRIV(fsdev)->rxp.efd;
>  	pid = PORT_ID(sdev);
>  
>  	if (epfd <= 0) {
> @@ -330,7 +330,7 @@ int failsafe_rx_intr_install_subdevice(struct sub_device *sdev)
>  	const struct rte_intr_conf *const intr_conf =
>  				&ETH(sdev)->data->dev_conf.intr_conf;
>  
> -	fsdev = sdev->fs_dev;
> +	fsdev = fsdev_from_subdev(sdev);
>  	rxq = (struct rxq **)fsdev->data->rx_queues;
>  	if (intr_conf->rxq == 0)
>  		return 0;
> @@ -368,7 +368,7 @@ void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev)
>  	struct rte_eth_dev *fsdev;
>  	struct rxq *fsrxq;
>  
> -	fsdev = sdev->fs_dev;
> +	fsdev = fsdev_from_subdev(sdev);
>  	for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) {
>  		if (qid < fsdev->data->nb_rx_queues) {
>  			fsrxq = fsdev->data->rx_queues[qid];
> diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
> index 29dfd40..84e847f 100644
> --- a/drivers/net/failsafe/failsafe_private.h
> +++ b/drivers/net/failsafe/failsafe_private.h
> @@ -117,7 +117,7 @@ struct sub_device {
>  	/* Others are retrieved through a file descriptor */
>  	char *fd_str;
>  	/* fail-safe device backreference */
> -	struct rte_eth_dev *fs_dev;
> +	uint16_t fs_port_id; /* shared between processes*/
>  	/* flag calling for recollection */
>  	volatile unsigned int remove:1;
>  	/* flow isolation state */
> @@ -324,7 +324,8 @@ extern int failsafe_mac_from_arg;
>   */
>  #define FS_ATOMIC_RX(s, i) \
>  	rte_atomic64_read( \
> -	 &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \
> +	 &((struct rxq *) \
> +	 (fsdev_from_subdev(s)->data->rx_queues[i]))->refcnt[(s)->sid] \
>  	)
>  /**
>   * s: (struct sub_device *)
> @@ -332,7 +333,8 @@ extern int failsafe_mac_from_arg;
>   */
>  #define FS_ATOMIC_TX(s, i) \
>  	rte_atomic64_read( \
> -	 &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \
> +	 &((struct txq *) \
> +	 (fsdev_from_subdev(s)->data->tx_queues[i]))->refcnt[(s)->sid] \
>  	)
>  
>  #ifdef RTE_EXEC_ENV_BSDAPP
> @@ -379,6 +381,11 @@ fs_find_next(struct rte_eth_dev *dev,
>  	return &subs[sid];
>  }
>  
> +static inline struct rte_eth_dev *
> +fsdev_from_subdev(struct sub_device *sdev) {
> +	return &rte_eth_devices[sdev->fs_port_id];
> +}
> +

Using a static inline that already enforce parameter type makes the
_from_subdev suffix redundant.

I think

fs_dev(struct sub_device *sdev);

would be more readable in most of the changes above, while keeping with
the fs_ prefix used in all static failsafe functions.

>  /*
>   * Lock hot-plug mutex.
>   * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
> -- 
> 2.7.4
> 

-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data
  2019-03-05  9:52 ` [PATCH v2 3/4] net/failsafe: replace local sub-device " Raslan Darawsheh
  2019-03-05  9:59   ` Thomas Monjalon
@ 2019-03-05 17:38   ` Gaëtan Rivet
  2019-03-05 17:58     ` Thomas Monjalon
  1 sibling, 1 reply; 26+ messages in thread
From: Gaëtan Rivet @ 2019-03-05 17:38 UTC (permalink / raw)
  To: Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen

On Tue, Mar 05, 2019 at 09:52:05AM +0000, Raslan Darawsheh wrote:
> In multiprocess context, the pointer to sub-device is shared between
> processes. Previously, it was a pointer to per process eth_dev so
> it's needed to replace this dependency.
> 
> Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
> ---
> v2: - moved comment in fs_sdev about subs to this commit
>     - added parenthesis around macro arguments.
> ---
>  drivers/net/failsafe/failsafe_eal.c     |  2 +-
>  drivers/net/failsafe/failsafe_ether.c   |  7 ++++---
>  drivers/net/failsafe/failsafe_private.h | 13 ++++++++-----
>  3 files changed, 13 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
> index 56d1669..6fac4b6 100644
> --- a/drivers/net/failsafe/failsafe_eal.c
> +++ b/drivers/net/failsafe/failsafe_eal.c
> @@ -112,7 +112,7 @@ fs_bus_init(struct rte_eth_dev *dev)
>  				continue;
>  			}
>  		}
> -		ETH(sdev) = &rte_eth_devices[pid];
> +		sdev->data = rte_eth_devices[pid].data;
>  		SUB_ID(sdev) = i;
>  		sdev->fs_port_id = dev->data->port_id;
>  		sdev->dev = ETH(sdev)->device;
> diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
> index d5b1488..e1fff59 100644
> --- a/drivers/net/failsafe/failsafe_ether.c
> +++ b/drivers/net/failsafe/failsafe_ether.c
> @@ -267,18 +267,19 @@ static void
>  fs_dev_remove(struct sub_device *sdev)
>  {
>  	int ret;
> +	struct rte_eth_dev *edev = ETH(sdev);

I'd have added that above the "int ret;".
(inverse christmas tree and all that.)

>  
>  	if (sdev == NULL)
>  		return;
>  	switch (sdev->state) {
>  	case DEV_STARTED:
>  		failsafe_rx_intr_uninstall_subdevice(sdev);
> -		rte_eth_dev_stop(PORT_ID(sdev));
> +		rte_eth_dev_stop(edev->data->port_id);
>  		sdev->state = DEV_ACTIVE;
>  		/* fallthrough */
>  	case DEV_ACTIVE:
>  		failsafe_eth_dev_unregister_callbacks(sdev);
> -		rte_eth_dev_close(PORT_ID(sdev));
> +		rte_eth_dev_close(edev->data->port_id);

Ok I see. I missed that during the first reading, the private_data is
zeroed on dev_close(), so ETH(sdev) becomes invalid here.

What happens when a primary process closes a device before a secondary?
Is the secondary unable to stop / close its own then? Isn't there some
missing uninit?

This seems dangerous to me. Why not instead allocating a per-process
slab of memory that would hold the relevant references and outlive the
shared data (a per-process rte_eth_dev private data...).

>  		sdev->state = DEV_PROBED;
>  		/* fallthrough */
>  	case DEV_PROBED:
> @@ -287,7 +288,7 @@ fs_dev_remove(struct sub_device *sdev)
>  			ERROR("Bus detach failed for sub_device %u",
>  			      SUB_ID(sdev));
>  		} else {
> -			rte_eth_dev_release_port(ETH(sdev));
> +			rte_eth_dev_release_port(edev);
>  		}
>  		sdev->state = DEV_PARSED;
>  		/* fallthrough */
> diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
> index 84e847f..1e2ad2d 100644
> --- a/drivers/net/failsafe/failsafe_private.h
> +++ b/drivers/net/failsafe/failsafe_private.h
> @@ -100,13 +100,16 @@ struct fs_stats {
>  	uint64_t timestamp;
>  };
>  
> +/*
> + * Allocated in shared memory.
> + */
>  struct sub_device {
>  	/* Exhaustive DPDK device description */
>  	struct sub_device *next;
>  	struct rte_devargs devargs;
> -	struct rte_bus *bus;
> -	struct rte_device *dev;
> -	struct rte_eth_dev *edev;
> +	struct rte_bus *bus; /* per process. */
> +	struct rte_device *dev; /* per process. */
> +	struct rte_eth_dev_data *data; /* shared between processes */
>  	uint8_t sid;
>  	/* Device state machine */
>  	enum dev_state state;
> @@ -139,7 +142,7 @@ struct fs_priv {
>  	 * subs[0] is the preferred device
>  	 * any other is just another slave
>  	 */
> -	struct sub_device *subs;
> +	struct sub_device *subs;  /* shared between processes */
>  	uint8_t subs_head; /* if head == tail, no subs */
>  	uint8_t subs_tail; /* first invalid */
>  	uint8_t subs_tx; /* current emitting device */
> @@ -254,7 +257,7 @@ extern int failsafe_mac_from_arg;
>  
>  /* sdev: (struct sub_device *) */
>  #define ETH(sdev) \
> -	((sdev)->edev)
> +	((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data->port_id])
>  
>  /* sdev: (struct sub_device *) */
>  #define PORT_ID(sdev) \
> -- 
> 2.7.4
> 

-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 1/4] net/failsafe: replace local device with shared data
  2019-03-05 16:43 ` [PATCH v2 1/4] net/failsafe: replace local device with shared data Gaëtan Rivet
@ 2019-03-05 17:40   ` Gaëtan Rivet
  2019-03-05 17:41     ` Thomas Monjalon
  0 siblings, 1 reply; 26+ messages in thread
From: Gaëtan Rivet @ 2019-03-05 17:40 UTC (permalink / raw)
  To: Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen

On Tue, Mar 05, 2019 at 05:43:26PM +0100, Gaëtan Rivet wrote:
> Hello Raslan,
> 
> Sorry for the delay.
> 
> I have had a little trouble reading the patches. I think the 3 first
> should be squashed into a single one, it would be more coherent.
> 
> I think I have seen a few points where doing so would have prevented
> some unnecessary changes for example, simplifying the series. (thinking
> about at least two PORT_ID() and a few ETH() removal that might have
> been prevented, I will try to point them all to you.)
> 
> On Tue, Mar 05, 2019 at 09:52:04AM +0000, Raslan Darawsheh wrote:
> > In multiprocess context, the private structure is shared between
> > processes. The back reference from private to generic data was using
> > a pointer to a per process eth_dev. It's now changed to a reference of
> > the shared data.
> > 
> > Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
> > Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> > ---
> >  drivers/net/failsafe/failsafe.c         |  2 +-
> >  drivers/net/failsafe/failsafe_intr.c    | 10 +++++-----
> >  drivers/net/failsafe/failsafe_ops.c     |  4 ++--
> >  drivers/net/failsafe/failsafe_private.h |  6 +++++-
> >  drivers/net/failsafe/failsafe_rxtx.c    |  4 ++--
> >  5 files changed, 15 insertions(+), 11 deletions(-)
> > 
> 
> Beside the squashing, this patch seems ok.

Okay after reading a little more closely, it does not seem interesting
to squash finaly, and it will be simpler for you to continue with your
current series, so forget about that.

-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 1/4] net/failsafe: replace local device with shared data
  2019-03-05 17:40   ` Gaëtan Rivet
@ 2019-03-05 17:41     ` Thomas Monjalon
  0 siblings, 0 replies; 26+ messages in thread
From: Thomas Monjalon @ 2019-03-05 17:41 UTC (permalink / raw)
  To: Gaëtan Rivet; +Cc: Raslan Darawsheh, dev, stephen

05/03/2019 18:40, Gaëtan Rivet:
> On Tue, Mar 05, 2019 at 05:43:26PM +0100, Gaëtan Rivet wrote:
> > I have had a little trouble reading the patches. I think the 3 first
> > should be squashed into a single one, it would be more coherent.
> > 
> > I think I have seen a few points where doing so would have prevented
> > some unnecessary changes for example, simplifying the series. (thinking
> > about at least two PORT_ID() and a few ETH() removal that might have
> > been prevented, I will try to point them all to you.)
> > 
[...]
> > 
> > Beside the squashing, this patch seems ok.
> 
> Okay after reading a little more closely, it does not seem interesting
> to squash finaly, and it will be simpler for you to continue with your
> current series, so forget about that.

Yes, and it's easier to track changes when split as it is.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data
  2019-03-05 17:38   ` Gaëtan Rivet
@ 2019-03-05 17:58     ` Thomas Monjalon
  2019-03-06 10:46       ` Gaëtan Rivet
  0 siblings, 1 reply; 26+ messages in thread
From: Thomas Monjalon @ 2019-03-05 17:58 UTC (permalink / raw)
  To: Gaëtan Rivet; +Cc: Raslan Darawsheh, dev, stephen

Hi,

05/03/2019 18:38, Gaëtan Rivet:
> >  fs_dev_remove(struct sub_device *sdev)
[...]
> > -		rte_eth_dev_close(PORT_ID(sdev));
> > +		rte_eth_dev_close(edev->data->port_id);
> 
> Ok I see. I missed that during the first reading, the private_data is
> zeroed on dev_close(), so ETH(sdev) becomes invalid here.

I don't follow you. What do you mean with this comment?

> What happens when a primary process closes a device before a secondary?
> Is the secondary unable to stop / close its own then? Isn't there some
> missing uninit?

Is the secondary process supposed to do any closing?
The device management should be done only by the primary process.

Note: anyway all this hotplug related code should be dropped
from failsafe to be replaced by EAL hotplug management.

> This seems dangerous to me. Why not instead allocating a per-process
> slab of memory that would hold the relevant references and outlive the
> shared data (a per-process rte_eth_dev private data...).

Which data do you think should be allocated per process?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data
  2019-03-05 17:58     ` Thomas Monjalon
@ 2019-03-06 10:46       ` Gaëtan Rivet
  2019-03-06 18:02         ` Thomas Monjalon
  0 siblings, 1 reply; 26+ messages in thread
From: Gaëtan Rivet @ 2019-03-06 10:46 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: Raslan Darawsheh, dev, stephen

On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote:
> Hi,
> 
> 05/03/2019 18:38, Gaëtan Rivet:
> > >  fs_dev_remove(struct sub_device *sdev)
> [...]
> > > -		rte_eth_dev_close(PORT_ID(sdev));
> > > +		rte_eth_dev_close(edev->data->port_id);
> > 
> > Ok I see. I missed that during the first reading, the private_data is
> > zeroed on dev_close(), so ETH(sdev) becomes invalid here.
> 
> I don't follow you. What do you mean with this comment?
> 

PORT_ID(sdev) uses ETH(sdev).

ETH(sdev) will now point to `&rte_eth_devices[(sdev)->data->port_id]`,
so data->port_id will be zeroed on sdev close.

So once the sub-device has been closed, calling
rte_eth_dev_release_port(ETH(sdev)) is not possible anymore, justifying
the change (taking the reference to ETH(sdev) first, then using it after
shared data has been overwritten).

> > What happens when a primary process closes a device before a secondary?
> > Is the secondary unable to stop / close its own then? Isn't there some
> > missing uninit?
> 
> Is the secondary process supposed to do any closing?
> The device management should be done only by the primary process.
> 
> Note: anyway all this hotplug related code should be dropped
> from failsafe to be replaced by EAL hotplug management.
> 

I don't know, I've never used secondary process.
However, cursory reading the code of rte_eth_dev_close(), I don't see
a guard against calling it from a secondary process?

Reading code like

   rte_free(dev->data->rx_queues);
   dev->data->rx_queues = NULL;

within makes me think the issue has been seen at least once, where
shared data is freed multiple times, so I guessed some secondary
processes were calling it. Maybe they are not meant to, but what
prevents them from being badly written?

Also, given rte_dev_remove IPC call to transfer the order to the
primary, it seems that at least secondary processes are expected to call
rte_dev_remove() at some point? So are they only authorized to call
rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()? Is
there a specific documentation detailing the design of secondary process
and the related responsibilities in the lifetime of a device? How are
they synching their rte_eth_devices list if they are not calling
rte_eth_dev_close(), ever?

> > This seems dangerous to me. Why not instead allocating a per-process
> > slab of memory that would hold the relevant references and outlive the
> > shared data (a per-process rte_eth_dev private data...).
> 
> Which data do you think should be allocated per process?
> 
> 

[-------- SHARED SPACE --------------] [-- PER-PROCESS --------]
+--------------------------------------------------------------+
| +------------------+                +- rte_eth_devices[n] -+ |
| |rte_eth_dev_data  |<---------------+ data                 | | PRIMARY
| |                  |   +dev_priv-+  |                      | |
| |      dev_private +-->|         |  |                      | |
| |              ... |   |         |  |                      | |
| |          port_id |   |         |  |                      | |
| |                  |   |         |  |                      | |
| |                  |   |         |  |                      | |
| |                  |   |         |  +----------------------+ |
| |                  |   |         |  +- rte_eth_devices[n] -+ |
| |                  |   |         |  |                      | | SECONDARY
| |                  |   |         |  |                      | |
| |                  |   |         |  |                      | |
| |                  |   |         |  |                      | |
| |                  |   +---------+  |                      | |
| |                  |<---------------+ data                 | |
| +------------------+                +----------------------+ |
+--------------------------------------------------------------+

Here port_id is used within fail-safe to get back to rte_eth_devices[n].
This disappears once a device is closed, as all shared space is zeroed.

This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct,
and at some point it is not anymore, once a sub-device has been
closed. This seems dangerous.

I was thinking initially that allocating a place where each sdev would
store their rte_eth_devices / port_id back-reference could alleviate the
issue, meaning that the fail-safe would not zero it on sdev_close(), and
it would remain valid for the lifetime of a sub-device, so even when a
sub-device is in DEV_PROBED state.

But now that I think about it, it could probably be simpler: instead of
using (ETH(sdev)->data->port_id) for the port_id of an sdev (meaning
that it is dependent on the lifetime of the sdev, instead of the
lifetime of the failsafe), the port-id itself should be stored in the
sub_device structure. This structure will be available for the lifetime
of the failsafe, and the port_id is correct accross all processes.

So PORT_ID(sdev) would be defined to something like (sdev->port_id), and
ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would remain
correct even once the primary has closed the sub-device.

What do you think? Do you agree that the current state is dangerous, and
do you think the solution would alleviate the issue? Maybe the concern
is unfounded and the only issue is within fs_dev_remove().

-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data
  2019-03-06 10:46       ` Gaëtan Rivet
@ 2019-03-06 18:02         ` Thomas Monjalon
  2019-03-07  8:43           ` Raslan Darawsheh
  0 siblings, 1 reply; 26+ messages in thread
From: Thomas Monjalon @ 2019-03-06 18:02 UTC (permalink / raw)
  To: Gaëtan Rivet, Raslan Darawsheh; +Cc: dev, stephen

06/03/2019 11:46, Gaëtan Rivet:
> On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote:
> > 05/03/2019 18:38, Gaëtan Rivet:
> > > What happens when a primary process closes a device before a secondary?
> > > Is the secondary unable to stop / close its own then? Isn't there some
> > > missing uninit?
> > 
> > Is the secondary process supposed to do any closing?
> > The device management should be done only by the primary process.
> > 
> > Note: anyway all this hotplug related code should be dropped
> > from failsafe to be replaced by EAL hotplug management.
> > 
> 
> I don't know, I've never used secondary process.
> However, cursory reading the code of rte_eth_dev_close(), I don't see
> a guard against calling it from a secondary process?

Yes indeed, there is no guard.
That's something not clear in DPDK, previously we were
attaching some vdevs in secondary only.

> Reading code like
> 
>    rte_free(dev->data->rx_queues);
>    dev->data->rx_queues = NULL;
> 
> within makes me think the issue has been seen at least once, where
> shared data is freed multiple times, so I guessed some secondary
> processes were calling it. Maybe they are not meant to, but what
> prevents them from being badly written?
> 
> Also, given rte_dev_remove IPC call to transfer the order to the
> primary, it seems that at least secondary processes are expected to call
> rte_dev_remove() at some point? So are they only authorized to call
> rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()? Is
> there a specific documentation detailing the design of secondary process
> and the related responsibilities in the lifetime of a device? How are
> they synching their rte_eth_devices list if they are not calling
> rte_eth_dev_close(), ever?

All these calls should be done in primary.
The IPC mechanism calls the attach/detach in secondary at EAL level.
The PMDs does the bridge between EAL device and ethdev port status.
But you are right, there can be a sync issue if closing an ethdev port
and not removing the EAL device.
This is a generic question about deciding whether we want all ethdev ports
to be synced in multi-process or not.

In failsafe context, we are closing the EAL device and change
the state of the sub-device accordingly. So I think there is no issue.

> > > This seems dangerous to me. Why not instead allocating a per-process
> > > slab of memory that would hold the relevant references and outlive the
> > > shared data (a per-process rte_eth_dev private data...).
> > 
> > Which data do you think should be allocated per process?
> > 
> > 
> 
> [-------- SHARED SPACE --------------] [-- PER-PROCESS --------]
> +--------------------------------------------------------------+
> | +------------------+                +- rte_eth_devices[n] -+ |
> | |rte_eth_dev_data  |<---------------+ data                 | | PRIMARY
> | |                  |   +dev_priv-+  |                      | |
> | |      dev_private +-->|         |  |                      | |
> | |              ... |   |         |  |                      | |
> | |          port_id |   |         |  |                      | |
> | |                  |   |         |  |                      | |
> | |                  |   |         |  |                      | |
> | |                  |   |         |  +----------------------+ |
> | |                  |   |         |  +- rte_eth_devices[n] -+ |
> | |                  |   |         |  |                      | | SECONDARY
> | |                  |   |         |  |                      | |
> | |                  |   |         |  |                      | |
> | |                  |   |         |  |                      | |
> | |                  |   +---------+  |                      | |
> | |                  |<---------------+ data                 | |
> | +------------------+                +----------------------+ |
> +--------------------------------------------------------------+
> 
> Here port_id is used within fail-safe to get back to rte_eth_devices[n].
> This disappears once a device is closed, as all shared space is zeroed.
> 
> This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct,
> and at some point it is not anymore, once a sub-device has been
> closed. This seems dangerous.

The state of the sub-device is changed.
I don't see any issue.

> I was thinking initially that allocating a place where each sdev would
> store their rte_eth_devices / port_id back-reference could alleviate the
> issue, meaning that the fail-safe would not zero it on sdev_close(), and
> it would remain valid for the lifetime of a sub-device, so even when a
> sub-device is in DEV_PROBED state.
> 
> But now that I think about it, it could probably be simpler: instead of
> using (ETH(sdev)->data->port_id) for the port_id of an sdev (meaning
> that it is dependent on the lifetime of the sdev, instead of the
> lifetime of the failsafe), the port-id itself should be stored in the
> sub_device structure. This structure will be available for the lifetime
> of the failsafe, and the port_id is correct accross all processes.
> 
> So PORT_ID(sdev) would be defined to something like (sdev->port_id), and
> ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would remain
> correct even once the primary has closed the sub-device.
> 
> What do you think? Do you agree that the current state is dangerous, and
> do you think the solution would alleviate the issue? Maybe the concern
> is unfounded and the only issue is within fs_dev_remove().

Yes it is only seen in fs_dev_remove().
I discussed about your proposal with Raslan, and we agree we
could change from sub_device.data to sub_device.port_id,
it may be more future-proof.

I have only one doubt: look at the macro in this patch:

#define ETH(sdev) \
	((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data->port_id])

The NULL check cannot be done with a port id.
I think it was needed to manage one case. Raslan?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data
  2019-03-06 18:02         ` Thomas Monjalon
@ 2019-03-07  8:43           ` Raslan Darawsheh
  2019-03-07  9:47             ` Gaëtan Rivet
  0 siblings, 1 reply; 26+ messages in thread
From: Raslan Darawsheh @ 2019-03-07  8:43 UTC (permalink / raw)
  To: Thomas Monjalon, Gaëtan Rivet; +Cc: dev, stephen

Hi,

> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Wednesday, March 6, 2019 8:02 PM
> To: Gaëtan Rivet <gaetan.rivet@6wind.com>; Raslan Darawsheh
> <rasland@mellanox.com>
> Cc: dev@dpdk.org; stephen@networkplumber.org
> Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device
> with shared data
> 
> 06/03/2019 11:46, Gaëtan Rivet:
> > On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote:
> > > 05/03/2019 18:38, Gaëtan Rivet:
> > > > What happens when a primary process closes a device before a
> secondary?
> > > > Is the secondary unable to stop / close its own then? Isn't there
> > > > some missing uninit?
> > >
> > > Is the secondary process supposed to do any closing?
> > > The device management should be done only by the primary process.
> > >
> > > Note: anyway all this hotplug related code should be dropped from
> > > failsafe to be replaced by EAL hotplug management.
> > >
> >
> > I don't know, I've never used secondary process.
> > However, cursory reading the code of rte_eth_dev_close(), I don't see
> > a guard against calling it from a secondary process?
> 
> Yes indeed, there is no guard.
> That's something not clear in DPDK, previously we were attaching some
> vdevs in secondary only.
> 
> > Reading code like
> >
> >    rte_free(dev->data->rx_queues);
> >    dev->data->rx_queues = NULL;
> >
> > within makes me think the issue has been seen at least once, where
> > shared data is freed multiple times, so I guessed some secondary
> > processes were calling it. Maybe they are not meant to, but what
> > prevents them from being badly written?
> >
> > Also, given rte_dev_remove IPC call to transfer the order to the
> > primary, it seems that at least secondary processes are expected to
> > call
> > rte_dev_remove() at some point? So are they only authorized to call
> > rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()? Is
> > there a specific documentation detailing the design of secondary
> > process and the related responsibilities in the lifetime of a device?
> > How are they synching their rte_eth_devices list if they are not
> > calling rte_eth_dev_close(), ever?
> 
> All these calls should be done in primary.
> The IPC mechanism calls the attach/detach in secondary at EAL level.
> The PMDs does the bridge between EAL device and ethdev port status.
> But you are right, there can be a sync issue if closing an ethdev port and not
> removing the EAL device.
> This is a generic question about deciding whether we want all ethdev ports to
> be synced in multi-process or not.
> 
> In failsafe context, we are closing the EAL device and change the state of the
> sub-device accordingly. So I think there is no issue.
> 
> > > > This seems dangerous to me. Why not instead allocating a
> > > > per-process slab of memory that would hold the relevant references
> > > > and outlive the shared data (a per-process rte_eth_dev private data...).
> > >
> > > Which data do you think should be allocated per process?
> > >
> > >
> >
> > [-------- SHARED SPACE --------------] [-- PER-PROCESS --------]
> > +--------------------------------------------------------------+
> > | +------------------+                +- rte_eth_devices[n] -+ |
> > | |rte_eth_dev_data  |<---------------+ data                 | | PRIMARY
> > | |                  |   +dev_priv-+  |                      | |
> > | |      dev_private +-->|         |  |                      | |
> > | |              ... |   |         |  |                      | |
> > | |          port_id |   |         |  |                      | |
> > | |                  |   |         |  |                      | |
> > | |                  |   |         |  |                      | |
> > | |                  |   |         |  +----------------------+ |
> > | |                  |   |         |  +- rte_eth_devices[n] -+ |
> > | |                  |   |         |  |                      | | SECONDARY
> > | |                  |   |         |  |                      | |
> > | |                  |   |         |  |                      | |
> > | |                  |   |         |  |                      | |
> > | |                  |   +---------+  |                      | |
> > | |                  |<---------------+ data                 | |
> > | +------------------+                +----------------------+ |
> > +--------------------------------------------------------------+
> >
> > Here port_id is used within fail-safe to get back to rte_eth_devices[n].
> > This disappears once a device is closed, as all shared space is zeroed.
> >
> > This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct, and
> > at some point it is not anymore, once a sub-device has been closed.
> > This seems dangerous.
> 
> The state of the sub-device is changed.
> I don't see any issue.
> 
> > I was thinking initially that allocating a place where each sdev would
> > store their rte_eth_devices / port_id back-reference could alleviate
> > the issue, meaning that the fail-safe would not zero it on
> > sdev_close(), and it would remain valid for the lifetime of a
> > sub-device, so even when a sub-device is in DEV_PROBED state.
> >
> > But now that I think about it, it could probably be simpler: instead
> > of using (ETH(sdev)->data->port_id) for the port_id of an sdev
> > (meaning that it is dependent on the lifetime of the sdev, instead of
> > the lifetime of the failsafe), the port-id itself should be stored in
> > the sub_device structure. This structure will be available for the
> > lifetime of the failsafe, and the port_id is correct accross all processes.
> >
> > So PORT_ID(sdev) would be defined to something like (sdev->port_id),
> > and
> > ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would remain
> > correct even once the primary has closed the sub-device.
> >
> > What do you think? Do you agree that the current state is dangerous,
> > and do you think the solution would alleviate the issue? Maybe the
> > concern is unfounded and the only issue is within fs_dev_remove().
> 
> Yes it is only seen in fs_dev_remove().
> I discussed about your proposal with Raslan, and we agree we could change
> from sub_device.data to sub_device.port_id, it may be more future-proof.
> 
> I have only one doubt: look at the macro in this patch:
> 
> #define ETH(sdev) \
> 	((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data-
> >port_id])
> 
> The NULL check cannot be done with a port id.
> I think it was needed to manage one case. Raslan?

That's right since we need it for fs_tx_unsafe, to add a protection for plugged out devices during TX.
> 


> 
Kindest regards
Raslan Darawsheh

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 2/4] net/failsafe: change back-reference from sub-device
  2019-03-05 16:48   ` Gaëtan Rivet
@ 2019-03-07  9:01     ` Raslan Darawsheh
  2019-03-07  9:43       ` Gaëtan Rivet
  0 siblings, 1 reply; 26+ messages in thread
From: Raslan Darawsheh @ 2019-03-07  9:01 UTC (permalink / raw)
  To: Gaëtan Rivet; +Cc: dev, Thomas Monjalon, stephen

Hi,

> -----Original Message-----
> From: Gaëtan Rivet <gaetan.rivet@6wind.com>
> Sent: Tuesday, March 5, 2019 6:49 PM
> To: Raslan Darawsheh <rasland@mellanox.com>
> Cc: dev@dpdk.org; Thomas Monjalon <thomas@monjalon.net>;
> stephen@networkplumber.org
> Subject: Re: [PATCH v2 2/4] net/failsafe: change back-reference from sub-
> device
> 
> Beside the squash referenced in p1,
> 
> On Tue, Mar 05, 2019 at 09:52:05AM +0000, Raslan Darawsheh wrote:
> > In multiprocess context, the sub-device structure is shared between
> > processes. The reference to the failsafe device was a per process
> > pointer. It's changed to port id which is the same for all processes.
> >
> > Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
> > Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> > ---
> > v2: changed macro to an inline function
> > ---
> >  drivers/net/failsafe/failsafe_eal.c     |  2 +-
> >  drivers/net/failsafe/failsafe_ether.c   | 15 ++++++++-------
> >  drivers/net/failsafe/failsafe_intr.c    | 10 +++++-----
> >  drivers/net/failsafe/failsafe_private.h | 13 ++++++++++---
> >  4 files changed, 24 insertions(+), 16 deletions(-)
> >
> > diff --git a/drivers/net/failsafe/failsafe_eal.c
> > b/drivers/net/failsafe/failsafe_eal.c
> > index 8a888b1..56d1669 100644
> > --- a/drivers/net/failsafe/failsafe_eal.c
> > +++ b/drivers/net/failsafe/failsafe_eal.c
> > @@ -114,7 +114,7 @@ fs_bus_init(struct rte_eth_dev *dev)
> >  		}
> >  		ETH(sdev) = &rte_eth_devices[pid];
> >  		SUB_ID(sdev) = i;
> > -		sdev->fs_dev = dev;
> > +		sdev->fs_port_id = dev->data->port_id;
> >  		sdev->dev = ETH(sdev)->device;
> >  		sdev->state = DEV_PROBED;
> >  	}
> > diff --git a/drivers/net/failsafe/failsafe_ether.c
> > b/drivers/net/failsafe/failsafe_ether.c
> > index 1783165..d5b1488 100644
> > --- a/drivers/net/failsafe/failsafe_ether.c
> > +++ b/drivers/net/failsafe/failsafe_ether.c
> > @@ -298,7 +298,7 @@ fs_dev_remove(struct sub_device *sdev)
> >  		break;
> >  	}
> >  	sdev->remove = 0;
> > -	failsafe_hotplug_alarm_install(sdev->fs_dev);
> > +	failsafe_hotplug_alarm_install(fsdev_from_subdev(sdev));
> >  }
> >
> >  static void
> > @@ -318,8 +318,9 @@ fs_dev_stats_save(struct sub_device *sdev)
> >  			WARN("Using latest snapshot taken before
> %"PRIu64" seconds.\n",
> >  				 (rte_rdtsc() - timestamp) /
> rte_get_tsc_hz());
> >  	}
> > -	failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator,
> > -			err ? &sdev->stats_snapshot.stats : &stats);
> > +	failsafe_stats_increment
> > +		(&PRIV(fsdev_from_subdev(sdev))->stats_accumulator,
> > +		err ? &sdev->stats_snapshot.stats : &stats);
> >  	memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
> }
> >
> > @@ -566,17 +567,17 @@ failsafe_eth_rmv_event_callback(uint16_t
> port_id
> > __rte_unused,  {
> >  	struct sub_device *sdev = cb_arg;
> >
> > -	fs_lock(sdev->fs_dev, 0);
> > +	fs_lock(fsdev_from_subdev(sdev), 0);
> >  	/* Switch as soon as possible tx_dev. */
> > -	fs_switch_dev(sdev->fs_dev, sdev);
> > +	fs_switch_dev(fsdev_from_subdev(sdev), sdev);
> >  	/* Use safe bursts in any case. */
> > -	failsafe_set_burst_fn(sdev->fs_dev, 1);
> > +	failsafe_set_burst_fn(fsdev_from_subdev(sdev), 1);
> >  	/*
> >  	 * Async removal, the sub-PMD will try to unregister
> >  	 * the callback at the source of the current thread context.
> >  	 */
> >  	sdev->remove = 1;
> > -	fs_unlock(sdev->fs_dev, 0);
> > +	fs_unlock(fsdev_from_subdev(sdev), 0);
> >  	return 0;
> >  }
> >
> > diff --git a/drivers/net/failsafe/failsafe_intr.c
> > b/drivers/net/failsafe/failsafe_intr.c
> > index 09aa3c6..d372d09 100644
> > --- a/drivers/net/failsafe/failsafe_intr.c
> > +++ b/drivers/net/failsafe/failsafe_intr.c
> > @@ -274,14 +274,14 @@ failsafe_eth_rx_intr_ctl_subdevice(struct
> sub_device *sdev, int op)
> >  	int rc;
> >  	int ret = 0;
> >
> > +	fsdev = fsdev_from_subdev(sdev);
> >  	if (sdev == NULL || (ETH(sdev) == NULL) ||
> > -	    sdev->fs_dev == NULL || (PRIV(sdev->fs_dev) == NULL)) {
> > +		fsdev == NULL || (PRIV(fsdev) == NULL)) {
> >  		ERROR("Called with invalid arguments");
> >  		return -EINVAL;
> >  	}
> >  	dev = ETH(sdev);
> > -	fsdev = sdev->fs_dev;
> > -	epfd = PRIV(sdev->fs_dev)->rxp.efd;
> > +	epfd = PRIV(fsdev)->rxp.efd;
> >  	pid = PORT_ID(sdev);
> >
> >  	if (epfd <= 0) {
> > @@ -330,7 +330,7 @@ int failsafe_rx_intr_install_subdevice(struct
> sub_device *sdev)
> >  	const struct rte_intr_conf *const intr_conf =
> >  				&ETH(sdev)->data->dev_conf.intr_conf;
> >
> > -	fsdev = sdev->fs_dev;
> > +	fsdev = fsdev_from_subdev(sdev);
> >  	rxq = (struct rxq **)fsdev->data->rx_queues;
> >  	if (intr_conf->rxq == 0)
> >  		return 0;
> > @@ -368,7 +368,7 @@ void failsafe_rx_intr_uninstall_subdevice(struct
> sub_device *sdev)
> >  	struct rte_eth_dev *fsdev;
> >  	struct rxq *fsrxq;
> >
> > -	fsdev = sdev->fs_dev;
> > +	fsdev = fsdev_from_subdev(sdev);
> >  	for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) {
> >  		if (qid < fsdev->data->nb_rx_queues) {
> >  			fsrxq = fsdev->data->rx_queues[qid]; diff --git
> > a/drivers/net/failsafe/failsafe_private.h
> > b/drivers/net/failsafe/failsafe_private.h
> > index 29dfd40..84e847f 100644
> > --- a/drivers/net/failsafe/failsafe_private.h
> > +++ b/drivers/net/failsafe/failsafe_private.h
> > @@ -117,7 +117,7 @@ struct sub_device {
> >  	/* Others are retrieved through a file descriptor */
> >  	char *fd_str;
> >  	/* fail-safe device backreference */
> > -	struct rte_eth_dev *fs_dev;
> > +	uint16_t fs_port_id; /* shared between processes*/
> >  	/* flag calling for recollection */
> >  	volatile unsigned int remove:1;
> >  	/* flow isolation state */
> > @@ -324,7 +324,8 @@ extern int failsafe_mac_from_arg;
> >   */
> >  #define FS_ATOMIC_RX(s, i) \
> >  	rte_atomic64_read( \
> > -	 &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid]
> \
> > +	 &((struct rxq *) \
> > +	 (fsdev_from_subdev(s)->data->rx_queues[i]))->refcnt[(s)->sid] \
> >  	)
> >  /**
> >   * s: (struct sub_device *)
> > @@ -332,7 +333,8 @@ extern int failsafe_mac_from_arg;
> >   */
> >  #define FS_ATOMIC_TX(s, i) \
> >  	rte_atomic64_read( \
> > -	 &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid]
> \
> > +	 &((struct txq *) \
> > +	 (fsdev_from_subdev(s)->data->tx_queues[i]))->refcnt[(s)->sid] \
> >  	)
> >
> >  #ifdef RTE_EXEC_ENV_BSDAPP
> > @@ -379,6 +381,11 @@ fs_find_next(struct rte_eth_dev *dev,
> >  	return &subs[sid];
> >  }
> >
> > +static inline struct rte_eth_dev *
> > +fsdev_from_subdev(struct sub_device *sdev) {
> > +	return &rte_eth_devices[sdev->fs_port_id];
> > +}
> > +
> 
> Using a static inline that already enforce parameter type makes the
> _from_subdev suffix redundant.
> 
> I think
> 
> fs_dev(struct sub_device *sdev);
> 
> would be more readable in most of the changes above, while keeping with
> the fs_ prefix used in all static failsafe functions.
> 

I think you are right if I'm only reading the prototype of the function/implementation.
But, if I'm reading it from the function call, it would be easier to read/understand it.
I would change the prefix to be fs_dev_from_subdev instead what do you think?


> >  /*
> >   * Lock hot-plug mutex.
> >   * is_alarm means that the caller is, for sure, the hot-plug alarm
> mechanism.
> > --
> > 2.7.4
> >
> 
> --
> Gaëtan Rivet
> 6WIND


Kindest regards,
Raslan Darawsheh

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 2/4] net/failsafe: change back-reference from sub-device
  2019-03-07  9:01     ` Raslan Darawsheh
@ 2019-03-07  9:43       ` Gaëtan Rivet
  0 siblings, 0 replies; 26+ messages in thread
From: Gaëtan Rivet @ 2019-03-07  9:43 UTC (permalink / raw)
  To: Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen

Hello Raslan,

On Thu, Mar 07, 2019 at 09:01:11AM +0000, Raslan Darawsheh wrote:
> Hi,
> 
> > -----Original Message-----
> > From: Gaëtan Rivet <gaetan.rivet@6wind.com>
> > Sent: Tuesday, March 5, 2019 6:49 PM
> > To: Raslan Darawsheh <rasland@mellanox.com>
> > Cc: dev@dpdk.org; Thomas Monjalon <thomas@monjalon.net>;
> > stephen@networkplumber.org
> > Subject: Re: [PATCH v2 2/4] net/failsafe: change back-reference from sub-
> > device
> > 
> > Beside the squash referenced in p1,
> > 
> > On Tue, Mar 05, 2019 at 09:52:05AM +0000, Raslan Darawsheh wrote:
> > > In multiprocess context, the sub-device structure is shared between
> > > processes. The reference to the failsafe device was a per process
> > > pointer. It's changed to port id which is the same for all processes.
> > >
> > > Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
> > > Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> > > ---
> > > v2: changed macro to an inline function
> > > ---
> > >  drivers/net/failsafe/failsafe_eal.c     |  2 +-
> > >  drivers/net/failsafe/failsafe_ether.c   | 15 ++++++++-------
> > >  drivers/net/failsafe/failsafe_intr.c    | 10 +++++-----
> > >  drivers/net/failsafe/failsafe_private.h | 13 ++++++++++---
> > >  4 files changed, 24 insertions(+), 16 deletions(-)
> > >
> > > diff --git a/drivers/net/failsafe/failsafe_eal.c
> > > b/drivers/net/failsafe/failsafe_eal.c
> > > index 8a888b1..56d1669 100644
> > > --- a/drivers/net/failsafe/failsafe_eal.c
> > > +++ b/drivers/net/failsafe/failsafe_eal.c
> > > @@ -114,7 +114,7 @@ fs_bus_init(struct rte_eth_dev *dev)
> > >  		}
> > >  		ETH(sdev) = &rte_eth_devices[pid];
> > >  		SUB_ID(sdev) = i;
> > > -		sdev->fs_dev = dev;
> > > +		sdev->fs_port_id = dev->data->port_id;
> > >  		sdev->dev = ETH(sdev)->device;
> > >  		sdev->state = DEV_PROBED;
> > >  	}
> > > diff --git a/drivers/net/failsafe/failsafe_ether.c
> > > b/drivers/net/failsafe/failsafe_ether.c
> > > index 1783165..d5b1488 100644
> > > --- a/drivers/net/failsafe/failsafe_ether.c
> > > +++ b/drivers/net/failsafe/failsafe_ether.c
> > > @@ -298,7 +298,7 @@ fs_dev_remove(struct sub_device *sdev)
> > >  		break;
> > >  	}
> > >  	sdev->remove = 0;
> > > -	failsafe_hotplug_alarm_install(sdev->fs_dev);
> > > +	failsafe_hotplug_alarm_install(fsdev_from_subdev(sdev));
> > >  }
> > >
> > >  static void
> > > @@ -318,8 +318,9 @@ fs_dev_stats_save(struct sub_device *sdev)
> > >  			WARN("Using latest snapshot taken before
> > %"PRIu64" seconds.\n",
> > >  				 (rte_rdtsc() - timestamp) /
> > rte_get_tsc_hz());
> > >  	}
> > > -	failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator,
> > > -			err ? &sdev->stats_snapshot.stats : &stats);
> > > +	failsafe_stats_increment
> > > +		(&PRIV(fsdev_from_subdev(sdev))->stats_accumulator,
> > > +		err ? &sdev->stats_snapshot.stats : &stats);
> > >  	memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
> > }
> > >
> > > @@ -566,17 +567,17 @@ failsafe_eth_rmv_event_callback(uint16_t
> > port_id
> > > __rte_unused,  {
> > >  	struct sub_device *sdev = cb_arg;
> > >
> > > -	fs_lock(sdev->fs_dev, 0);
> > > +	fs_lock(fsdev_from_subdev(sdev), 0);
> > >  	/* Switch as soon as possible tx_dev. */
> > > -	fs_switch_dev(sdev->fs_dev, sdev);
> > > +	fs_switch_dev(fsdev_from_subdev(sdev), sdev);
> > >  	/* Use safe bursts in any case. */
> > > -	failsafe_set_burst_fn(sdev->fs_dev, 1);
> > > +	failsafe_set_burst_fn(fsdev_from_subdev(sdev), 1);
> > >  	/*
> > >  	 * Async removal, the sub-PMD will try to unregister
> > >  	 * the callback at the source of the current thread context.
> > >  	 */
> > >  	sdev->remove = 1;
> > > -	fs_unlock(sdev->fs_dev, 0);
> > > +	fs_unlock(fsdev_from_subdev(sdev), 0);
> > >  	return 0;
> > >  }
> > >
> > > diff --git a/drivers/net/failsafe/failsafe_intr.c
> > > b/drivers/net/failsafe/failsafe_intr.c
> > > index 09aa3c6..d372d09 100644
> > > --- a/drivers/net/failsafe/failsafe_intr.c
> > > +++ b/drivers/net/failsafe/failsafe_intr.c
> > > @@ -274,14 +274,14 @@ failsafe_eth_rx_intr_ctl_subdevice(struct
> > sub_device *sdev, int op)
> > >  	int rc;
> > >  	int ret = 0;
> > >
> > > +	fsdev = fsdev_from_subdev(sdev);
> > >  	if (sdev == NULL || (ETH(sdev) == NULL) ||
> > > -	    sdev->fs_dev == NULL || (PRIV(sdev->fs_dev) == NULL)) {
> > > +		fsdev == NULL || (PRIV(fsdev) == NULL)) {
> > >  		ERROR("Called with invalid arguments");
> > >  		return -EINVAL;
> > >  	}
> > >  	dev = ETH(sdev);
> > > -	fsdev = sdev->fs_dev;
> > > -	epfd = PRIV(sdev->fs_dev)->rxp.efd;
> > > +	epfd = PRIV(fsdev)->rxp.efd;
> > >  	pid = PORT_ID(sdev);
> > >
> > >  	if (epfd <= 0) {
> > > @@ -330,7 +330,7 @@ int failsafe_rx_intr_install_subdevice(struct
> > sub_device *sdev)
> > >  	const struct rte_intr_conf *const intr_conf =
> > >  				&ETH(sdev)->data->dev_conf.intr_conf;
> > >
> > > -	fsdev = sdev->fs_dev;
> > > +	fsdev = fsdev_from_subdev(sdev);
> > >  	rxq = (struct rxq **)fsdev->data->rx_queues;
> > >  	if (intr_conf->rxq == 0)
> > >  		return 0;
> > > @@ -368,7 +368,7 @@ void failsafe_rx_intr_uninstall_subdevice(struct
> > sub_device *sdev)
> > >  	struct rte_eth_dev *fsdev;
> > >  	struct rxq *fsrxq;
> > >
> > > -	fsdev = sdev->fs_dev;
> > > +	fsdev = fsdev_from_subdev(sdev);
> > >  	for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) {
> > >  		if (qid < fsdev->data->nb_rx_queues) {
> > >  			fsrxq = fsdev->data->rx_queues[qid]; diff --git
> > > a/drivers/net/failsafe/failsafe_private.h
> > > b/drivers/net/failsafe/failsafe_private.h
> > > index 29dfd40..84e847f 100644
> > > --- a/drivers/net/failsafe/failsafe_private.h
> > > +++ b/drivers/net/failsafe/failsafe_private.h
> > > @@ -117,7 +117,7 @@ struct sub_device {
> > >  	/* Others are retrieved through a file descriptor */
> > >  	char *fd_str;
> > >  	/* fail-safe device backreference */
> > > -	struct rte_eth_dev *fs_dev;
> > > +	uint16_t fs_port_id; /* shared between processes*/
> > >  	/* flag calling for recollection */
> > >  	volatile unsigned int remove:1;
> > >  	/* flow isolation state */
> > > @@ -324,7 +324,8 @@ extern int failsafe_mac_from_arg;
> > >   */
> > >  #define FS_ATOMIC_RX(s, i) \
> > >  	rte_atomic64_read( \
> > > -	 &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid]
> > \
> > > +	 &((struct rxq *) \
> > > +	 (fsdev_from_subdev(s)->data->rx_queues[i]))->refcnt[(s)->sid] \
> > >  	)
> > >  /**
> > >   * s: (struct sub_device *)
> > > @@ -332,7 +333,8 @@ extern int failsafe_mac_from_arg;
> > >   */
> > >  #define FS_ATOMIC_TX(s, i) \
> > >  	rte_atomic64_read( \
> > > -	 &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid]
> > \
> > > +	 &((struct txq *) \
> > > +	 (fsdev_from_subdev(s)->data->tx_queues[i]))->refcnt[(s)->sid] \
> > >  	)
> > >
> > >  #ifdef RTE_EXEC_ENV_BSDAPP
> > > @@ -379,6 +381,11 @@ fs_find_next(struct rte_eth_dev *dev,
> > >  	return &subs[sid];
> > >  }
> > >
> > > +static inline struct rte_eth_dev *
> > > +fsdev_from_subdev(struct sub_device *sdev) {
> > > +	return &rte_eth_devices[sdev->fs_port_id];
> > > +}
> > > +
> > 
> > Using a static inline that already enforce parameter type makes the
> > _from_subdev suffix redundant.
> > 
> > I think
> > 
> > fs_dev(struct sub_device *sdev);
> > 
> > would be more readable in most of the changes above, while keeping with
> > the fs_ prefix used in all static failsafe functions.
> > 
> 
> I think you are right if I'm only reading the prototype of the function/implementation.
> But, if I'm reading it from the function call, it would be easier to read/understand it.
> I would change the prefix to be fs_dev_from_subdev instead what do you think?
> 

I prefer reading fs_dev(sdev) instead of fs{,_}dev_from_subdev(sdev), so
I'd still vote for using fs_dev();

I think in the context of fail-safe it is clear enough that it gives a
handle to the fail-safe device.

> 
> > >  /*
> > >   * Lock hot-plug mutex.
> > >   * is_alarm means that the caller is, for sure, the hot-plug alarm
> > mechanism.
> > > --
> > > 2.7.4
> > >
> > 
> > --
> > Gaëtan Rivet
> > 6WIND
> 
> 
> Kindest regards,
> Raslan Darawsheh

-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data
  2019-03-07  8:43           ` Raslan Darawsheh
@ 2019-03-07  9:47             ` Gaëtan Rivet
  2019-03-07 11:34               ` Raslan Darawsheh
  0 siblings, 1 reply; 26+ messages in thread
From: Gaëtan Rivet @ 2019-03-07  9:47 UTC (permalink / raw)
  To: Raslan Darawsheh; +Cc: Thomas Monjalon, dev, stephen

On Thu, Mar 07, 2019 at 08:43:01AM +0000, Raslan Darawsheh wrote:
> Hi,
> 
> > -----Original Message-----
> > From: Thomas Monjalon <thomas@monjalon.net>
> > Sent: Wednesday, March 6, 2019 8:02 PM
> > To: Gaëtan Rivet <gaetan.rivet@6wind.com>; Raslan Darawsheh
> > <rasland@mellanox.com>
> > Cc: dev@dpdk.org; stephen@networkplumber.org
> > Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device
> > with shared data
> > 
> > 06/03/2019 11:46, Gaëtan Rivet:
> > > On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote:
> > > > 05/03/2019 18:38, Gaëtan Rivet:
> > > > > What happens when a primary process closes a device before a
> > secondary?
> > > > > Is the secondary unable to stop / close its own then? Isn't there
> > > > > some missing uninit?
> > > >
> > > > Is the secondary process supposed to do any closing?
> > > > The device management should be done only by the primary process.
> > > >
> > > > Note: anyway all this hotplug related code should be dropped from
> > > > failsafe to be replaced by EAL hotplug management.
> > > >
> > >
> > > I don't know, I've never used secondary process.
> > > However, cursory reading the code of rte_eth_dev_close(), I don't see
> > > a guard against calling it from a secondary process?
> > 
> > Yes indeed, there is no guard.
> > That's something not clear in DPDK, previously we were attaching some
> > vdevs in secondary only.
> > 
> > > Reading code like
> > >
> > >    rte_free(dev->data->rx_queues);
> > >    dev->data->rx_queues = NULL;
> > >
> > > within makes me think the issue has been seen at least once, where
> > > shared data is freed multiple times, so I guessed some secondary
> > > processes were calling it. Maybe they are not meant to, but what
> > > prevents them from being badly written?
> > >
> > > Also, given rte_dev_remove IPC call to transfer the order to the
> > > primary, it seems that at least secondary processes are expected to
> > > call
> > > rte_dev_remove() at some point? So are they only authorized to call
> > > rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()? Is
> > > there a specific documentation detailing the design of secondary
> > > process and the related responsibilities in the lifetime of a device?
> > > How are they synching their rte_eth_devices list if they are not
> > > calling rte_eth_dev_close(), ever?
> > 
> > All these calls should be done in primary.
> > The IPC mechanism calls the attach/detach in secondary at EAL level.
> > The PMDs does the bridge between EAL device and ethdev port status.
> > But you are right, there can be a sync issue if closing an ethdev port and not
> > removing the EAL device.
> > This is a generic question about deciding whether we want all ethdev ports to
> > be synced in multi-process or not.
> > 
> > In failsafe context, we are closing the EAL device and change the state of the
> > sub-device accordingly. So I think there is no issue.
> > 
> > > > > This seems dangerous to me. Why not instead allocating a
> > > > > per-process slab of memory that would hold the relevant references
> > > > > and outlive the shared data (a per-process rte_eth_dev private data...).
> > > >
> > > > Which data do you think should be allocated per process?
> > > >
> > > >
> > >
> > > [-------- SHARED SPACE --------------] [-- PER-PROCESS --------]
> > > +--------------------------------------------------------------+
> > > | +------------------+                +- rte_eth_devices[n] -+ |
> > > | |rte_eth_dev_data  |<---------------+ data                 | | PRIMARY
> > > | |                  |   +dev_priv-+  |                      | |
> > > | |      dev_private +-->|         |  |                      | |
> > > | |              ... |   |         |  |                      | |
> > > | |          port_id |   |         |  |                      | |
> > > | |                  |   |         |  |                      | |
> > > | |                  |   |         |  |                      | |
> > > | |                  |   |         |  +----------------------+ |
> > > | |                  |   |         |  +- rte_eth_devices[n] -+ |
> > > | |                  |   |         |  |                      | | SECONDARY
> > > | |                  |   |         |  |                      | |
> > > | |                  |   |         |  |                      | |
> > > | |                  |   |         |  |                      | |
> > > | |                  |   +---------+  |                      | |
> > > | |                  |<---------------+ data                 | |
> > > | +------------------+                +----------------------+ |
> > > +--------------------------------------------------------------+
> > >
> > > Here port_id is used within fail-safe to get back to rte_eth_devices[n].
> > > This disappears once a device is closed, as all shared space is zeroed.
> > >
> > > This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct, and
> > > at some point it is not anymore, once a sub-device has been closed.
> > > This seems dangerous.
> > 
> > The state of the sub-device is changed.
> > I don't see any issue.
> > 
> > > I was thinking initially that allocating a place where each sdev would
> > > store their rte_eth_devices / port_id back-reference could alleviate
> > > the issue, meaning that the fail-safe would not zero it on
> > > sdev_close(), and it would remain valid for the lifetime of a
> > > sub-device, so even when a sub-device is in DEV_PROBED state.
> > >
> > > But now that I think about it, it could probably be simpler: instead
> > > of using (ETH(sdev)->data->port_id) for the port_id of an sdev
> > > (meaning that it is dependent on the lifetime of the sdev, instead of
> > > the lifetime of the failsafe), the port-id itself should be stored in
> > > the sub_device structure. This structure will be available for the
> > > lifetime of the failsafe, and the port_id is correct accross all processes.
> > >
> > > So PORT_ID(sdev) would be defined to something like (sdev->port_id),
> > > and
> > > ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would remain
> > > correct even once the primary has closed the sub-device.
> > >
> > > What do you think? Do you agree that the current state is dangerous,
> > > and do you think the solution would alleviate the issue? Maybe the
> > > concern is unfounded and the only issue is within fs_dev_remove().
> > 
> > Yes it is only seen in fs_dev_remove().
> > I discussed about your proposal with Raslan, and we agree we could change
> > from sub_device.data to sub_device.port_id, it may be more future-proof.
> > 
> > I have only one doubt: look at the macro in this patch:
> > 
> > #define ETH(sdev) \
> > 	((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data-
> > >port_id])
> > 
> > The NULL check cannot be done with a port id.
> > I think it was needed to manage one case. Raslan?
> 
> That's right since we need it for fs_tx_unsafe, to add a protection for plugged out devices during TX.

Ok, thanks for your insights Thomas and Raslan. Sorry about the rambling
above I needed to write down the stuff to think about it.

You can use RTE_MAX_ETHPORTS as a sentinel value for port_id, this way
the value is kept unsigned and there are several checks against this
specific value otherwise.

so ETH(sdev) could be

        (PORT_ID(sdev) >= RTE_MAX_ETHPORTS ? NULL : &rte_eth_devices[PORT_ID(sdev)])

-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data
  2019-03-07  9:47             ` Gaëtan Rivet
@ 2019-03-07 11:34               ` Raslan Darawsheh
  2019-03-07 11:50                 ` Gaëtan Rivet
  0 siblings, 1 reply; 26+ messages in thread
From: Raslan Darawsheh @ 2019-03-07 11:34 UTC (permalink / raw)
  To: Gaëtan Rivet; +Cc: Thomas Monjalon, dev, stephen

Hi,

> -----Original Message-----
> From: Gaëtan Rivet <gaetan.rivet@6wind.com>
> Sent: Thursday, March 7, 2019 11:48 AM
> To: Raslan Darawsheh <rasland@mellanox.com>
> Cc: Thomas Monjalon <thomas@monjalon.net>; dev@dpdk.org;
> stephen@networkplumber.org
> Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device
> with shared data
> 
> On Thu, Mar 07, 2019 at 08:43:01AM +0000, Raslan Darawsheh wrote:
> > Hi,
> >
> > > -----Original Message-----
> > > From: Thomas Monjalon <thomas@monjalon.net>
> > > Sent: Wednesday, March 6, 2019 8:02 PM
> > > To: Gaëtan Rivet <gaetan.rivet@6wind.com>; Raslan Darawsheh
> > > <rasland@mellanox.com>
> > > Cc: dev@dpdk.org; stephen@networkplumber.org
> > > Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local
> > > sub-device with shared data
> > >
> > > 06/03/2019 11:46, Gaëtan Rivet:
> > > > On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote:
> > > > > 05/03/2019 18:38, Gaëtan Rivet:
> > > > > > What happens when a primary process closes a device before a
> > > secondary?
> > > > > > Is the secondary unable to stop / close its own then? Isn't
> > > > > > there some missing uninit?
> > > > >
> > > > > Is the secondary process supposed to do any closing?
> > > > > The device management should be done only by the primary process.
> > > > >
> > > > > Note: anyway all this hotplug related code should be dropped
> > > > > from failsafe to be replaced by EAL hotplug management.
> > > > >
> > > >
> > > > I don't know, I've never used secondary process.
> > > > However, cursory reading the code of rte_eth_dev_close(), I don't
> > > > see a guard against calling it from a secondary process?
> > >
> > > Yes indeed, there is no guard.
> > > That's something not clear in DPDK, previously we were attaching
> > > some vdevs in secondary only.
> > >
> > > > Reading code like
> > > >
> > > >    rte_free(dev->data->rx_queues);
> > > >    dev->data->rx_queues = NULL;
> > > >
> > > > within makes me think the issue has been seen at least once, where
> > > > shared data is freed multiple times, so I guessed some secondary
> > > > processes were calling it. Maybe they are not meant to, but what
> > > > prevents them from being badly written?
> > > >
> > > > Also, given rte_dev_remove IPC call to transfer the order to the
> > > > primary, it seems that at least secondary processes are expected
> > > > to call
> > > > rte_dev_remove() at some point? So are they only authorized to
> > > > call
> > > > rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()?
> > > > Is there a specific documentation detailing the design of
> > > > secondary process and the related responsibilities in the lifetime of a
> device?
> > > > How are they synching their rte_eth_devices list if they are not
> > > > calling rte_eth_dev_close(), ever?
> > >
> > > All these calls should be done in primary.
> > > The IPC mechanism calls the attach/detach in secondary at EAL level.
> > > The PMDs does the bridge between EAL device and ethdev port status.
> > > But you are right, there can be a sync issue if closing an ethdev
> > > port and not removing the EAL device.
> > > This is a generic question about deciding whether we want all ethdev
> > > ports to be synced in multi-process or not.
> > >
> > > In failsafe context, we are closing the EAL device and change the
> > > state of the sub-device accordingly. So I think there is no issue.
> > >
> > > > > > This seems dangerous to me. Why not instead allocating a
> > > > > > per-process slab of memory that would hold the relevant
> > > > > > references and outlive the shared data (a per-process rte_eth_dev
> private data...).
> > > > >
> > > > > Which data do you think should be allocated per process?
> > > > >
> > > > >
> > > >
> > > > [-------- SHARED SPACE --------------] [-- PER-PROCESS --------]
> > > > +--------------------------------------------------------------+
> > > > | +------------------+                +- rte_eth_devices[n] -+ |
> > > > | |rte_eth_dev_data  |<---------------+ data                 | | PRIMARY
> > > > | |                  |   +dev_priv-+  |                      | |
> > > > | |      dev_private +-->|         |  |                      | |
> > > > | |              ... |   |         |  |                      | |
> > > > | |          port_id |   |         |  |                      | |
> > > > | |                  |   |         |  |                      | |
> > > > | |                  |   |         |  |                      | |
> > > > | |                  |   |         |  +----------------------+ |
> > > > | |                  |   |         |  +- rte_eth_devices[n] -+ |
> > > > | |                  |   |         |  |                      | | SECONDARY
> > > > | |                  |   |         |  |                      | |
> > > > | |                  |   |         |  |                      | |
> > > > | |                  |   |         |  |                      | |
> > > > | |                  |   +---------+  |                      | |
> > > > | |                  |<---------------+ data                 | |
> > > > | +------------------+                +----------------------+ |
> > > > +--------------------------------------------------------------+
> > > >
> > > > Here port_id is used within fail-safe to get back to rte_eth_devices[n].
> > > > This disappears once a device is closed, as all shared space is zeroed.
> > > >
> > > > This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct,
> > > > and at some point it is not anymore, once a sub-device has been closed.
> > > > This seems dangerous.
> > >
> > > The state of the sub-device is changed.
> > > I don't see any issue.
> > >
> > > > I was thinking initially that allocating a place where each sdev
> > > > would store their rte_eth_devices / port_id back-reference could
> > > > alleviate the issue, meaning that the fail-safe would not zero it
> > > > on sdev_close(), and it would remain valid for the lifetime of a
> > > > sub-device, so even when a sub-device is in DEV_PROBED state.
> > > >
> > > > But now that I think about it, it could probably be simpler:
> > > > instead of using (ETH(sdev)->data->port_id) for the port_id of an
> > > > sdev (meaning that it is dependent on the lifetime of the sdev,
> > > > instead of the lifetime of the failsafe), the port-id itself
> > > > should be stored in the sub_device structure. This structure will
> > > > be available for the lifetime of the failsafe, and the port_id is correct
> accross all processes.
> > > >
> > > > So PORT_ID(sdev) would be defined to something like
> > > > (sdev->port_id), and
> > > > ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would
> > > > remain correct even once the primary has closed the sub-device.
> > > >
> > > > What do you think? Do you agree that the current state is
> > > > dangerous, and do you think the solution would alleviate the
> > > > issue? Maybe the concern is unfounded and the only issue is within
> fs_dev_remove().
> > >
> > > Yes it is only seen in fs_dev_remove().
> > > I discussed about your proposal with Raslan, and we agree we could
> > > change from sub_device.data to sub_device.port_id, it may be more
> future-proof.
> > >
> > > I have only one doubt: look at the macro in this patch:
> > >
> > > #define ETH(sdev) \
> > > 	((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data-
> > > >port_id])
> > >
> > > The NULL check cannot be done with a port id.
> > > I think it was needed to manage one case. Raslan?
> >
> > That's right since we need it for fs_tx_unsafe, to add a protection for
> plugged out devices during TX.
> 
> Ok, thanks for your insights Thomas and Raslan. Sorry about the rambling
> above I needed to write down the stuff to think about it.
> 
> You can use RTE_MAX_ETHPORTS as a sentinel value for port_id, this way the
> value is kept unsigned and there are several checks against this specific value
> otherwise.
> 
> so ETH(sdev) could be
> 
>         (PORT_ID(sdev) >= RTE_MAX_ETHPORTS ? NULL :
> &rte_eth_devices[PORT_ID(sdev)])
> 
But, this mean that you have to explicitly set the port id  to RTE_MAX_ETH_PORTS in the sdev after fs_dev_remove and you shouldn't rely on the port ID anymore.

> --

> Gaëtan Rivet
> 6WIND

Kindest regards
Raslan Darawsheh

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 3/4] net/failsafe: replace local sub-device with shared data
  2019-03-07 11:34               ` Raslan Darawsheh
@ 2019-03-07 11:50                 ` Gaëtan Rivet
  0 siblings, 0 replies; 26+ messages in thread
From: Gaëtan Rivet @ 2019-03-07 11:50 UTC (permalink / raw)
  To: Raslan Darawsheh; +Cc: Thomas Monjalon, dev, stephen

On Thu, Mar 07, 2019 at 11:34:42AM +0000, Raslan Darawsheh wrote:
> Hi,
> 
> > -----Original Message-----
> > From: Gaëtan Rivet <gaetan.rivet@6wind.com>
> > Sent: Thursday, March 7, 2019 11:48 AM
> > To: Raslan Darawsheh <rasland@mellanox.com>
> > Cc: Thomas Monjalon <thomas@monjalon.net>; dev@dpdk.org;
> > stephen@networkplumber.org
> > Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device
> > with shared data
> > 
> > On Thu, Mar 07, 2019 at 08:43:01AM +0000, Raslan Darawsheh wrote:
> > > Hi,
> > >
> > > > -----Original Message-----
> > > > From: Thomas Monjalon <thomas@monjalon.net>
> > > > Sent: Wednesday, March 6, 2019 8:02 PM
> > > > To: Gaëtan Rivet <gaetan.rivet@6wind.com>; Raslan Darawsheh
> > > > <rasland@mellanox.com>
> > > > Cc: dev@dpdk.org; stephen@networkplumber.org
> > > > Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local
> > > > sub-device with shared data
> > > >
> > > > 06/03/2019 11:46, Gaëtan Rivet:
> > > > > On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote:
> > > > > > 05/03/2019 18:38, Gaëtan Rivet:
> > > > > > > What happens when a primary process closes a device before a
> > > > secondary?
> > > > > > > Is the secondary unable to stop / close its own then? Isn't
> > > > > > > there some missing uninit?
> > > > > >
> > > > > > Is the secondary process supposed to do any closing?
> > > > > > The device management should be done only by the primary process.
> > > > > >
> > > > > > Note: anyway all this hotplug related code should be dropped
> > > > > > from failsafe to be replaced by EAL hotplug management.
> > > > > >
> > > > >
> > > > > I don't know, I've never used secondary process.
> > > > > However, cursory reading the code of rte_eth_dev_close(), I don't
> > > > > see a guard against calling it from a secondary process?
> > > >
> > > > Yes indeed, there is no guard.
> > > > That's something not clear in DPDK, previously we were attaching
> > > > some vdevs in secondary only.
> > > >
> > > > > Reading code like
> > > > >
> > > > >    rte_free(dev->data->rx_queues);
> > > > >    dev->data->rx_queues = NULL;
> > > > >
> > > > > within makes me think the issue has been seen at least once, where
> > > > > shared data is freed multiple times, so I guessed some secondary
> > > > > processes were calling it. Maybe they are not meant to, but what
> > > > > prevents them from being badly written?
> > > > >
> > > > > Also, given rte_dev_remove IPC call to transfer the order to the
> > > > > primary, it seems that at least secondary processes are expected
> > > > > to call
> > > > > rte_dev_remove() at some point? So are they only authorized to
> > > > > call
> > > > > rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()?
> > > > > Is there a specific documentation detailing the design of
> > > > > secondary process and the related responsibilities in the lifetime of a
> > device?
> > > > > How are they synching their rte_eth_devices list if they are not
> > > > > calling rte_eth_dev_close(), ever?
> > > >
> > > > All these calls should be done in primary.
> > > > The IPC mechanism calls the attach/detach in secondary at EAL level.
> > > > The PMDs does the bridge between EAL device and ethdev port status.
> > > > But you are right, there can be a sync issue if closing an ethdev
> > > > port and not removing the EAL device.
> > > > This is a generic question about deciding whether we want all ethdev
> > > > ports to be synced in multi-process or not.
> > > >
> > > > In failsafe context, we are closing the EAL device and change the
> > > > state of the sub-device accordingly. So I think there is no issue.
> > > >
> > > > > > > This seems dangerous to me. Why not instead allocating a
> > > > > > > per-process slab of memory that would hold the relevant
> > > > > > > references and outlive the shared data (a per-process rte_eth_dev
> > private data...).
> > > > > >
> > > > > > Which data do you think should be allocated per process?
> > > > > >
> > > > > >
> > > > >
> > > > > [-------- SHARED SPACE --------------] [-- PER-PROCESS --------]
> > > > > +--------------------------------------------------------------+
> > > > > | +------------------+                +- rte_eth_devices[n] -+ |
> > > > > | |rte_eth_dev_data  |<---------------+ data                 | | PRIMARY
> > > > > | |                  |   +dev_priv-+  |                      | |
> > > > > | |      dev_private +-->|         |  |                      | |
> > > > > | |              ... |   |         |  |                      | |
> > > > > | |          port_id |   |         |  |                      | |
> > > > > | |                  |   |         |  |                      | |
> > > > > | |                  |   |         |  |                      | |
> > > > > | |                  |   |         |  +----------------------+ |
> > > > > | |                  |   |         |  +- rte_eth_devices[n] -+ |
> > > > > | |                  |   |         |  |                      | | SECONDARY
> > > > > | |                  |   |         |  |                      | |
> > > > > | |                  |   |         |  |                      | |
> > > > > | |                  |   |         |  |                      | |
> > > > > | |                  |   +---------+  |                      | |
> > > > > | |                  |<---------------+ data                 | |
> > > > > | +------------------+                +----------------------+ |
> > > > > +--------------------------------------------------------------+
> > > > >
> > > > > Here port_id is used within fail-safe to get back to rte_eth_devices[n].
> > > > > This disappears once a device is closed, as all shared space is zeroed.
> > > > >
> > > > > This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct,
> > > > > and at some point it is not anymore, once a sub-device has been closed.
> > > > > This seems dangerous.
> > > >
> > > > The state of the sub-device is changed.
> > > > I don't see any issue.
> > > >
> > > > > I was thinking initially that allocating a place where each sdev
> > > > > would store their rte_eth_devices / port_id back-reference could
> > > > > alleviate the issue, meaning that the fail-safe would not zero it
> > > > > on sdev_close(), and it would remain valid for the lifetime of a
> > > > > sub-device, so even when a sub-device is in DEV_PROBED state.
> > > > >
> > > > > But now that I think about it, it could probably be simpler:
> > > > > instead of using (ETH(sdev)->data->port_id) for the port_id of an
> > > > > sdev (meaning that it is dependent on the lifetime of the sdev,
> > > > > instead of the lifetime of the failsafe), the port-id itself
> > > > > should be stored in the sub_device structure. This structure will
> > > > > be available for the lifetime of the failsafe, and the port_id is correct
> > accross all processes.
> > > > >
> > > > > So PORT_ID(sdev) would be defined to something like
> > > > > (sdev->port_id), and
> > > > > ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would
> > > > > remain correct even once the primary has closed the sub-device.
> > > > >
> > > > > What do you think? Do you agree that the current state is
> > > > > dangerous, and do you think the solution would alleviate the
> > > > > issue? Maybe the concern is unfounded and the only issue is within
> > fs_dev_remove().
> > > >
> > > > Yes it is only seen in fs_dev_remove().
> > > > I discussed about your proposal with Raslan, and we agree we could
> > > > change from sub_device.data to sub_device.port_id, it may be more
> > future-proof.
> > > >
> > > > I have only one doubt: look at the macro in this patch:
> > > >
> > > > #define ETH(sdev) \
> > > > 	((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data-
> > > > >port_id])
> > > >
> > > > The NULL check cannot be done with a port id.
> > > > I think it was needed to manage one case. Raslan?
> > >
> > > That's right since we need it for fs_tx_unsafe, to add a protection for
> > plugged out devices during TX.
> > 
> > Ok, thanks for your insights Thomas and Raslan. Sorry about the rambling
> > above I needed to write down the stuff to think about it.
> > 
> > You can use RTE_MAX_ETHPORTS as a sentinel value for port_id, this way the
> > value is kept unsigned and there are several checks against this specific value
> > otherwise.
> > 
> > so ETH(sdev) could be
> > 
> >         (PORT_ID(sdev) >= RTE_MAX_ETHPORTS ? NULL :
> > &rte_eth_devices[PORT_ID(sdev)])
> > 
> But, this mean that you have to explicitly set the port id  to RTE_MAX_ETH_PORTS in the sdev after fs_dev_remove and you shouldn't rely on the port ID anymore.
> 

Yes, once a sub-device has completely finished its removal (from the
fail-safe PoV), the fail-safe marks the sub-device as not used anymore.
This seems correct.

If the fail-safe used the sdev->data->port_id instead, it would return
0, which is a valid port_id that is probably still used by another port.

> > --
> 
> > Gaëtan Rivet
> > 6WIND
> 
> Kindest regards
> Raslan Darawsheh

-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH v3 0/4] support secondary process for failsafe
  2019-03-05  9:52 [PATCH v2 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh
                   ` (3 preceding siblings ...)
  2019-03-05 16:43 ` [PATCH v2 1/4] net/failsafe: replace local device with shared data Gaëtan Rivet
@ 2019-03-18 16:05 ` Raslan Darawsheh
  2019-03-18 16:05   ` [PATCH v3 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh
                     ` (4 more replies)
  4 siblings, 5 replies; 26+ messages in thread
From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw)
  To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen

this set of patches are intended to support secondary process for failsafe.

v3 integrates changes after review from Gaetan
There is no functional change.

Raslan Darawsheh (4):
  net/failsafe: replace local device with shared data
  net/failsafe: change back-reference from sub-device
  net/failsafe: replace sub-device pointer with port id
  net/failsafe: support secondary process

 drivers/net/failsafe/failsafe.c         | 53 +++++++++++++++++++++++++++++++--
 drivers/net/failsafe/failsafe_eal.c     |  4 +--
 drivers/net/failsafe/failsafe_ether.c   | 16 +++++-----
 drivers/net/failsafe/failsafe_intr.c    | 20 ++++++-------
 drivers/net/failsafe/failsafe_ops.c     |  4 +--
 drivers/net/failsafe/failsafe_private.h | 38 +++++++++++++++--------
 drivers/net/failsafe/failsafe_rxtx.c    |  4 +--
 7 files changed, 102 insertions(+), 37 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH v3 1/4] net/failsafe: replace local device with shared data
  2019-03-18 16:05 ` [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh
@ 2019-03-18 16:05   ` Raslan Darawsheh
  2019-03-18 16:05   ` [PATCH v3 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 26+ messages in thread
From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw)
  To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen

In multiprocess context, the private structure is shared between
processes. The back reference from private to generic data was using
a pointer to a per process eth_dev. It's now changed to a reference of
the shared data.

Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
---
 drivers/net/failsafe/failsafe.c         |  2 +-
 drivers/net/failsafe/failsafe_intr.c    | 10 +++++-----
 drivers/net/failsafe/failsafe_ops.c     |  4 ++--
 drivers/net/failsafe/failsafe_private.h |  6 +++++-
 drivers/net/failsafe/failsafe_rxtx.c    |  4 ++--
 5 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 06e859e..68926ca 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -181,7 +181,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
 		return -1;
 	}
 	priv = PRIV(dev);
-	priv->dev = dev;
+	priv->data = dev->data;
 	dev->dev_ops = &failsafe_ops;
 	dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0];
 	dev->data->dev_link = eth_link;
diff --git a/drivers/net/failsafe/failsafe_intr.c b/drivers/net/failsafe/failsafe_intr.c
index 1c2cb71..09aa3c6 100644
--- a/drivers/net/failsafe/failsafe_intr.c
+++ b/drivers/net/failsafe/failsafe_intr.c
@@ -133,8 +133,8 @@ fs_rx_event_proxy_service_install(struct fs_priv *priv)
 	/* prepare service info */
 	memset(&service, 0, sizeof(struct rte_service_spec));
 	snprintf(service.name, sizeof(service.name), "%s_Rx_service",
-		 priv->dev->data->name);
-	service.socket_id = priv->dev->data->numa_node;
+		 priv->data->name);
+	service.socket_id = priv->data->numa_node;
 	service.callback = fs_rx_event_proxy_routine;
 	service.callback_userdata = priv;
 
@@ -437,7 +437,7 @@ fs_rx_intr_vec_install(struct fs_priv *priv)
 	unsigned int count;
 	struct rte_intr_handle *intr_handle;
 
-	rxqs_n = priv->dev->data->nb_rx_queues;
+	rxqs_n = priv->data->nb_rx_queues;
 	n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
 	count = 0;
 	intr_handle = &priv->intr_handle;
@@ -452,7 +452,7 @@ fs_rx_intr_vec_install(struct fs_priv *priv)
 		return -rte_errno;
 	}
 	for (i = 0; i < n; i++) {
-		struct rxq *rxq = priv->dev->data->rx_queues[i];
+		struct rxq *rxq = priv->data->rx_queues[i];
 
 		/* Skip queues that cannot request interrupts. */
 		if (rxq == NULL || rxq->event_fd < 0) {
@@ -521,7 +521,7 @@ failsafe_rx_intr_install(struct rte_eth_dev *dev)
 {
 	struct fs_priv *priv = PRIV(dev);
 	const struct rte_intr_conf *const intr_conf =
-			&priv->dev->data->dev_conf.intr_conf;
+			&priv->data->dev_conf.intr_conf;
 
 	if (intr_conf->rxq == 0 || dev->intr_handle != NULL)
 		return 0;
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index e3add40..65957a2 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -452,7 +452,7 @@ fs_rx_queue_release(void *queue)
 	if (queue == NULL)
 		return;
 	rxq = queue;
-	dev = rxq->priv->dev;
+	dev = &rte_eth_devices[rxq->priv->data->port_id];
 	fs_lock(dev, 0);
 	if (rxq->event_fd > 0)
 		close(rxq->event_fd);
@@ -636,7 +636,7 @@ fs_tx_queue_release(void *queue)
 	if (queue == NULL)
 		return;
 	txq = queue;
-	dev = txq->priv->dev;
+	dev = &rte_eth_devices[txq->priv->data->port_id];
 	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		if (ETH(sdev)->data->tx_queues != NULL &&
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 0dfea65..29dfd40 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -128,8 +128,12 @@ struct sub_device {
 	unsigned int lsc_callback:1;
 };
 
+/*
+ * This is referenced by eth_dev->data->dev_private
+ * This is shared between processes.
+ */
 struct fs_priv {
-	struct rte_eth_dev *dev;
+	struct rte_eth_dev_data *data; /* backreference to shared data. */
 	/*
 	 * Set of sub_devices.
 	 * subs[0] is the preferred device
diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
index 034f47b..231c832 100644
--- a/drivers/net/failsafe/failsafe_rxtx.c
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -126,7 +126,7 @@ failsafe_tx_burst(void *queue,
 	uint16_t nb_tx;
 
 	txq = queue;
-	sdev = TX_SUBDEV(txq->priv->dev);
+	sdev = TX_SUBDEV(&rte_eth_devices[txq->priv->data->port_id]);
 	if (unlikely(fs_tx_unsafe(sdev)))
 		return 0;
 	sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
@@ -147,7 +147,7 @@ failsafe_tx_burst_fast(void *queue,
 	uint16_t nb_tx;
 
 	txq = queue;
-	sdev = TX_SUBDEV(txq->priv->dev);
+	sdev = TX_SUBDEV(&rte_eth_devices[txq->priv->data->port_id]);
 	RTE_ASSERT(!fs_tx_unsafe(sdev));
 	sub_txq = ETH(sdev)->data->tx_queues[txq->qid];
 	FS_ATOMIC_P(txq->refcnt[sdev->sid]);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH v3 2/4] net/failsafe: change back-reference from sub-device
  2019-03-18 16:05 ` [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh
  2019-03-18 16:05   ` [PATCH v3 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh
@ 2019-03-18 16:05   ` Raslan Darawsheh
  2019-03-18 16:05   ` [PATCH v3 4/4] net/failsafe: support secondary process Raslan Darawsheh
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 26+ messages in thread
From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw)
  To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen

In multiprocess context, the sub-device structure is shared
between processes. The reference to the failsafe device was
a per process pointer. It's changed to port id which is the
same for all processes.

Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
---
v2: changed macro to an inline function
v3: changed prefix of inline function.
  
---
 drivers/net/failsafe/failsafe_eal.c     |  2 +-
 drivers/net/failsafe/failsafe_ether.c   | 15 ++++++++-------
 drivers/net/failsafe/failsafe_intr.c    | 10 +++++-----
 drivers/net/failsafe/failsafe_private.h | 15 ++++++++++-----
 4 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 8a888b1..56d1669 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -114,7 +114,7 @@ fs_bus_init(struct rte_eth_dev *dev)
 		}
 		ETH(sdev) = &rte_eth_devices[pid];
 		SUB_ID(sdev) = i;
-		sdev->fs_dev = dev;
+		sdev->fs_port_id = dev->data->port_id;
 		sdev->dev = ETH(sdev)->device;
 		sdev->state = DEV_PROBED;
 	}
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 1783165..7fa209a 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -298,7 +298,7 @@ fs_dev_remove(struct sub_device *sdev)
 		break;
 	}
 	sdev->remove = 0;
-	failsafe_hotplug_alarm_install(sdev->fs_dev);
+	failsafe_hotplug_alarm_install(fs_dev(sdev));
 }
 
 static void
@@ -318,8 +318,9 @@ fs_dev_stats_save(struct sub_device *sdev)
 			WARN("Using latest snapshot taken before %"PRIu64" seconds.\n",
 				 (rte_rdtsc() - timestamp) / rte_get_tsc_hz());
 	}
-	failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator,
-			err ? &sdev->stats_snapshot.stats : &stats);
+	failsafe_stats_increment
+		(&PRIV(fs_dev(sdev))->stats_accumulator,
+		err ? &sdev->stats_snapshot.stats : &stats);
 	memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
 }
 
@@ -566,17 +567,17 @@ failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
 {
 	struct sub_device *sdev = cb_arg;
 
-	fs_lock(sdev->fs_dev, 0);
+	fs_lock(fs_dev(sdev), 0);
 	/* Switch as soon as possible tx_dev. */
-	fs_switch_dev(sdev->fs_dev, sdev);
+	fs_switch_dev(fs_dev(sdev), sdev);
 	/* Use safe bursts in any case. */
-	failsafe_set_burst_fn(sdev->fs_dev, 1);
+	failsafe_set_burst_fn(fs_dev(sdev), 1);
 	/*
 	 * Async removal, the sub-PMD will try to unregister
 	 * the callback at the source of the current thread context.
 	 */
 	sdev->remove = 1;
-	fs_unlock(sdev->fs_dev, 0);
+	fs_unlock(fs_dev(sdev), 0);
 	return 0;
 }
 
diff --git a/drivers/net/failsafe/failsafe_intr.c b/drivers/net/failsafe/failsafe_intr.c
index 09aa3c6..0f34c5b 100644
--- a/drivers/net/failsafe/failsafe_intr.c
+++ b/drivers/net/failsafe/failsafe_intr.c
@@ -274,14 +274,14 @@ failsafe_eth_rx_intr_ctl_subdevice(struct sub_device *sdev, int op)
 	int rc;
 	int ret = 0;
 
+	fsdev = fs_dev(sdev);
 	if (sdev == NULL || (ETH(sdev) == NULL) ||
-	    sdev->fs_dev == NULL || (PRIV(sdev->fs_dev) == NULL)) {
+		fsdev == NULL || (PRIV(fsdev) == NULL)) {
 		ERROR("Called with invalid arguments");
 		return -EINVAL;
 	}
 	dev = ETH(sdev);
-	fsdev = sdev->fs_dev;
-	epfd = PRIV(sdev->fs_dev)->rxp.efd;
+	epfd = PRIV(fsdev)->rxp.efd;
 	pid = PORT_ID(sdev);
 
 	if (epfd <= 0) {
@@ -330,7 +330,7 @@ int failsafe_rx_intr_install_subdevice(struct sub_device *sdev)
 	const struct rte_intr_conf *const intr_conf =
 				&ETH(sdev)->data->dev_conf.intr_conf;
 
-	fsdev = sdev->fs_dev;
+	fsdev = fs_dev(sdev);
 	rxq = (struct rxq **)fsdev->data->rx_queues;
 	if (intr_conf->rxq == 0)
 		return 0;
@@ -368,7 +368,7 @@ void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev)
 	struct rte_eth_dev *fsdev;
 	struct rxq *fsrxq;
 
-	fsdev = sdev->fs_dev;
+	fsdev = fs_dev(sdev);
 	for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) {
 		if (qid < fsdev->data->nb_rx_queues) {
 			fsrxq = fsdev->data->rx_queues[qid];
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 29dfd40..af0c9d1 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -117,7 +117,7 @@ struct sub_device {
 	/* Others are retrieved through a file descriptor */
 	char *fd_str;
 	/* fail-safe device backreference */
-	struct rte_eth_dev *fs_dev;
+	uint16_t fs_port_id; /* shared between processes */
 	/* flag calling for recollection */
 	volatile unsigned int remove:1;
 	/* flow isolation state */
@@ -324,16 +324,16 @@ extern int failsafe_mac_from_arg;
  */
 #define FS_ATOMIC_RX(s, i) \
 	rte_atomic64_read( \
-	 &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \
-	)
+	 &((struct rxq *) \
+	 (fs_dev(s)->data->rx_queues[i]))->refcnt[(s)->sid])
 /**
  * s: (struct sub_device *)
  * i: uint16_t qid
  */
 #define FS_ATOMIC_TX(s, i) \
 	rte_atomic64_read( \
-	 &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \
-	)
+	 &((struct txq *) \
+	 (fs_dev(s)->data->tx_queues[i]))->refcnt[(s)->sid])
 
 #ifdef RTE_EXEC_ENV_BSDAPP
 #define FS_THREADID_TYPE void*
@@ -379,6 +379,11 @@ fs_find_next(struct rte_eth_dev *dev,
 	return &subs[sid];
 }
 
+static inline struct rte_eth_dev *
+fs_dev(struct sub_device *sdev) {
+	return &rte_eth_devices[sdev->fs_port_id];
+}
+
 /*
  * Lock hot-plug mutex.
  * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH v3 3/4] net/failsafe: replace sub-device pointer with port id
  2019-03-18 16:05 ` [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh
                     ` (2 preceding siblings ...)
  2019-03-18 16:05   ` [PATCH v3 4/4] net/failsafe: support secondary process Raslan Darawsheh
@ 2019-03-18 16:05   ` Raslan Darawsheh
  2019-03-18 16:16   ` [PATCH v3 0/4] support secondary process for failsafe Gaëtan Rivet
  4 siblings, 0 replies; 26+ messages in thread
From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw)
  To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen

In multiprocess context, the pointer to sub-device is shared between
processes. Previously, it was a pointer to per process eth_dev so
it's needed to replace this dependency.

Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
---
v2: - moved comment in fs_sdev about subs to this commit
    - added parenthesis around macro arguments.

v3: - replaced shared data with port id for sub dev.
    - update comment on the sub_dev struct
---
 drivers/net/failsafe/failsafe.c         |  6 ++++++
 drivers/net/failsafe/failsafe_eal.c     |  2 +-
 drivers/net/failsafe/failsafe_ether.c   |  1 +
 drivers/net/failsafe/failsafe_private.h | 17 +++++++++++------
 4 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 68926ca..e53a89d 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -30,6 +30,8 @@ fs_sub_device_alloc(struct rte_eth_dev *dev,
 	uint8_t nb_subs;
 	int ret;
 	int i;
+	struct sub_device *sdev;
+	uint8_t sdev_iterator;
 
 	ret = failsafe_args_count_subdevice(dev, params);
 	if (ret)
@@ -51,6 +53,10 @@ fs_sub_device_alloc(struct rte_eth_dev *dev,
 	for (i = 1; i < nb_subs; i++)
 		PRIV(dev)->subs[i - 1].next = PRIV(dev)->subs + i;
 	PRIV(dev)->subs[i - 1].next = PRIV(dev)->subs;
+
+	FOREACH_SUBDEV(sdev, sdev_iterator, dev) {
+		sdev->sdev_port_id = RTE_MAX_ETHPORTS;
+	}
 	return 0;
 }
 
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 56d1669..74fd8e9 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -112,7 +112,7 @@ fs_bus_init(struct rte_eth_dev *dev)
 				continue;
 			}
 		}
-		ETH(sdev) = &rte_eth_devices[pid];
+		sdev->sdev_port_id = pid;
 		SUB_ID(sdev) = i;
 		sdev->fs_port_id = dev->data->port_id;
 		sdev->dev = ETH(sdev)->device;
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index 7fa209a..7ac23d4 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -294,6 +294,7 @@ fs_dev_remove(struct sub_device *sdev)
 	case DEV_PARSED:
 	case DEV_UNDEFINED:
 		sdev->state = DEV_UNDEFINED;
+		sdev->sdev_port_id = RTE_MAX_ETHPORTS;
 		/* the end */
 		break;
 	}
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index af0c9d1..2a632d8 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -100,13 +100,15 @@ struct fs_stats {
 	uint64_t timestamp;
 };
 
+/*
+ * Allocated in shared memory.
+ */
 struct sub_device {
 	/* Exhaustive DPDK device description */
 	struct sub_device *next;
 	struct rte_devargs devargs;
-	struct rte_bus *bus;
-	struct rte_device *dev;
-	struct rte_eth_dev *edev;
+	struct rte_bus *bus; /* for primary process only. */
+	struct rte_device *dev; /* for primary process only. */
 	uint8_t sid;
 	/* Device state machine */
 	enum dev_state state;
@@ -118,6 +120,8 @@ struct sub_device {
 	char *fd_str;
 	/* fail-safe device backreference */
 	uint16_t fs_port_id; /* shared between processes */
+	/* sub device port id*/
+	uint16_t sdev_port_id; /* shared between processes */
 	/* flag calling for recollection */
 	volatile unsigned int remove:1;
 	/* flow isolation state */
@@ -139,7 +143,7 @@ struct fs_priv {
 	 * subs[0] is the preferred device
 	 * any other is just another slave
 	 */
-	struct sub_device *subs;
+	struct sub_device *subs;  /* shared between processes */
 	uint8_t subs_head; /* if head == tail, no subs */
 	uint8_t subs_tail; /* first invalid */
 	uint8_t subs_tx; /* current emitting device */
@@ -254,11 +258,12 @@ extern int failsafe_mac_from_arg;
 
 /* sdev: (struct sub_device *) */
 #define ETH(sdev) \
-	((sdev)->edev)
+	((sdev)->sdev_port_id == RTE_MAX_ETHPORTS ? \
+	NULL : &rte_eth_devices[(sdev)->sdev_port_id])
 
 /* sdev: (struct sub_device *) */
 #define PORT_ID(sdev) \
-	(ETH(sdev)->data->port_id)
+	((sdev)->sdev_port_id)
 
 /* sdev: (struct sub_device *) */
 #define SUB_ID(sdev) \
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH v3 4/4] net/failsafe: support secondary process
  2019-03-18 16:05 ` [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh
  2019-03-18 16:05   ` [PATCH v3 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh
  2019-03-18 16:05   ` [PATCH v3 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh
@ 2019-03-18 16:05   ` Raslan Darawsheh
  2019-03-18 16:05   ` [PATCH v3 3/4] net/failsafe: replace sub-device pointer with port id Raslan Darawsheh
  2019-03-18 16:16   ` [PATCH v3 0/4] support secondary process for failsafe Gaëtan Rivet
  4 siblings, 0 replies; 26+ messages in thread
From: Raslan Darawsheh @ 2019-03-18 16:05 UTC (permalink / raw)
  To: gaetan.rivet; +Cc: dev, Thomas Monjalon, Raslan Darawsheh, stephen

Add implementation for probe in secondary.

Failsafe will attempt to attach all the sub-devices in
secondary process.

Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
---
v2: changed devargs_alread_listed return value to be bool.

---
 drivers/net/failsafe/failsafe.c | 45 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index e53a89d..42dfaca 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -3,6 +3,8 @@
  * Copyright 2017 Mellanox Technologies, Ltd
  */
 
+#include <stdbool.h>
+
 #include <rte_alarm.h>
 #include <rte_malloc.h>
 #include <rte_ethdev_driver.h>
@@ -318,11 +320,28 @@ fs_rte_eth_free(const char *name)
 	return ret;
 }
 
+static bool
+devargs_already_listed(struct rte_devargs *devargs)
+{
+	struct rte_devargs *list_da;
+
+	RTE_EAL_DEVARGS_FOREACH(devargs->bus->name, list_da) {
+		if (strcmp(list_da->name, devargs->name) == 0)
+			/* devargs already in the list */
+			return true;
+	}
+	return false;
+}
+
 static int
 rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
 {
 	const char *name;
 	struct rte_eth_dev *eth_dev;
+	struct sub_device  *sdev;
+	struct rte_devargs devargs;
+	uint8_t i;
+	int ret;
 
 	name = rte_vdev_device_name(vdev);
 	INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s",
@@ -335,9 +354,33 @@ rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
 			ERROR("Failed to probe %s", name);
 			return -1;
 		}
-		/* TODO: request info from primary to set up Rx and Tx */
 		eth_dev->dev_ops = &failsafe_ops;
 		eth_dev->device = &vdev->device;
+		eth_dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
+		eth_dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
+		/*
+		 * Failsafe will attempt to probe all of its sub-devices.
+		 * Any failure in sub-devices is not a fatal error.
+		 * A sub-device can be plugged later.
+		 */
+		FOREACH_SUBDEV(sdev, i, eth_dev) {
+			/* rebuild devargs to be able to get the bus name. */
+			ret = rte_devargs_parse(&devargs,
+						sdev->devargs.name);
+			if (ret != 0) {
+				ERROR("Failed to parse devargs %s",
+					devargs.name);
+				continue;
+			}
+			if (!devargs_already_listed(&devargs)) {
+				ret = rte_dev_probe(devargs.name);
+				if (ret != 0) {
+					ERROR("Failed to probe devargs %s",
+					      devargs.name);
+					continue;
+				}
+			}
+		}
 		rte_eth_dev_probing_finish(eth_dev);
 		return 0;
 	}
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH v3 0/4] support secondary process for failsafe
  2019-03-18 16:05 ` [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh
                     ` (3 preceding siblings ...)
  2019-03-18 16:05   ` [PATCH v3 3/4] net/failsafe: replace sub-device pointer with port id Raslan Darawsheh
@ 2019-03-18 16:16   ` Gaëtan Rivet
  2019-03-27 14:08     ` Ferruh Yigit
  4 siblings, 1 reply; 26+ messages in thread
From: Gaëtan Rivet @ 2019-03-18 16:16 UTC (permalink / raw)
  To: Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen

Hello Raslan,

It all seems okay, thank you for the reworks and the added features :).

For the series:
Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>

On Mon, Mar 18, 2019 at 04:05:24PM +0000, Raslan Darawsheh wrote:
> this set of patches are intended to support secondary process for failsafe.
> 
> v3 integrates changes after review from Gaetan
> There is no functional change.
> 
> Raslan Darawsheh (4):
>   net/failsafe: replace local device with shared data
>   net/failsafe: change back-reference from sub-device
>   net/failsafe: replace sub-device pointer with port id
>   net/failsafe: support secondary process
> 
>  drivers/net/failsafe/failsafe.c         | 53 +++++++++++++++++++++++++++++++--
>  drivers/net/failsafe/failsafe_eal.c     |  4 +--
>  drivers/net/failsafe/failsafe_ether.c   | 16 +++++-----
>  drivers/net/failsafe/failsafe_intr.c    | 20 ++++++-------
>  drivers/net/failsafe/failsafe_ops.c     |  4 +--
>  drivers/net/failsafe/failsafe_private.h | 38 +++++++++++++++--------
>  drivers/net/failsafe/failsafe_rxtx.c    |  4 +--
>  7 files changed, 102 insertions(+), 37 deletions(-)
> 
> -- 
> 2.7.4
> 

-- 
Gaëtan Rivet
6WIND

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v3 0/4] support secondary process for failsafe
  2019-03-18 16:16   ` [PATCH v3 0/4] support secondary process for failsafe Gaëtan Rivet
@ 2019-03-27 14:08     ` Ferruh Yigit
  0 siblings, 0 replies; 26+ messages in thread
From: Ferruh Yigit @ 2019-03-27 14:08 UTC (permalink / raw)
  To: Gaëtan Rivet, Raslan Darawsheh; +Cc: dev, Thomas Monjalon, stephen

<...>

> On Mon, Mar 18, 2019 at 04:05:24PM +0000, Raslan Darawsheh wrote:
>> this set of patches are intended to support secondary process for failsafe.
>>
>> v3 integrates changes after review from Gaetan
>> There is no functional change.
>>
>> Raslan Darawsheh (4):
>>   net/failsafe: replace local device with shared data
>>   net/failsafe: change back-reference from sub-device
>>   net/failsafe: replace sub-device pointer with port id
>>   net/failsafe: support secondary process
>>
> For the series:
> Acked-by: Gaetan Rivet <gaetan.rivet@6wind.com>

Series applied to dpdk-next-net/master, thanks.

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2019-03-27 14:08 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-05  9:52 [PATCH v2 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh
2019-03-05  9:52 ` [PATCH v2 3/4] net/failsafe: replace local sub-device " Raslan Darawsheh
2019-03-05  9:59   ` Thomas Monjalon
2019-03-05 17:38   ` Gaëtan Rivet
2019-03-05 17:58     ` Thomas Monjalon
2019-03-06 10:46       ` Gaëtan Rivet
2019-03-06 18:02         ` Thomas Monjalon
2019-03-07  8:43           ` Raslan Darawsheh
2019-03-07  9:47             ` Gaëtan Rivet
2019-03-07 11:34               ` Raslan Darawsheh
2019-03-07 11:50                 ` Gaëtan Rivet
2019-03-05  9:52 ` [PATCH v2 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh
2019-03-05 16:48   ` Gaëtan Rivet
2019-03-07  9:01     ` Raslan Darawsheh
2019-03-07  9:43       ` Gaëtan Rivet
2019-03-05  9:52 ` [PATCH v2 4/4] net/failsafe: support secondary process Raslan Darawsheh
2019-03-05 16:43 ` [PATCH v2 1/4] net/failsafe: replace local device with shared data Gaëtan Rivet
2019-03-05 17:40   ` Gaëtan Rivet
2019-03-05 17:41     ` Thomas Monjalon
2019-03-18 16:05 ` [PATCH v3 0/4] support secondary process for failsafe Raslan Darawsheh
2019-03-18 16:05   ` [PATCH v3 1/4] net/failsafe: replace local device with shared data Raslan Darawsheh
2019-03-18 16:05   ` [PATCH v3 2/4] net/failsafe: change back-reference from sub-device Raslan Darawsheh
2019-03-18 16:05   ` [PATCH v3 4/4] net/failsafe: support secondary process Raslan Darawsheh
2019-03-18 16:05   ` [PATCH v3 3/4] net/failsafe: replace sub-device pointer with port id Raslan Darawsheh
2019-03-18 16:16   ` [PATCH v3 0/4] support secondary process for failsafe Gaëtan Rivet
2019-03-27 14:08     ` Ferruh Yigit

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.