From mboxrd@z Thu Jan 1 00:00:00 1970 From: Gaetan Rivet Subject: [PATCH v7 09/11] net/failsafe: support device removal Date: Sun, 9 Jul 2017 03:47:30 +0200 Message-ID: <39f84ba65c0b3489e063a3697beadd86431bd568.1499564333.git.gaetan.rivet@6wind.com> References: Cc: Gaetan Rivet To: dev@dpdk.org Return-path: Received: from mail-wr0-f178.google.com (mail-wr0-f178.google.com [209.85.128.178]) by dpdk.org (Postfix) with ESMTP id D1EDE7CBD for ; Sun, 9 Jul 2017 03:47:56 +0200 (CEST) Received: by mail-wr0-f178.google.com with SMTP id k67so93442183wrc.2 for ; Sat, 08 Jul 2017 18:47:56 -0700 (PDT) In-Reply-To: In-Reply-To: References: List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Listen to INTR_RMV events issued by slaves. Add atomic flags on slave queues to detect use of slave bursts function. If a removal is detected, set the recollection flag on this slave. During a slave upkeep round, if its recollection flag is set and its burst functions are not in use by any thread, remove that slave. Signed-off-by: Gaetan Rivet Acked-by: Olga Shern --- doc/guides/nics/fail_safe.rst | 14 +++++ drivers/net/failsafe/failsafe.c | 1 + drivers/net/failsafe/failsafe_args.c | 13 +++++ drivers/net/failsafe/failsafe_eal.c | 3 +- drivers/net/failsafe/failsafe_ether.c | 96 ++++++++++++++++++++++++++++++--- drivers/net/failsafe/failsafe_ops.c | 38 +++++++++++-- drivers/net/failsafe/failsafe_private.h | 72 ++++++++++++++++++++++--- drivers/net/failsafe/failsafe_rxtx.c | 17 +++++- 8 files changed, 234 insertions(+), 20 deletions(-) diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst index 1b6e110..4154f0a 100644 --- a/doc/guides/nics/fail_safe.rst +++ b/doc/guides/nics/fail_safe.rst @@ -51,6 +51,12 @@ The Fail-safe PMD only supports a limited set of features. If you plan to use a device underneath the Fail-safe PMD with a specific feature, this feature must be supported by the Fail-safe PMD to avoid throwing any error. +A notable exception is the device removal feature. The fail-safe PMD being a +virtual device, it cannot currently be removed in the sense of a specific bus +hotplug, like for PCI for example. It will however enable this feature for its +sub-device automatically, detecting those that are capable and register the +relevant callback for such event. + Check the feature matrix for the complete set of supported features. Compilation options @@ -170,3 +176,11 @@ emit and receive packets. It will store any applied configuration, and try to apply it upon the probing of its missing sub-device. After this configuration pass, the new sub-device will be synchronized with other sub-devices, i.e. be started if the fail-safe PMD has been started by the user before. + +Plug-out feature +---------------- + +A sub-device supporting the device removal event can be removed from its bus at +any time. The fail-safe PMD will register a callback for such event and react +accordingly. It will try to safely stop, close and uninit the sub-device having +emitted this event, allowing it to free its eventual resources. diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c index 6557255..4d35860 100644 --- a/drivers/net/failsafe/failsafe.c +++ b/drivers/net/failsafe/failsafe.c @@ -132,6 +132,7 @@ fs_hotplug_alarm(void *arg) if (!PRIV(dev)->pending_alarm) return; PRIV(dev)->pending_alarm = 0; + failsafe_dev_remove(dev); FOREACH_SUBDEV(sdev, i, dev) if (sdev->state != PRIV(dev)->state) break; diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c index c723ca3..dd55aaf 100644 --- a/drivers/net/failsafe/failsafe_args.c +++ b/drivers/net/failsafe/failsafe_args.c @@ -443,6 +443,17 @@ failsafe_args_count_subdevice(struct rte_eth_dev *dev, dev, params); } +static int +fs_parse_sub_device(struct sub_device *sdev) +{ + struct rte_devargs *da; + char devstr[DEVARGS_MAXLEN] = ""; + + da = &sdev->devargs; + snprintf(devstr, sizeof(devstr), "%s,%s", da->name, da->args); + return fs_parse_device(sdev, devstr); +} + int failsafe_args_parse_subs(struct rte_eth_dev *dev) { @@ -455,6 +466,8 @@ failsafe_args_parse_subs(struct rte_eth_dev *dev) continue; if (sdev->cmdline) ret = fs_execute_cmd(sdev, sdev->cmdline); + else + ret = fs_parse_sub_device(sdev); if (ret == 0) sdev->state = DEV_PARSED; } diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c index 86e16a6..3321dda 100644 --- a/drivers/net/failsafe/failsafe_eal.c +++ b/drivers/net/failsafe/failsafe_eal.c @@ -79,6 +79,7 @@ fs_bus_init(struct rte_eth_dev *dev) return -ENODEV; } SUB_ID(sdev) = i; + sdev->fs_dev = dev; sdev->dev = ETH(sdev)->device; ETH(sdev)->state = RTE_ETH_DEV_DEFERRED; sdev->state = DEV_PROBED; @@ -96,7 +97,7 @@ failsafe_eal_init(struct rte_eth_dev *dev) return ret; if (PRIV(dev)->state < DEV_PROBED) PRIV(dev)->state = DEV_PROBED; - fs_switch_dev(dev); + fs_switch_dev(dev, NULL); return 0; } diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c index 2958207..ea3105c 100644 --- a/drivers/net/failsafe/failsafe_ether.c +++ b/drivers/net/failsafe/failsafe_ether.c @@ -250,6 +250,67 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev, return 0; } +static void +fs_dev_remove(struct sub_device *sdev) +{ + int ret; + + if (sdev == NULL) + return; + switch (sdev->state) { + case DEV_STARTED: + rte_eth_dev_stop(PORT_ID(sdev)); + sdev->state = DEV_ACTIVE; + /* fallthrough */ + case DEV_ACTIVE: + rte_eth_dev_close(PORT_ID(sdev)); + sdev->state = DEV_PROBED; + /* fallthrough */ + case DEV_PROBED: + ret = rte_eal_hotplug_remove(sdev->bus->name, + sdev->dev->name); + if (ret) { + ERROR("Bus detach failed for sub_device %u", + SUB_ID(sdev)); + } else { + ETH(sdev)->state = RTE_ETH_DEV_UNUSED; + } + sdev->state = DEV_PARSED; + /* fallthrough */ + case DEV_PARSED: + case DEV_UNDEFINED: + sdev->state = DEV_UNDEFINED; + /* the end */ + break; + } + failsafe_hotplug_alarm_install(sdev->fs_dev); +} + +static inline int +fs_rxtx_clean(struct sub_device *sdev) +{ + uint16_t i; + + for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++) + if (FS_ATOMIC_RX(sdev, i)) + return 0; + for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++) + if (FS_ATOMIC_TX(sdev, i)) + return 0; + return 1; +} + +void +failsafe_dev_remove(struct rte_eth_dev *dev) +{ + struct sub_device *sdev; + uint8_t i; + + FOREACH_SUBDEV_ST(sdev, i, dev, DEV_ACTIVE) + if (sdev->remove && fs_rxtx_clean(sdev)) + fs_dev_remove(sdev); +} + int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev) { @@ -263,13 +324,13 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev) ret = failsafe_args_parse_subs(dev); if (ret) - return ret; + goto err_remove; if (PRIV(dev)->state < DEV_PROBED) return 0; ret = failsafe_eal_init(dev); if (ret) - return ret; + goto err_remove; if (PRIV(dev)->state < DEV_ACTIVE) return 0; inactive = 0; @@ -278,15 +339,14 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev) inactive |= UINT32_C(1) << i; ret = dev->dev_ops->dev_configure(dev); if (ret) - return ret; + goto err_remove; FOREACH_SUBDEV(sdev, i, dev) { if (inactive & (UINT32_C(1) << i)) { ret = fs_eth_dev_conf_apply(dev, sdev); if (ret) { ERROR("Could not apply configuration to sub_device %d", i); - /* TODO: disable device */ - return ret; + goto err_remove; } } } @@ -300,6 +360,30 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev) return 0; ret = dev->dev_ops->dev_start(dev); if (ret) - return ret; + goto err_remove; + return 0; +err_remove: + FOREACH_SUBDEV(sdev, i, dev) + if (sdev->state != PRIV(dev)->state) + sdev->remove = 1; + return ret; +} + +int +failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused, + enum rte_eth_event_type event __rte_unused, + void *cb_arg, void *out __rte_unused) +{ + struct sub_device *sdev = cb_arg; + + /* Switch as soon as possible tx_dev. */ + fs_switch_dev(sdev->fs_dev, sdev); + /* Use safe bursts in any case. */ + set_burst_fn(sdev->fs_dev, 1); + /* + * Async removal, the sub-PMD will try to unregister + * the callback at the source of the current thread context. + */ + sdev->remove = 1; return 0; } diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c index 5fb0135..2e1c798 100644 --- a/drivers/net/failsafe/failsafe_ops.c +++ b/drivers/net/failsafe/failsafe_ops.c @@ -33,6 +33,8 @@ #include #include + +#include #include #include #include @@ -204,9 +206,21 @@ fs_dev_configure(struct rte_eth_dev *dev) } } FOREACH_SUBDEV(sdev, i, dev) { + int rmv_interrupt = 0; + if (sdev->state != DEV_PROBED) continue; + + rmv_interrupt = ETH(sdev)->data->dev_flags & + RTE_ETH_DEV_INTR_RMV; + if (rmv_interrupt) { + DEBUG("Enabling RMV interrupts for sub_device %d", i); + dev->data->dev_conf.intr_conf.rmv = 1; + } else { + DEBUG("sub_device %d does not support RMV event", i); + } DEBUG("Configuring sub-device %d", i); + sdev->remove = 0; ret = rte_eth_dev_configure(PORT_ID(sdev), dev->data->nb_rx_queues, dev->data->nb_tx_queues, @@ -215,6 +229,16 @@ fs_dev_configure(struct rte_eth_dev *dev) ERROR("Could not configure sub_device %d", i); return ret; } + if (rmv_interrupt) { + ret = rte_eth_dev_callback_register(PORT_ID(sdev), + RTE_ETH_EVENT_INTR_RMV, + failsafe_eth_rmv_event_callback, + sdev); + if (ret) + WARN("Failed to register RMV callback for sub_device %d", + SUB_ID(sdev)); + } + dev->data->dev_conf.intr_conf.rmv = 0; sdev->state = DEV_ACTIVE; } if (PRIV(dev)->state < DEV_ACTIVE) @@ -240,7 +264,7 @@ fs_dev_start(struct rte_eth_dev *dev) } if (PRIV(dev)->state < DEV_STARTED) PRIV(dev)->state = DEV_STARTED; - fs_switch_dev(dev); + fs_switch_dev(dev, NULL); return 0; } @@ -351,10 +375,14 @@ fs_rx_queue_setup(struct rte_eth_dev *dev, fs_rx_queue_release(rxq); dev->data->rx_queues[rx_queue_id] = NULL; } - rxq = rte_zmalloc(NULL, sizeof(*rxq), + rxq = rte_zmalloc(NULL, + sizeof(*rxq) + + sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail, RTE_CACHE_LINE_SIZE); if (rxq == NULL) return -ENOMEM; + FOREACH_SUBDEV(sdev, i, dev) + rte_atomic64_init(&rxq->refcnt[i]); rxq->qid = rx_queue_id; rxq->socket_id = socket_id; rxq->info.mp = mb_pool; @@ -414,10 +442,14 @@ fs_tx_queue_setup(struct rte_eth_dev *dev, fs_tx_queue_release(txq); dev->data->tx_queues[tx_queue_id] = NULL; } - txq = rte_zmalloc("ethdev TX queue", sizeof(*txq), + txq = rte_zmalloc("ethdev TX queue", + sizeof(*txq) + + sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail, RTE_CACHE_LINE_SIZE); if (txq == NULL) return -ENOMEM; + FOREACH_SUBDEV(sdev, i, dev) + rte_atomic64_init(&txq->refcnt[i]); txq->qid = tx_queue_id; txq->socket_id = socket_id; txq->info.conf = *tx_conf; diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h index 25a4dac..6a8041d 100644 --- a/drivers/net/failsafe/failsafe_private.h +++ b/drivers/net/failsafe/failsafe_private.h @@ -36,6 +36,7 @@ #include +#include #include #include #include @@ -65,6 +66,7 @@ struct rxq { uint8_t last_polled; unsigned int socket_id; struct rte_eth_rxq_info info; + rte_atomic64_t refcnt[]; }; struct txq { @@ -72,6 +74,7 @@ struct txq { uint16_t qid; unsigned int socket_id; struct rte_eth_txq_info info; + rte_atomic64_t refcnt[]; }; struct rte_flow { @@ -101,6 +104,10 @@ struct sub_device { enum dev_state state; /* Some device are defined as a command line */ char *cmdline; + /* fail-safe device backreference */ + struct rte_eth_dev *fs_dev; + /* flag calling for recollection */ + volatile unsigned int remove:1; }; struct fs_priv { @@ -168,6 +175,10 @@ int failsafe_eal_uninit(struct rte_eth_dev *dev); /* ETH_DEV */ int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev); +void failsafe_dev_remove(struct rte_eth_dev *dev); +int failsafe_eth_rmv_event_callback(uint8_t port_id, + enum rte_eth_event_type type, + void *arg, void *out); /* GLOBALS */ @@ -233,6 +244,39 @@ extern int mac_from_arg; #define SUBOPS(s, ops) \ (ETH(s)->dev_ops->ops) +/** + * Atomic guard + */ + +/** + * a: (rte_atomic64_t *) + */ +#define FS_ATOMIC_P(a) \ + rte_atomic64_add(&(a), 1) + +/** + * a: (rte_atomic64_t *) + */ +#define FS_ATOMIC_V(a) \ + rte_atomic64_sub(&(a), 1) + +/** + * s: (struct sub_device *) + * i: uint16_t qid + */ +#define FS_ATOMIC_RX(s, i) \ + rte_atomic64_read( \ + &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \ + ) +/** + * s: (struct sub_device *) + * i: uint16_t qid + */ +#define FS_ATOMIC_TX(s, i) \ + rte_atomic64_read( \ + &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \ + ) + #ifndef NDEBUG #include #define DEBUG__(m, ...) \ @@ -274,33 +318,45 @@ fs_find_next(struct rte_eth_dev *dev, uint8_t sid, return sid; } +/* + * Switch emitting device. + * If banned is set, banned must not be considered for + * the role of emitting device. + */ static inline void -fs_switch_dev(struct rte_eth_dev *dev) +fs_switch_dev(struct rte_eth_dev *dev, + struct sub_device *banned) { + struct sub_device *txd; enum dev_state req_state; req_state = PRIV(dev)->state; - if (PREFERRED_SUBDEV(dev)->state >= req_state) { - if (TX_SUBDEV(dev) != PREFERRED_SUBDEV(dev) && - (TX_SUBDEV(dev) == NULL || + txd = TX_SUBDEV(dev); + if (PREFERRED_SUBDEV(dev)->state >= req_state && + PREFERRED_SUBDEV(dev) != banned) { + if (txd != PREFERRED_SUBDEV(dev) && + (txd == NULL || (req_state == DEV_STARTED) || - (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < DEV_STARTED))) { + (txd && txd->state < DEV_STARTED))) { DEBUG("Switching tx_dev to preferred sub_device"); PRIV(dev)->subs_tx = 0; } - } else if ((TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) || - TX_SUBDEV(dev) == NULL) { + } else if ((txd && txd->state < req_state) || + txd == NULL || + txd == banned) { struct sub_device *sdev; uint8_t i; /* Using acceptable device */ FOREACH_SUBDEV_ST(sdev, i, dev, req_state) { + if (sdev == banned) + continue; DEBUG("Switching tx_dev to sub_device %d", i); PRIV(dev)->subs_tx = i; break; } - } else if (TX_SUBDEV(dev) && TX_SUBDEV(dev)->state < req_state) { + } else if (txd && txd->state < req_state) { DEBUG("No device ready, deactivating tx_dev"); PRIV(dev)->subs_tx = PRIV(dev)->subs_tail; } else { diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c index c15025f..82a8c4e 100644 --- a/drivers/net/failsafe/failsafe_rxtx.c +++ b/drivers/net/failsafe/failsafe_rxtx.c @@ -33,6 +33,7 @@ #include +#include #include #include @@ -113,8 +114,10 @@ failsafe_rx_burst(void *queue, if (unlikely(fs_rx_unsafe(sdev))) continue; sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid]; + FS_ATOMIC_P(rxq->refcnt[sdev->sid]); nb_rx = ETH(sdev)-> rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts); + FS_ATOMIC_V(rxq->refcnt[sdev->sid]); if (nb_rx) { rxq->last_polled = i; return nb_rx; @@ -147,8 +150,10 @@ failsafe_rx_burst_fast(void *queue, sdev = &priv->subs[i]; assert(!fs_rx_unsafe(sdev)); sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid]; + FS_ATOMIC_P(rxq->refcnt[sdev->sid]); nb_rx = ETH(sdev)-> rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts); + FS_ATOMIC_V(rxq->refcnt[sdev->sid]); if (nb_rx) { rxq->last_polled = i; return nb_rx; @@ -165,13 +170,17 @@ failsafe_tx_burst(void *queue, struct sub_device *sdev; struct txq *txq; void *sub_txq; + uint16_t nb_tx; txq = queue; sdev = TX_SUBDEV(txq->priv->dev); if (unlikely(fs_tx_unsafe(sdev))) return 0; sub_txq = ETH(sdev)->data->tx_queues[txq->qid]; - return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts); + FS_ATOMIC_P(txq->refcnt[sdev->sid]); + nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts); + FS_ATOMIC_V(txq->refcnt[sdev->sid]); + return nb_tx; } uint16_t @@ -182,10 +191,14 @@ failsafe_tx_burst_fast(void *queue, struct sub_device *sdev; struct txq *txq; void *sub_txq; + uint16_t nb_tx; txq = queue; sdev = TX_SUBDEV(txq->priv->dev); assert(!fs_tx_unsafe(sdev)); sub_txq = ETH(sdev)->data->tx_queues[txq->qid]; - return ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts); + FS_ATOMIC_P(txq->refcnt[sdev->sid]); + nb_tx = ETH(sdev)->tx_pkt_burst(sub_txq, tx_pkts, nb_pkts); + FS_ATOMIC_V(txq->refcnt[sdev->sid]); + return nb_tx; } -- 2.1.4