All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] net/mlx4: implement isolated mode from flow API
@ 2017-05-25 13:02 Vasily Philipov
  2017-05-25 13:02 ` [PATCH 2/3] net/mlx4: support for the RSS flow action Vasily Philipov
                   ` (27 more replies)
  0 siblings, 28 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-05-25 13:02 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

The user must request isolated mode before device configuration;
the default RSS ring isn't created in this case.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
The series depends on:

http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/
---
 drivers/net/mlx4/mlx4.c      | 431 ++++++++++++++++++++++++++++++-------------
 drivers/net/mlx4/mlx4.h      |  18 +-
 drivers/net/mlx4/mlx4_flow.c |  39 ++++
 drivers/net/mlx4/mlx4_flow.h |   4 +
 4 files changed, 364 insertions(+), 128 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index ec4419a..fc995c1 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -533,13 +533,96 @@ void priv_unlock(struct priv *priv)
 
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp);
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent);
 
 static void
 rxq_cleanup(struct rxq *rxq);
 
 /**
+ * Create RSS parent queue.
+ *
+ * The newly created structure is inserted at the head of priv parents list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   queues indices array, if NULL use all Rx queues.
+ * @param children_n
+ *   The number of entries in queues[].
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+priv_create_parent(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n)
+{
+	int ret;
+	uint16_t i;
+	struct rxq *parent;
+
+	parent = rte_zmalloc("parent queue",
+			     sizeof(*parent),
+			     RTE_CACHE_LINE_SIZE);
+	if (!parent)
+		return -ENOMEM;
+	ret = rxq_setup(priv->dev, parent, 0, 0, 0,
+			NULL, NULL, children_n, NULL);
+	if (ret) {
+		rte_free(parent);
+		return -ret;
+	}
+	parent->rss.queues_n = children_n;
+	if (queues) {
+		for (i = 0; i < children_n; ++i)
+			parent->rss.queues[i] = queues[i];
+	} else {
+		/* the default RSS ring case */
+		assert(priv->rxqs_n == children_n);
+		for (i = 0; i < priv->rxqs_n; ++i)
+			parent->rss.queues[i] = i;
+	}
+	LIST_INSERT_HEAD(&priv->parents, parent, next);
+	return 0;
+}
+
+/**
+ * Cleanup RX queue parent structure.
+ *
+ * @param parent
+ *   RX queue parent structure.
+ */
+void
+rxq_parent_cleanup(struct rxq *parent)
+{
+	if (parent) {
+		LIST_REMOVE(parent, next);
+		rxq_cleanup(parent);
+		rte_free(parent);
+	}
+}
+
+/**
+ * Clean up parent structures from the parents list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_parents_list_cleanup(struct priv *priv)
+{
+	while (!LIST_EMPTY(&priv->parents)) {
+		struct rxq *parent = LIST_FIRST(&priv->parents);
+
+		rxq_parent_cleanup(parent);
+	}
+}
+
+/**
  * Ethernet device configuration.
  *
  * Prepare the driver for a given number of TX and RX queues.
@@ -569,7 +652,7 @@ void priv_unlock(struct priv *priv)
 	}
 	if (rxqs_n == priv->rxqs_n)
 		return 0;
-	if (!rte_is_power_of_2(rxqs_n)) {
+	if (!rte_is_power_of_2(rxqs_n) && !priv->isolated) {
 		unsigned n_active;
 
 		n_active = rte_align32pow2(rxqs_n + 1) >> 1;
@@ -588,7 +671,7 @@ void priv_unlock(struct priv *priv)
 		for (i = 0; (i != priv->rxqs_n); ++i)
 			if ((*priv->rxqs)[i] != NULL)
 				return EINVAL;
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parents_list_cleanup(priv);
 		priv->rss = 0;
 		priv->rxqs_n = 0;
 	}
@@ -613,7 +696,9 @@ void priv_unlock(struct priv *priv)
 	priv->rss = 1;
 	tmp = priv->rxqs_n;
 	priv->rxqs_n = rxqs_n;
-	ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, 0, NULL, NULL);
+	if (priv->isolated)
+		return 0;
+	ret = priv_create_parent(priv, NULL, priv->rxqs_n);
 	if (!ret)
 		return 0;
 	/* Failure, rollback. */
@@ -2499,11 +2584,12 @@ struct txq_mp2mr_mbuf_check_data {
 {
 	unsigned int i;
 
+	assert(!priv->isolated);
 	assert(mac_index < elemof(priv->mac));
 	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
 		return;
 	if (priv->rss) {
-		rxq_mac_addr_del(&priv->rxq_parent, mac_index);
+		rxq_mac_addr_del(LIST_FIRST(&priv->parents), mac_index);
 		goto end;
 	}
 	for (i = 0; (i != priv->dev->data->nb_rx_queues); ++i)
@@ -2570,7 +2656,7 @@ struct txq_mp2mr_mbuf_check_data {
 		goto end;
 	}
 	if (priv->rss) {
-		ret = rxq_mac_addr_add(&priv->rxq_parent, mac_index);
+		ret = rxq_mac_addr_add(LIST_FIRST(&priv->parents), mac_index);
 		if (ret)
 			return ret;
 		goto end;
@@ -2748,12 +2834,13 @@ struct txq_mp2mr_mbuf_check_data {
 						rxq->if_cq,
 						&params));
 	}
-	if (rxq->qp != NULL) {
+	if (rxq->qp != NULL && !rxq->priv->isolated) {
 		rxq_promiscuous_disable(rxq);
 		rxq_allmulticast_disable(rxq);
 		rxq_mac_addrs_del(rxq);
-		claim_zero(ibv_destroy_qp(rxq->qp));
 	}
+	if (rxq->qp != NULL)
+		claim_zero(ibv_destroy_qp(rxq->qp));
 	if (rxq->cq != NULL)
 		claim_zero(ibv_destroy_cq(rxq->cq));
 	if (rxq->rd != NULL) {
@@ -3330,15 +3417,18 @@ struct txq_mp2mr_mbuf_check_data {
  *   Completion queue to associate with QP.
  * @param desc
  *   Number of descriptors in QP (hint only).
- * @param parent
- *   If nonzero, create a parent QP, otherwise a child.
+ * @param children_n
+ *   Number of children when creating a parent QP, zero for a child QP.
+ * @param rxq_parent
+ *   Pointer to the parent in the child case, NULL otherwise.
  *
  * @return
  *   QP pointer or NULL in case of error.
  */
 static struct ibv_qp *
 rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
-		 int parent, struct ibv_exp_res_domain *rd)
+		 int children_n, struct ibv_exp_res_domain *rd,
+		 struct rxq *rxq_parent)
 {
 	struct ibv_exp_qp_init_attr attr = {
 		/* CQ to be associated with the send queue. */
@@ -3368,7 +3458,7 @@ struct txq_mp2mr_mbuf_check_data {
 	attr.max_inl_recv = priv->inl_recv_size,
 	attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV;
 #endif
-	if (parent) {
+	if (children_n > 0) {
 		attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
 		/* TSS isn't necessary. */
 		attr.qpg.parent_attrib.tss_child_count = 0;
@@ -3377,7 +3467,7 @@ struct txq_mp2mr_mbuf_check_data {
 		DEBUG("initializing parent RSS queue");
 	} else {
 		attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
-		attr.qpg.qpg_parent = priv->rxq_parent.qp;
+		attr.qpg.qpg_parent = rxq_parent->qp;
 		DEBUG("initializing child RSS queue");
 	}
 	return ibv_exp_create_qp(priv->ctx, &attr);
@@ -3413,13 +3503,7 @@ struct txq_mp2mr_mbuf_check_data {
 	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int err;
-	int parent = (rxq == &priv->rxq_parent);
 
-	if (parent) {
-		ERROR("%p: cannot rehash parent queue %p",
-		      (void *)dev, (void *)rxq);
-		return EINVAL;
-	}
 	mb_len = rte_pktmbuf_data_room_size(rxq->mp);
 	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
 	/* Number of descriptors and mbufs currently allocated. */
@@ -3451,7 +3535,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return 0;
 	}
 	/* Remove attached flows if RSS is disabled (no parent queue). */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_allmulticast_disable(&tmpl);
 		rxq_promiscuous_disable(&tmpl);
 		rxq_mac_addrs_del(&tmpl);
@@ -3464,6 +3548,8 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	/* From now on, any failure will render the queue unusable.
 	 * Reinitialize QP. */
+	if (!tmpl.qp)
+		goto skip_init;
 	mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
 	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
 	if (err) {
@@ -3471,12 +3557,6 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	}
-	err = ibv_resize_cq(tmpl.cq, desc_n);
-	if (err) {
-		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
-		assert(err > 0);
-		return err;
-	}
 	mod = (struct ibv_exp_qp_attr){
 		/* Move the QP to this state. */
 		.qp_state = IBV_QPS_INIT,
@@ -3485,9 +3565,6 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	err = ibv_exp_modify_qp(tmpl.qp, &mod,
 				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
 				 IBV_EXP_QP_PORT));
 	if (err) {
 		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
@@ -3495,8 +3572,15 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	};
+skip_init:
+	err = ibv_resize_cq(tmpl.cq, desc_n);
+	if (err) {
+		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
+		assert(err > 0);
+		return err;
+	}
 	/* Reconfigure flows. Do not care for errors. */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_mac_addrs_add(&tmpl);
 		if (priv->promisc)
 			rxq_promiscuous_enable(&tmpl);
@@ -3562,6 +3646,8 @@ struct txq_mp2mr_mbuf_check_data {
 	rxq->elts_n = 0;
 	rte_free(rxq->elts.sp);
 	rxq->elts.sp = NULL;
+	if (!tmpl.qp)
+		goto skip_rtr;
 	/* Post WRs. */
 	err = ibv_post_recv(tmpl.qp,
 			    (tmpl.sp ?
@@ -3589,6 +3675,116 @@ struct txq_mp2mr_mbuf_check_data {
 }
 
 /**
+ * Create verbs QP resources associated with a rxq.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param inactive
+ *   If true, the queue is disabled because its index is higher or
+ *   equal to the real number of queues, which must be a power of 2.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure for a child in RSS case,
+ *   NULL for parent.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent)
+{
+	int ret;
+	struct ibv_exp_qp_attr mod;
+	struct ibv_exp_query_intf_params params;
+	enum ibv_exp_query_intf_status status;
+	struct ibv_recv_wr *bad_wr;
+	int parent = (children_n > 0);
+	struct priv *priv = rxq->priv;
+
+#ifdef RSS_SUPPORT
+	if (priv->rss && !inactive && (rxq_parent || parent))
+		rxq->qp = rxq_setup_qp_rss(priv, rxq->cq, desc,
+					   children_n, rxq->rd,
+					   rxq_parent);
+	else
+#endif /* RSS_SUPPORT */
+		rxq->qp = rxq_setup_qp(priv, rxq->cq, desc, rxq->rd);
+	if (rxq->qp == NULL) {
+		ret = (errno ? errno : EINVAL);
+		ERROR("QP creation failure: %s",
+		      strerror(ret));
+		return ret;
+	}
+	mod = (struct ibv_exp_qp_attr){
+		/* Move the QP to this state. */
+		.qp_state = IBV_QPS_INIT,
+		/* Primary port number. */
+		.port_num = priv->port
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod,
+				(IBV_EXP_QP_STATE |
+#ifdef RSS_SUPPORT
+				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
+#endif /* RSS_SUPPORT */
+				 IBV_EXP_QP_PORT));
+	if (ret) {
+		ERROR("QP state to IBV_QPS_INIT failed: %s",
+		      strerror(ret));
+		return ret;
+	}
+	if (!priv->isolated && (parent || !priv->rss))  {
+		/* Configure MAC and broadcast addresses. */
+		ret = rxq_mac_addrs_add(rxq);
+		if (ret) {
+			ERROR("QP flow attachment failed: %s",
+			      strerror(ret));
+			return ret;
+		}
+	}
+	if (!parent) {
+		ret = ibv_post_recv(rxq->qp,
+				    (rxq->sp ?
+				     &(*rxq->elts.sp)[0].wr :
+				     &(*rxq->elts.no_sp)[0].wr),
+				    &bad_wr);
+		if (ret) {
+			ERROR("ibv_post_recv() failed for WR %p: %s",
+			      (void *)bad_wr,
+			      strerror(ret));
+			return ret;
+		}
+	}
+	mod = (struct ibv_exp_qp_attr){
+		.qp_state = IBV_QPS_RTR
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod, IBV_EXP_QP_STATE);
+	if (ret) {
+		ERROR("QP state to IBV_QPS_RTR failed: %s",
+		      strerror(ret));
+		return ret;
+	}
+	params = (struct ibv_exp_query_intf_params){
+		.intf_scope = IBV_EXP_INTF_GLOBAL,
+		.intf = IBV_EXP_INTF_QP_BURST,
+		.obj = rxq->qp,
+	};
+	rxq->if_qp = ibv_exp_query_intf(priv->ctx, &params, &status);
+	if (rxq->if_qp == NULL) {
+		ERROR("QP interface family query failed with status %d",
+		      status);
+		return errno;
+	}
+	return 0;
+}
+
+/**
  * Configure a RX queue.
  *
  * @param dev
@@ -3606,14 +3802,21 @@ struct txq_mp2mr_mbuf_check_data {
  *   Thresholds parameters.
  * @param mp
  *   Memory pool for buffer allocations.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure (or NULL) in a child case,
+ *   NULL for parent.
  *
  * @return
  *   0 on success, errno value on failure.
  */
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp)
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct rxq tmpl = {
@@ -3621,17 +3824,15 @@ struct txq_mp2mr_mbuf_check_data {
 		.mp = mp,
 		.socket = socket
 	};
-	struct ibv_exp_qp_attr mod;
 	union {
 		struct ibv_exp_query_intf_params params;
 		struct ibv_exp_cq_init_attr cq;
 		struct ibv_exp_res_domain_init_attr rd;
 	} attr;
 	enum ibv_exp_query_intf_status status;
-	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int ret = 0;
-	int parent = (rxq == &priv->rxq_parent);
+	int parent = (children_n > 0);
 
 	(void)conf; /* Thresholds configuration (ignored). */
 	/*
@@ -3711,45 +3912,6 @@ struct txq_mp2mr_mbuf_check_data {
 	      priv->device_attr.max_qp_wr);
 	DEBUG("priv->device_attr.max_sge is %d",
 	      priv->device_attr.max_sge);
-#ifdef RSS_SUPPORT
-	if (priv->rss && !inactive)
-		tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
-					   tmpl.rd);
-	else
-#endif /* RSS_SUPPORT */
-		tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
-	if (tmpl.qp == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: QP creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	mod = (struct ibv_exp_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod,
-				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
-				 IBV_EXP_QP_PORT));
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	if ((parent) || (!priv->rss))  {
-		/* Configure MAC and broadcast addresses. */
-		ret = rxq_mac_addrs_add(&tmpl);
-		if (ret) {
-			ERROR("%p: QP flow attachment failed: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
 	/* Allocate descriptors for RX queues, except for the RSS parent. */
 	if (parent)
 		goto skip_alloc;
@@ -3760,29 +3922,14 @@ struct txq_mp2mr_mbuf_check_data {
 	if (ret) {
 		ERROR("%p: RXQ allocation failed: %s",
 		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	ret = ibv_post_recv(tmpl.qp,
-			    (tmpl.sp ?
-			     &(*tmpl.elts.sp)[0].wr :
-			     &(*tmpl.elts.no_sp)[0].wr),
-			    &bad_wr);
-	if (ret) {
-		ERROR("%p: ibv_post_recv() failed for WR %p: %s",
-		      (void *)dev,
-		      (void *)bad_wr,
-		      strerror(ret));
-		goto error;
+		return ret;
 	}
 skip_alloc:
-	mod = (struct ibv_exp_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
+	if (parent || rxq_parent || !priv->rss) {
+		ret = rxq_create_qp(&tmpl, desc, inactive,
+				    children_n, rxq_parent);
+		if (ret)
+			goto error;
 	}
 	/* Save port ID. */
 	tmpl.port_id = dev->data->port_id;
@@ -3794,21 +3941,11 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
 	if (tmpl.if_cq == NULL) {
+		ret = EINVAL;
 		ERROR("%p: CQ interface family query failed with status %d",
 		      (void *)dev, status);
 		goto error;
 	}
-	attr.params = (struct ibv_exp_query_intf_params){
-		.intf_scope = IBV_EXP_INTF_GLOBAL,
-		.intf = IBV_EXP_INTF_QP_BURST,
-		.obj = tmpl.qp,
-	};
-	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
-	if (tmpl.if_qp == NULL) {
-		ERROR("%p: QP interface family query failed with status %d",
-		      (void *)dev, status);
-		goto error;
-	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
 	rxq_cleanup(rxq);
@@ -3846,6 +3983,7 @@ struct txq_mp2mr_mbuf_check_data {
 		    unsigned int socket, const struct rte_eth_rxconf *conf,
 		    struct rte_mempool *mp)
 {
+	struct rxq *parent;
 	struct priv *priv = dev->data->dev_private;
 	struct rxq *rxq = (*priv->rxqs)[idx];
 	int inactive = 0;
@@ -3880,9 +4018,16 @@ struct txq_mp2mr_mbuf_check_data {
 			return -ENOMEM;
 		}
 	}
-	if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
-		inactive = 1;
-	ret = rxq_setup(dev, rxq, desc, socket, inactive, conf, mp);
+	if (priv->rss && !priv->isolated) {
+		/* The list consists of the single default one. */
+		parent = LIST_FIRST(&priv->parents);
+		if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
+			inactive = 1;
+	} else {
+		parent = NULL;
+	}
+	ret = rxq_setup(dev, rxq, desc, socket,
+			inactive, conf, mp, 0, parent);
 	if (ret)
 		rte_free(rxq);
 	else {
@@ -3919,7 +4064,6 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	priv = rxq->priv;
 	priv_lock(priv);
-	assert(rxq != &priv->rxq_parent);
 	for (i = 0; (i != priv->rxqs_n); ++i)
 		if ((*priv->rxqs)[i] == rxq) {
 			DEBUG("%p: removing RX queue %p from list",
@@ -3970,8 +4114,11 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	if (priv->rss) {
-		rxq = &priv->rxq_parent;
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4053,8 +4200,11 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
 	priv->started = 0;
-	if (priv->rss) {
-		rxq = &priv->rxq_parent;
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4188,7 +4338,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->txqs = NULL;
 	}
 	if (priv->rss)
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parents_list_cleanup(priv);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
 		claim_zero(ibv_dealloc_pd(priv->pd));
@@ -4481,6 +4631,8 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated)
+		goto end;
 	DEBUG("%p: removing MAC address from index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4514,6 +4666,12 @@ struct txq_mp2mr_mbuf_check_data {
 		return -ENOTSUP;
 	(void)vmdq;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot add MAC address, "
+		      "device is in isolated mode", (void *)dev);
+		re = EPERM;
+		goto end;
+	}
 	DEBUG("%p: adding MAC address at index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4561,6 +4719,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable promiscuous, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->promisc) {
 		priv_unlock(priv);
 		return;
@@ -4569,7 +4733,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_promiscuous_enable(&priv->rxq_parent);
+		ret = rxq_promiscuous_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4609,12 +4773,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
-	if (!priv->promisc) {
+	if (!priv->promisc || priv->isolated) {
 		priv_unlock(priv);
 		return;
 	}
 	if (priv->rss) {
-		rxq_promiscuous_disable(&priv->rxq_parent);
+		rxq_promiscuous_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -4641,6 +4805,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable allmulticast, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->allmulti) {
 		priv_unlock(priv);
 		return;
@@ -4649,7 +4819,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_allmulticast_enable(&priv->rxq_parent);
+		ret = rxq_allmulticast_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4693,8 +4863,8 @@ struct txq_mp2mr_mbuf_check_data {
 		priv_unlock(priv);
 		return;
 	}
-	if (priv->rss) {
-		rxq_allmulticast_disable(&priv->rxq_parent);
+	if (priv->rss && !priv->isolated) {
+		rxq_allmulticast_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -4832,7 +5002,7 @@ struct txq_mp2mr_mbuf_check_data {
 		}
 		/* Reenable non-RSS queue attributes. No need to check
 		 * for errors at this stage. */
-		if (!priv->rss) {
+		if (!priv->rss && !priv->isolated) {
 			rxq_mac_addrs_add(rxq);
 			if (priv->promisc)
 				rxq_promiscuous_enable(rxq);
@@ -5003,7 +5173,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5011,7 +5181,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 1;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
@@ -5025,7 +5195,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5033,7 +5203,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 0;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
@@ -5067,6 +5237,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return -E_RTE_SECONDARY;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot set vlan filter, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return -EINVAL;
+	}
 	ret = vlan_filter_set(dev, vlan_id, on);
 	priv_unlock(priv);
 	assert(ret >= 0);
@@ -5079,6 +5255,7 @@ struct txq_mp2mr_mbuf_check_data {
 	.destroy = mlx4_flow_destroy,
 	.flush = mlx4_flow_flush,
 	.query = NULL,
+	.isolate = mlx4_flow_isolate,
 };
 
 /**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 9a3bae9..b5fe1b4 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -219,6 +219,7 @@ struct rxq_elt {
 
 /* RX queue descriptor. */
 struct rxq {
+	LIST_ENTRY(rxq) next; /* Used by parent queue only */
 	struct priv *priv; /* Back pointer to private data. */
 	struct rte_mempool *mp; /* Memory Pool for allocations. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
@@ -246,6 +247,10 @@ struct rxq {
 	struct mlx4_rxq_stats stats; /* RX queue counters. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	struct ibv_exp_res_domain *rd; /* Resource Domain. */
+	struct {
+		uint16_t queues_n;
+		uint16_t queues[RTE_MAX_QUEUES_PER_PORT];
+	} rss;
 };
 
 /* TX element. */
@@ -334,12 +339,12 @@ struct priv {
 	unsigned int rss:1; /* RSS is enabled. */
 	unsigned int vf:1; /* This is a VF device. */
 	unsigned int pending_alarm:1; /* An alarm is pending. */
+	unsigned int isolated:1; /* Toggle isolated mode. */
 #ifdef INLINE_RECV
 	unsigned int inl_recv_size; /* Inline recv size */
 #endif
 	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
 	/* RX/TX queues. */
-	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
 	struct rxq *(*rxqs)[]; /* RX queues. */
@@ -348,10 +353,21 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	LIST_HEAD(mlx4_flows, rte_flow) flows;
 	struct rte_intr_conf intr_conf; /* Active interrupt configuration. */
+	LIST_HEAD(mlx4_parents, rxq) parents;
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
 void priv_lock(struct priv *priv);
 void priv_unlock(struct priv *priv);
 
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent);
+
+void
+rxq_parent_cleanup(struct rxq *parent);
+
 #endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index edfac03..3fd2716 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -957,6 +957,45 @@ struct rte_flow *
 }
 
 /**
+ * @see rte_flow_isolate()
+ *
+ * Must be done before calling dev_configure().
+ *
+ * RSS action is possible only if this mode was requested.
+ *
+ * @param dev
+ *   Pointer to the ethernet device structure.
+ * @param enable
+ *   Nonzero to enter isolated mode, attempt to leave it otherwise.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative value on error.
+ */
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	if (priv->rxqs) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "isolated mode must be set"
+				   " before configuring the device");
+		priv_unlock(priv);
+		return -rte_errno;
+	}
+	priv->isolated = !!enable;
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
  * Destroy a flow.
  *
  * @param priv
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 12a293e..6afc57f 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -83,6 +83,10 @@ struct rte_flow *
 int
 mlx4_flow_flush(struct rte_eth_dev *dev,
 		struct rte_flow_error *error);
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error);
 
 /** Structure to pass to the conversion function. */
 struct mlx4_flow {
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 2/3] net/mlx4: support for the RSS flow action
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
@ 2017-05-25 13:02 ` Vasily Philipov
  2017-05-25 13:02 ` [PATCH 3/3] app/testpmd: add isolated mode parameter Vasily Philipov
                   ` (26 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-05-25 13:02 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Isolated mode must be enabled.
The number of queues in an RSS ring must be a power of 2.
Sharing a queue between several RSS rings is not supported.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      |   2 +-
 drivers/net/mlx4/mlx4.h      |   5 ++
 drivers/net/mlx4/mlx4_flow.c | 196 ++++++++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx4/mlx4_flow.h |   3 +-
 4 files changed, 200 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index fc995c1..b51fef4 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -556,7 +556,7 @@ void priv_unlock(struct priv *priv)
  * @return
  *   0 on success, negative errno value on failure.
  */
-static int
+int
 priv_create_parent(struct priv *priv,
 		   uint16_t queues[],
 		   uint16_t children_n)
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index b5fe1b4..e95e3b5 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -370,4 +370,9 @@ struct priv {
 void
 rxq_parent_cleanup(struct rxq *parent);
 
+int
+priv_create_parent(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n);
+
 #endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 3fd2716..55d65b1 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -112,6 +112,7 @@ struct rte_flow_drop {
 static const enum rte_flow_action_type valid_actions[] = {
 	RTE_FLOW_ACTION_TYPE_DROP,
 	RTE_FLOW_ACTION_TYPE_QUEUE,
+	RTE_FLOW_ACTION_TYPE_RSS,
 	RTE_FLOW_ACTION_TYPE_END,
 };
 
@@ -672,6 +673,76 @@ struct rte_flow_drop {
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
 			action.queue = 1;
+			action.queues_n = 1;
+			action.queues[0] = queue->index;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			int i;
+			int ierr;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				actions->conf;
+
+			if (!priv->hw_rss) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used with "
+					   "the current configuration");
+				return -rte_errno;
+			}
+			if (!priv->isolated) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used without "
+					   "isolated mode");
+				return -rte_errno;
+			}
+			if (!rte_is_power_of_2(rss->num)) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "should be power of two");
+				return -rte_errno;
+			}
+			if (priv->max_rss_tbl_sz < rss->num) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "is too large");
+				return -rte_errno;
+			}
+			/* checking indexes array */
+			ierr = 0;
+			for (i = 0; i < rss->num; ++i) {
+				int j;
+				if (rss->queue[i] >= priv->rxqs_n)
+					ierr = 1;
+				/*
+				 * Prevent the user from specifying
+				 * the same queue twice in the RSS array.
+				 */
+				for (j = i + 1; j < rss->num && !ierr; ++j)
+					if (rss->queue[j] == rss->queue[i])
+						ierr = 1;
+				if (ierr) {
+					rte_flow_error_set(
+						error,
+						ENOTSUP,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"RSS action only supports "
+						"unique queue indices "
+						"in a list");
+					return -rte_errno;
+				}
+			}
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			goto exit_action_not_supported;
 		}
@@ -797,6 +868,80 @@ struct rte_flow_drop {
 }
 
 /**
+ * Get the RSS parent rxq for a set of queues, creating it if none matches.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   Queue indices array; it is read whenever children_n is nonzero.
+ * @param children_n
+ *   Number of entries in queues[].
+ * @param[out] error
+ *   Filled in through rte_flow_error_set() on failure.
+ *
+ * @return
+ *   Pointer to a parent rxq structure, NULL on failure.
+ */
+static struct rxq *
+priv_get_parent(struct priv *priv,
+		uint16_t queues[],
+		uint16_t children_n,
+		struct rte_flow_error *error)
+{
+	int ret;
+	unsigned int i;
+	struct rxq *parent;
+
+	for (parent = LIST_FIRST(&priv->parents);
+	     parent;
+	     parent = LIST_NEXT(parent, next)) {
+		unsigned int overlap = 0;
+
+		for (i = 0; i < children_n; ++i) {
+			unsigned int j;
+
+			for (j = 0; j < parent->rss.queues_n; ++j)
+				if (i != j &&
+				    parent->rss.queues[j] == queues[i]) {
+					goto error; /* Same queue, other slot. */
+				} else if (parent->rss.queues[j] == queues[i]) {
+					++overlap; /* Same queue, same slot. */
+					break;
+				}
+		}
+		if (overlap == children_n &&
+			children_n == parent->rss.queues_n)
+			return parent; /* Identical queue list. */
+		else if (overlap > 0)
+			goto error; /* Partial match only. */
+	}
+	/* Reject queues that already own a QP created without RSS. */
+	for (i = 0; i < children_n; ++i) {
+		struct rxq *rxq = (*priv->rxqs)[queues[i]];
+		if (rxq->qp) {
+			goto error;
+		}
+	}
+	ret = priv_create_parent(priv, queues, children_n);
+	if (ret) {
+		rte_flow_error_set(error,
+				   ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "flow rule creation failure");
+		return NULL;
+	}
+	return LIST_FIRST(&priv->parents); /* Newly inserted head. */
+
+error:
+	rte_flow_error_set(error,
+			   EEXIST,
+			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL,
+			   "sharing a queue between several"
+			   " RSS groups is not supported");
+	return NULL;
+}
+
+/**
  * Complete flow rule creation.
  *
  * @param priv
@@ -831,9 +976,41 @@ struct rte_flow_drop {
 	if (action->drop) {
 		qp = priv->flow_drop_queue->qp;
 	} else {
-		struct rxq *rxq = (*priv->rxqs)[action->queue_id];
-
-		qp = rxq->qp;
+		int ret;
+		unsigned int i;
+		struct rxq *rxq = NULL;
+		struct rxq *rxq_parent = NULL;
+
+		if (action->queues_n > 1) {
+			rxq_parent = priv_get_parent(priv, action->queues,
+						     action->queues_n, error);
+			if (!rxq_parent)
+				goto error;
+		}
+		for (i = 0; i < action->queues_n; ++i) {
+			rxq = (*priv->rxqs)[action->queues[i]];
+			/*
+			 * In case of isolated mode we postpone
+			 * ibv receive queue creation till the first
+			 * rte_flow rule will be applied on that queue.
+			 */
+			if (!rxq->qp) {
+				assert(priv->isolated);
+				ret = rxq_create_qp(rxq, rxq->elts_n,
+						    0, 0, rxq_parent);
+				if (ret) {
+					rxq_parent_cleanup(rxq_parent);
+					rte_flow_error_set(
+						error,
+						ENOMEM,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"flow rule creation failure");
+					goto error;
+				}
+			}
+		}
+		qp = action->queues_n > 1 ? rxq_parent->qp : rxq->qp;
 		rte_flow->qp = qp;
 	}
 	rte_flow->ibv_attr = ibv_attr;
@@ -909,11 +1086,22 @@ struct rte_flow_drop {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			action.queue = 1;
-			action.queue_id =
+			action.queues_n = 1;
+			action.queues[0] =
 				((const struct rte_flow_action_queue *)
 				 actions->conf)->index;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
 			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			unsigned int i;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				 actions->conf;
+
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 6afc57f..823d3b6 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -97,7 +97,8 @@ struct mlx4_flow {
 struct mlx4_flow_action {
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t queue:1; /**< Target is a receive queue. */
-	uint32_t queue_id; /**< Identifier of the queue. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indices to use. */
+	uint16_t queues_n; /**< Number of entries in queues[]. */
 };
 
 int mlx4_priv_flow_start(struct priv *priv);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 3/3] app/testpmd: add isolated mode parameter
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
  2017-05-25 13:02 ` [PATCH 2/3] net/mlx4: support for the RSS flow action Vasily Philipov
@ 2017-05-25 13:02 ` Vasily Philipov
  2017-05-25 14:05 ` [PATCH v2 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (25 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-05-25 13:02 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

If the --isolated-mode parameter is given on the command line, the
rte_flow isolate action is applied on each port before the device is
configured.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 app/test-pmd/parameters.c |  3 +++
 app/test-pmd/testpmd.c    | 14 ++++++++++++++
 app/test-pmd/testpmd.h    |  1 +
 3 files changed, 18 insertions(+)

diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index fbe6284..c21a117 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -623,6 +623,7 @@
 		{ "tx-queue-stats-mapping",	1, 0, 0 },
 		{ "rx-queue-stats-mapping",	1, 0, 0 },
 		{ "no-flush-rx",	0, 0, 0 },
+		{ "isolated-mode",	        0, 0, 0 },
 		{ "txpkts",			1, 0, 0 },
 		{ "disable-link-check",		0, 0, 0 },
 		{ "no-lsc-interrupt",		0, 0, 0 },
@@ -1081,6 +1082,8 @@
 				lsc_interrupt = 0;
 			if (!strcmp(lgopts[opt_idx].name, "no-rmv-interrupt"))
 				rmv_interrupt = 0;
+                       if (!strcmp(lgopts[opt_idx].name, "isolated-mode"))
+                               isolated_mode = 1;
 			if (!strcmp(lgopts[opt_idx].name, "print-event"))
 				if (parse_event_printing_config(optarg, 1)) {
 					rte_exit(EXIT_FAILURE,
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index d1041af..610e675 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -267,6 +267,11 @@ struct fwd_engine * fwd_engines[] = {
 uint8_t no_flush_rx = 0; /* flush by default */
 
 /*
+ * Flow API isolated mode.
+ */
+uint8_t isolated_mode;
+
+/*
  * Avoids to check link status when starting/stopping a port.
  */
 uint8_t no_link_check = 0; /* check by default */
@@ -1422,6 +1427,15 @@ static void eth_event_callback(uint8_t port_id,
 		if (port->need_reconfig > 0) {
 			port->need_reconfig = 0;
 
+			if (isolated_mode) {
+				int ret = port_flow_isolate(pi, 1);
+				if (ret) {
+					printf("Failed to apply isolated"
+					       " mode on port %d\n", pi);
+					return -1;
+				}
+			}
+
 			printf("Configuring Port %d (socket %u)\n", pi,
 					port->socket_id);
 			/* configure port */
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index e6c43ba..dbe9898 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -303,6 +303,7 @@ struct queue_stats_mappings {
 extern uint8_t  numa_support; /**< set by "--numa" parameter */
 extern uint16_t port_topology; /**< set by "--port-topology" parameter */
 extern uint8_t no_flush_rx; /**<set by "--no-flush-rx" parameter */
+extern uint8_t isolated_mode; /**< set by "--isolated-mode" parameter */
 extern uint8_t  mp_anon; /**< set by "--mp-anon" parameter */
 extern uint8_t no_link_check; /**<set by "--disable-link-check" parameter */
 extern volatile int test_done; /* stop packet forwarding when set to 1. */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v2 1/3] net/mlx4: implement isolated mode from flow API
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
  2017-05-25 13:02 ` [PATCH 2/3] net/mlx4: support for the RSS flow action Vasily Philipov
  2017-05-25 13:02 ` [PATCH 3/3] app/testpmd: add isolated mode parameter Vasily Philipov
@ 2017-05-25 14:05 ` Vasily Philipov
  2017-05-25 14:05 ` [PATCH v2 2/3] net/mlx4: support for the RSS flow action Vasily Philipov
                   ` (24 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-05-25 14:05 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

The user must request isolated mode before device configuration;
the default RSS ring isn't created in this case.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 431 ++++++++++++++++++++++++++++++-------------
 drivers/net/mlx4/mlx4.h      |  18 +-
 drivers/net/mlx4/mlx4_flow.c |  39 ++++
 drivers/net/mlx4/mlx4_flow.h |   4 +
 4 files changed, 364 insertions(+), 128 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index ec4419a..fc995c1 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -533,13 +533,96 @@ void priv_unlock(struct priv *priv)
 
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp);
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent);
 
 static void
 rxq_cleanup(struct rxq *rxq);
 
 /**
+ * Create RSS parent queue.
+ *
+ * The newly created structure is inserted at the head of priv parents list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   Queue indices array, if NULL use all Rx queues.
+ * @param children_n
+ *   Number of children; must equal priv->rxqs_n when queues is NULL.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+priv_create_parent(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n)
+{
+	int ret;
+	uint16_t i;
+	struct rxq *parent;
+
+	parent = rte_zmalloc("parent queue",
+			     sizeof(*parent),
+			     RTE_CACHE_LINE_SIZE);
+	if (!parent)
+		return -ENOMEM;
+	ret = rxq_setup(priv->dev, parent, 0, 0, 0,
+			NULL, NULL, children_n, NULL);
+	if (ret) {
+		rte_free(parent);
+		return -ret; /* rxq_setup() reports a positive errno. */
+	}
+	parent->rss.queues_n = children_n;
+	if (queues) {
+		for (i = 0; i < children_n; ++i)
+			parent->rss.queues[i] = queues[i];
+	} else {
+		/* Default RSS ring: children are Rx queues 0..rxqs_n-1. */
+		assert(priv->rxqs_n == children_n);
+		for (i = 0; i < priv->rxqs_n; ++i)
+			parent->rss.queues[i] = i;
+	}
+	LIST_INSERT_HEAD(&priv->parents, parent, next);
+	return 0;
+}
+
+/**
+ * Unlink an RSS parent rxq from its list, clean it up and free it.
+ *
+ * @param parent
+ *   RX queue parent structure, NULL is accepted and ignored.
+ */
+void
+rxq_parent_cleanup(struct rxq *parent)
+{
+	if (parent) {
+		LIST_REMOVE(parent, next);
+		rxq_cleanup(parent);
+		rte_free(parent);
+	}
+}
+
+/**
+ * Destroy every RSS parent structure found in the parents list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_parents_list_cleanup(struct priv *priv)
+{
+	while (!LIST_EMPTY(&priv->parents)) {
+		struct rxq *parent = LIST_FIRST(&priv->parents);
+
+		rxq_parent_cleanup(parent); /* Unlinks the list head. */
+	}
+}
+
+/**
  * Ethernet device configuration.
  *
  * Prepare the driver for a given number of TX and RX queues.
@@ -569,7 +652,7 @@ void priv_unlock(struct priv *priv)
 	}
 	if (rxqs_n == priv->rxqs_n)
 		return 0;
-	if (!rte_is_power_of_2(rxqs_n)) {
+	if (!rte_is_power_of_2(rxqs_n) && !priv->isolated) {
 		unsigned n_active;
 
 		n_active = rte_align32pow2(rxqs_n + 1) >> 1;
@@ -588,7 +671,7 @@ void priv_unlock(struct priv *priv)
 		for (i = 0; (i != priv->rxqs_n); ++i)
 			if ((*priv->rxqs)[i] != NULL)
 				return EINVAL;
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parents_list_cleanup(priv);
 		priv->rss = 0;
 		priv->rxqs_n = 0;
 	}
@@ -613,7 +696,9 @@ void priv_unlock(struct priv *priv)
 	priv->rss = 1;
 	tmp = priv->rxqs_n;
 	priv->rxqs_n = rxqs_n;
-	ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, 0, NULL, NULL);
+	if (priv->isolated)
+		return 0;
+	ret = priv_create_parent(priv, NULL, priv->rxqs_n);
 	if (!ret)
 		return 0;
 	/* Failure, rollback. */
@@ -2499,11 +2584,12 @@ struct txq_mp2mr_mbuf_check_data {
 {
 	unsigned int i;
 
+	assert(!priv->isolated);
 	assert(mac_index < elemof(priv->mac));
 	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
 		return;
 	if (priv->rss) {
-		rxq_mac_addr_del(&priv->rxq_parent, mac_index);
+		rxq_mac_addr_del(LIST_FIRST(&priv->parents), mac_index);
 		goto end;
 	}
 	for (i = 0; (i != priv->dev->data->nb_rx_queues); ++i)
@@ -2570,7 +2656,7 @@ struct txq_mp2mr_mbuf_check_data {
 		goto end;
 	}
 	if (priv->rss) {
-		ret = rxq_mac_addr_add(&priv->rxq_parent, mac_index);
+		ret = rxq_mac_addr_add(LIST_FIRST(&priv->parents), mac_index);
 		if (ret)
 			return ret;
 		goto end;
@@ -2748,12 +2834,13 @@ struct txq_mp2mr_mbuf_check_data {
 						rxq->if_cq,
 						&params));
 	}
-	if (rxq->qp != NULL) {
+	if (rxq->qp != NULL && !rxq->priv->isolated) {
 		rxq_promiscuous_disable(rxq);
 		rxq_allmulticast_disable(rxq);
 		rxq_mac_addrs_del(rxq);
-		claim_zero(ibv_destroy_qp(rxq->qp));
 	}
+	if (rxq->qp != NULL)
+		claim_zero(ibv_destroy_qp(rxq->qp));
 	if (rxq->cq != NULL)
 		claim_zero(ibv_destroy_cq(rxq->cq));
 	if (rxq->rd != NULL) {
@@ -3330,15 +3417,18 @@ struct txq_mp2mr_mbuf_check_data {
  *   Completion queue to associate with QP.
  * @param desc
  *   Number of descriptors in QP (hint only).
- * @param parent
- *   If nonzero, create a parent QP, otherwise a child.
+ * @param children_n
+ *   If nonzero, a number of children for parent QP and zero for a child.
+ * @param rxq_parent
+ *   Pointer for a parent in a child case, NULL otherwise.
  *
  * @return
  *   QP pointer or NULL in case of error.
  */
 static struct ibv_qp *
 rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
-		 int parent, struct ibv_exp_res_domain *rd)
+		 int children_n, struct ibv_exp_res_domain *rd,
+		 struct rxq *rxq_parent)
 {
 	struct ibv_exp_qp_init_attr attr = {
 		/* CQ to be associated with the send queue. */
@@ -3368,7 +3458,7 @@ struct txq_mp2mr_mbuf_check_data {
 	attr.max_inl_recv = priv->inl_recv_size,
 	attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV;
 #endif
-	if (parent) {
+	if (children_n > 0) {
 		attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
 		/* TSS isn't necessary. */
 		attr.qpg.parent_attrib.tss_child_count = 0;
@@ -3377,7 +3467,7 @@ struct txq_mp2mr_mbuf_check_data {
 		DEBUG("initializing parent RSS queue");
 	} else {
 		attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
-		attr.qpg.qpg_parent = priv->rxq_parent.qp;
+		attr.qpg.qpg_parent = rxq_parent->qp;
 		DEBUG("initializing child RSS queue");
 	}
 	return ibv_exp_create_qp(priv->ctx, &attr);
@@ -3413,13 +3503,7 @@ struct txq_mp2mr_mbuf_check_data {
 	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int err;
-	int parent = (rxq == &priv->rxq_parent);
 
-	if (parent) {
-		ERROR("%p: cannot rehash parent queue %p",
-		      (void *)dev, (void *)rxq);
-		return EINVAL;
-	}
 	mb_len = rte_pktmbuf_data_room_size(rxq->mp);
 	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
 	/* Number of descriptors and mbufs currently allocated. */
@@ -3451,7 +3535,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return 0;
 	}
 	/* Remove attached flows if RSS is disabled (no parent queue). */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_allmulticast_disable(&tmpl);
 		rxq_promiscuous_disable(&tmpl);
 		rxq_mac_addrs_del(&tmpl);
@@ -3464,6 +3548,8 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	/* From now on, any failure will render the queue unusable.
 	 * Reinitialize QP. */
+	if (!tmpl.qp)
+		goto skip_init;
 	mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
 	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
 	if (err) {
@@ -3471,12 +3557,6 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	}
-	err = ibv_resize_cq(tmpl.cq, desc_n);
-	if (err) {
-		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
-		assert(err > 0);
-		return err;
-	}
 	mod = (struct ibv_exp_qp_attr){
 		/* Move the QP to this state. */
 		.qp_state = IBV_QPS_INIT,
@@ -3485,9 +3565,6 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	err = ibv_exp_modify_qp(tmpl.qp, &mod,
 				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
 				 IBV_EXP_QP_PORT));
 	if (err) {
 		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
@@ -3495,8 +3572,15 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	};
+skip_init:
+	err = ibv_resize_cq(tmpl.cq, desc_n);
+	if (err) {
+		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
+		assert(err > 0);
+		return err;
+	}
 	/* Reconfigure flows. Do not care for errors. */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_mac_addrs_add(&tmpl);
 		if (priv->promisc)
 			rxq_promiscuous_enable(&tmpl);
@@ -3562,6 +3646,8 @@ struct txq_mp2mr_mbuf_check_data {
 	rxq->elts_n = 0;
 	rte_free(rxq->elts.sp);
 	rxq->elts.sp = NULL;
+	if (!tmpl.qp)
+		goto skip_rtr;
 	/* Post WRs. */
 	err = ibv_post_recv(tmpl.qp,
 			    (tmpl.sp ?
@@ -3589,6 +3675,116 @@ struct txq_mp2mr_mbuf_check_data {
 }
 
 /**
+ * Create verbs QP resources associated with a rxq.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param inactive
+ *   If true, the queue is disabled because its index is higher or
+ *   equal to the real number of queues, which must be a power of 2.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure for a child in RSS case,
+ *   NULL for parent.
+ *
+ * @return
+ *   0 on success, errno value on failure (a created rxq->qp is not destroyed).
+ */
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent)
+{
+	int ret;
+	struct ibv_exp_qp_attr mod;
+	struct ibv_exp_query_intf_params params;
+	enum ibv_exp_query_intf_status status;
+	struct ibv_recv_wr *bad_wr;
+	int parent = (children_n > 0);
+	struct priv *priv = rxq->priv;
+
+#ifdef RSS_SUPPORT
+	if (priv->rss && !inactive && (rxq_parent || parent))
+		rxq->qp = rxq_setup_qp_rss(priv, rxq->cq, desc,
+					   children_n, rxq->rd,
+					   rxq_parent);
+	else
+#endif /* RSS_SUPPORT */
+		rxq->qp = rxq_setup_qp(priv, rxq->cq, desc, rxq->rd);
+	if (rxq->qp == NULL) {
+		ret = (errno ? errno : EINVAL);
+		ERROR("QP creation failure: %s",
+		      strerror(ret));
+		return ret;
+	}
+	mod = (struct ibv_exp_qp_attr){
+		/* Move the QP to this state. */
+		.qp_state = IBV_QPS_INIT,
+		/* Primary port number. */
+		.port_num = priv->port
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod,
+				(IBV_EXP_QP_STATE |
+#ifdef RSS_SUPPORT
+				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
+#endif /* RSS_SUPPORT */
+				 IBV_EXP_QP_PORT));
+	if (ret) {
+		ERROR("QP state to IBV_QPS_INIT failed: %s",
+		      strerror(ret));
+		return ret;
+	}
+	if (!priv->isolated && (parent || !priv->rss))  {
+		/* Configure MAC and broadcast addresses. */
+		ret = rxq_mac_addrs_add(rxq);
+		if (ret) {
+			ERROR("QP flow attachment failed: %s",
+			      strerror(ret));
+			return ret;
+		}
+	}
+	if (!parent) {
+		ret = ibv_post_recv(rxq->qp,
+				    (rxq->sp ?
+				     &(*rxq->elts.sp)[0].wr :
+				     &(*rxq->elts.no_sp)[0].wr),
+				    &bad_wr);
+		if (ret) {
+			ERROR("ibv_post_recv() failed for WR %p: %s",
+			      (void *)bad_wr,
+			      strerror(ret));
+			return ret;
+		}
+	}
+	mod = (struct ibv_exp_qp_attr){
+		.qp_state = IBV_QPS_RTR
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod, IBV_EXP_QP_STATE);
+	if (ret) {
+		ERROR("QP state to IBV_QPS_RTR failed: %s",
+		      strerror(ret));
+		return ret;
+	}
+	params = (struct ibv_exp_query_intf_params){
+		.intf_scope = IBV_EXP_INTF_GLOBAL,
+		.intf = IBV_EXP_INTF_QP_BURST,
+		.obj = rxq->qp,
+	};
+	rxq->if_qp = ibv_exp_query_intf(priv->ctx, &params, &status);
+	if (rxq->if_qp == NULL) {
+		ERROR("QP interface family query failed with status %d",
+		      status);
+		return EINVAL; /* errno may be 0 here; never report success. */
+	}
+	return 0;
+}
+
+/**
  * Configure a RX queue.
  *
  * @param dev
@@ -3606,14 +3802,21 @@ struct txq_mp2mr_mbuf_check_data {
  *   Thresholds parameters.
  * @param mp
  *   Memory pool for buffer allocations.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure (or NULL) in a child case,
+ *   NULL for parent.
  *
  * @return
  *   0 on success, errno value on failure.
  */
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp)
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct rxq tmpl = {
@@ -3621,17 +3824,15 @@ struct txq_mp2mr_mbuf_check_data {
 		.mp = mp,
 		.socket = socket
 	};
-	struct ibv_exp_qp_attr mod;
 	union {
 		struct ibv_exp_query_intf_params params;
 		struct ibv_exp_cq_init_attr cq;
 		struct ibv_exp_res_domain_init_attr rd;
 	} attr;
 	enum ibv_exp_query_intf_status status;
-	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int ret = 0;
-	int parent = (rxq == &priv->rxq_parent);
+	int parent = (children_n > 0);
 
 	(void)conf; /* Thresholds configuration (ignored). */
 	/*
@@ -3711,45 +3912,6 @@ struct txq_mp2mr_mbuf_check_data {
 	      priv->device_attr.max_qp_wr);
 	DEBUG("priv->device_attr.max_sge is %d",
 	      priv->device_attr.max_sge);
-#ifdef RSS_SUPPORT
-	if (priv->rss && !inactive)
-		tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
-					   tmpl.rd);
-	else
-#endif /* RSS_SUPPORT */
-		tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
-	if (tmpl.qp == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: QP creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	mod = (struct ibv_exp_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod,
-				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
-				 IBV_EXP_QP_PORT));
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	if ((parent) || (!priv->rss))  {
-		/* Configure MAC and broadcast addresses. */
-		ret = rxq_mac_addrs_add(&tmpl);
-		if (ret) {
-			ERROR("%p: QP flow attachment failed: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
 	/* Allocate descriptors for RX queues, except for the RSS parent. */
 	if (parent)
 		goto skip_alloc;
@@ -3760,29 +3922,14 @@ struct txq_mp2mr_mbuf_check_data {
 	if (ret) {
 		ERROR("%p: RXQ allocation failed: %s",
 		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	ret = ibv_post_recv(tmpl.qp,
-			    (tmpl.sp ?
-			     &(*tmpl.elts.sp)[0].wr :
-			     &(*tmpl.elts.no_sp)[0].wr),
-			    &bad_wr);
-	if (ret) {
-		ERROR("%p: ibv_post_recv() failed for WR %p: %s",
-		      (void *)dev,
-		      (void *)bad_wr,
-		      strerror(ret));
-		goto error;
+		return ret;
 	}
 skip_alloc:
-	mod = (struct ibv_exp_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
+	if (parent || rxq_parent || !priv->rss) {
+		ret = rxq_create_qp(&tmpl, desc, inactive,
+				    children_n, rxq_parent);
+		if (ret)
+			goto error;
 	}
 	/* Save port ID. */
 	tmpl.port_id = dev->data->port_id;
@@ -3794,21 +3941,11 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
 	if (tmpl.if_cq == NULL) {
+		ret = EINVAL;
 		ERROR("%p: CQ interface family query failed with status %d",
 		      (void *)dev, status);
 		goto error;
 	}
-	attr.params = (struct ibv_exp_query_intf_params){
-		.intf_scope = IBV_EXP_INTF_GLOBAL,
-		.intf = IBV_EXP_INTF_QP_BURST,
-		.obj = tmpl.qp,
-	};
-	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
-	if (tmpl.if_qp == NULL) {
-		ERROR("%p: QP interface family query failed with status %d",
-		      (void *)dev, status);
-		goto error;
-	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
 	rxq_cleanup(rxq);
@@ -3846,6 +3983,7 @@ struct txq_mp2mr_mbuf_check_data {
 		    unsigned int socket, const struct rte_eth_rxconf *conf,
 		    struct rte_mempool *mp)
 {
+	struct rxq *parent;
 	struct priv *priv = dev->data->dev_private;
 	struct rxq *rxq = (*priv->rxqs)[idx];
 	int inactive = 0;
@@ -3880,9 +4018,16 @@ struct txq_mp2mr_mbuf_check_data {
 			return -ENOMEM;
 		}
 	}
-	if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
-		inactive = 1;
-	ret = rxq_setup(dev, rxq, desc, socket, inactive, conf, mp);
+	if (priv->rss && !priv->isolated) {
+		/* The list consists of the single default one. */
+		parent = LIST_FIRST(&priv->parents);
+		if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
+			inactive = 1;
+	} else {
+		parent = NULL;
+	}
+	ret = rxq_setup(dev, rxq, desc, socket,
+			inactive, conf, mp, 0, parent);
 	if (ret)
 		rte_free(rxq);
 	else {
@@ -3919,7 +4064,6 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	priv = rxq->priv;
 	priv_lock(priv);
-	assert(rxq != &priv->rxq_parent);
 	for (i = 0; (i != priv->rxqs_n); ++i)
 		if ((*priv->rxqs)[i] == rxq) {
 			DEBUG("%p: removing RX queue %p from list",
@@ -3970,8 +4114,11 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	if (priv->rss) {
-		rxq = &priv->rxq_parent;
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4053,8 +4200,11 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
 	priv->started = 0;
-	if (priv->rss) {
-		rxq = &priv->rxq_parent;
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4188,7 +4338,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->txqs = NULL;
 	}
 	if (priv->rss)
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parents_list_cleanup(priv);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
 		claim_zero(ibv_dealloc_pd(priv->pd));
@@ -4481,6 +4631,8 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated)
+		goto end;
 	DEBUG("%p: removing MAC address from index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4514,6 +4666,12 @@ struct txq_mp2mr_mbuf_check_data {
 		return -ENOTSUP;
 	(void)vmdq;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot add MAC address, "
+		      "device is in isolated mode", (void *)dev);
+		re = EPERM;
+		goto end;
+	}
 	DEBUG("%p: adding MAC address at index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4561,6 +4719,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable promiscuous, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->promisc) {
 		priv_unlock(priv);
 		return;
@@ -4569,7 +4733,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_promiscuous_enable(&priv->rxq_parent);
+		ret = rxq_promiscuous_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4609,12 +4773,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
-	if (!priv->promisc) {
+	if (!priv->promisc || priv->isolated) {
 		priv_unlock(priv);
 		return;
 	}
 	if (priv->rss) {
-		rxq_promiscuous_disable(&priv->rxq_parent);
+		rxq_promiscuous_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -4641,6 +4805,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable allmulticast, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->allmulti) {
 		priv_unlock(priv);
 		return;
@@ -4649,7 +4819,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_allmulticast_enable(&priv->rxq_parent);
+		ret = rxq_allmulticast_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4693,8 +4863,8 @@ struct txq_mp2mr_mbuf_check_data {
 		priv_unlock(priv);
 		return;
 	}
-	if (priv->rss) {
-		rxq_allmulticast_disable(&priv->rxq_parent);
+	if (priv->rss && !priv->isolated) {
+		rxq_allmulticast_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -4832,7 +5002,7 @@ struct txq_mp2mr_mbuf_check_data {
 		}
 		/* Reenable non-RSS queue attributes. No need to check
 		 * for errors at this stage. */
-		if (!priv->rss) {
+		if (!priv->rss && !priv->isolated) {
 			rxq_mac_addrs_add(rxq);
 			if (priv->promisc)
 				rxq_promiscuous_enable(rxq);
@@ -5003,7 +5173,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5011,7 +5181,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 1;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
@@ -5025,7 +5195,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5033,7 +5203,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 0;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
@@ -5067,6 +5237,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return -E_RTE_SECONDARY;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot set vlan filter, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return -EINVAL;
+	}
 	ret = vlan_filter_set(dev, vlan_id, on);
 	priv_unlock(priv);
 	assert(ret >= 0);
@@ -5079,6 +5255,7 @@ struct txq_mp2mr_mbuf_check_data {
 	.destroy = mlx4_flow_destroy,
 	.flush = mlx4_flow_flush,
 	.query = NULL,
+	.isolate = mlx4_flow_isolate,
 };
 
 /**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 9a3bae9..b5fe1b4 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -219,6 +219,7 @@ struct rxq_elt {
 
 /* RX queue descriptor. */
 struct rxq {
+	LIST_ENTRY(rxq) next; /* Used by parent queue only */
 	struct priv *priv; /* Back pointer to private data. */
 	struct rte_mempool *mp; /* Memory Pool for allocations. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
@@ -246,6 +247,10 @@ struct rxq {
 	struct mlx4_rxq_stats stats; /* RX queue counters. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	struct ibv_exp_res_domain *rd; /* Resource Domain. */
+	struct {
+		uint16_t queues_n;
+		uint16_t queues[RTE_MAX_QUEUES_PER_PORT];
+	} rss;
 };
 
 /* TX element. */
@@ -334,12 +339,12 @@ struct priv {
 	unsigned int rss:1; /* RSS is enabled. */
 	unsigned int vf:1; /* This is a VF device. */
 	unsigned int pending_alarm:1; /* An alarm is pending. */
+	unsigned int isolated:1; /* Toggle isolated mode. */
 #ifdef INLINE_RECV
 	unsigned int inl_recv_size; /* Inline recv size */
 #endif
 	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
 	/* RX/TX queues. */
-	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
 	struct rxq *(*rxqs)[]; /* RX queues. */
@@ -348,10 +353,21 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	LIST_HEAD(mlx4_flows, rte_flow) flows;
 	struct rte_intr_conf intr_conf; /* Active interrupt configuration. */
+	LIST_HEAD(mlx4_parents, rxq) parents;
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
 void priv_lock(struct priv *priv);
 void priv_unlock(struct priv *priv);
 
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent);
+
+void
+rxq_parent_cleanup(struct rxq *parent);
+
 #endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index edfac03..3fd2716 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -957,6 +957,45 @@ struct rte_flow *
 }
 
 /**
+ * @see rte_flow_isolate()
+ *
+ * Must be done before calling dev_configure().
+ *
+ * RSS action is possible only if this mode was requested.
+ *
+ * @param dev
+ *   Pointer to the ethernet device structure.
+ * @param enable
+ *   Nonzero to enter isolated mode, attempt to leave it otherwise.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative value on error.
+ */
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	if (priv->rxqs) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "isolated mode must be set"
+				   " before configuring the device");
+		priv_unlock(priv);
+		return -rte_errno;
+	}
+	priv->isolated = !!enable;
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
  * Destroy a flow.
  *
  * @param priv
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 12a293e..6afc57f 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -83,6 +83,10 @@ struct rte_flow *
 int
 mlx4_flow_flush(struct rte_eth_dev *dev,
 		struct rte_flow_error *error);
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error);
 
 /** Structure to pass to the conversion function. */
 struct mlx4_flow {
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v2 2/3] net/mlx4: support for the RSS flow action
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (2 preceding siblings ...)
  2017-05-25 14:05 ` [PATCH v2 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
@ 2017-05-25 14:05 ` Vasily Philipov
  2017-05-25 14:05 ` [PATCH v2 3/3] app/testpmd: add isolated mode parameter Vasily Philipov
                   ` (23 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-05-25 14:05 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Isolated mode must be enabled to use the RSS flow action.
The number of queues in an RSS ring must be a power of 2.
Sharing a queue between several RSS rings is not supported.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      |   2 +-
 drivers/net/mlx4/mlx4.h      |   5 ++
 drivers/net/mlx4/mlx4_flow.c | 195 ++++++++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx4/mlx4_flow.h |   3 +-
 4 files changed, 199 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index fc995c1..b51fef4 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -556,7 +556,7 @@ void priv_unlock(struct priv *priv)
  * @return
  *   0 on success, negative errno value on failure.
  */
-static int
+int
 priv_create_parent(struct priv *priv,
 		   uint16_t queues[],
 		   uint16_t children_n)
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index b5fe1b4..e95e3b5 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -370,4 +370,9 @@ struct priv {
 void
 rxq_parent_cleanup(struct rxq *parent);
 
+int
+priv_create_parent(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n);
+
 #endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 3fd2716..8bc911e 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -112,6 +112,7 @@ struct rte_flow_drop {
 static const enum rte_flow_action_type valid_actions[] = {
 	RTE_FLOW_ACTION_TYPE_DROP,
 	RTE_FLOW_ACTION_TYPE_QUEUE,
+	RTE_FLOW_ACTION_TYPE_RSS,
 	RTE_FLOW_ACTION_TYPE_END,
 };
 
@@ -672,6 +673,76 @@ struct rte_flow_drop {
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
 			action.queue = 1;
+			action.queues_n = 1;
+			action.queues[0] = queue->index;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			int i;
+			int ierr;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				actions->conf;
+
+			if (!priv->hw_rss) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used with "
+					   "the current configuration");
+				return -rte_errno;
+			}
+			if (!priv->isolated) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used without "
+					   "isolated mode");
+				return -rte_errno;
+			}
+			if (!rte_is_power_of_2(rss->num)) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "should be power of two");
+				return -rte_errno;
+			}
+			if (priv->max_rss_tbl_sz < rss->num) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "is too large");
+				return -rte_errno;
+			}
+			/* checking indexes array */
+			ierr = 0;
+			for (i = 0; i < rss->num; ++i) {
+				int j;
+				if (rss->queue[i] >= priv->rxqs_n)
+					ierr = 1;
+				/*
+				 * Prevent the user from specifying
+				 * the same queue twice in the RSS array.
+				 */
+				for (j = i + 1; j < rss->num && !ierr; ++j)
+					if (rss->queue[j] == rss->queue[i])
+						ierr = 1;
+				if (ierr) {
+					rte_flow_error_set(
+						error,
+						ENOTSUP,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"RSS action only supports "
+						"unique queue indices "
+						"in a list");
+					return -rte_errno;
+				}
+			}
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			goto exit_action_not_supported;
 		}
@@ -797,6 +868,79 @@ struct rte_flow_drop {
 }
 
 /**
+ * Get RSS parent rxq structure for given queues.
+ *
+ * Creates a new one or returns an existing one.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   queues indices array, NULL in default RSS case.
+ * @param children_n
+ *   the size of queues array.
+ *
+ * @return
+ *   Pointer to a parent rxq structure, NULL on failure.
+ */
+static struct rxq *
+priv_get_parent(struct priv *priv,
+		uint16_t queues[],
+		uint16_t children_n,
+		struct rte_flow_error *error)
+{
+	int ret;
+	unsigned int i;
+	struct rxq *parent;
+
+	for (parent = LIST_FIRST(&priv->parents);
+	     parent;
+	     parent = LIST_NEXT(parent, next)) {
+		unsigned int overlap = 0;
+
+		for (i = 0; i < children_n; ++i) {
+			unsigned int j;
+
+			for (j = 0; j < parent->rss.queues_n; ++j)
+				if (i != j &&
+				    parent->rss.queues[j] == queues[i]) {
+					goto error;
+				} else if (parent->rss.queues[j] == queues[i]) {
+					++overlap;
+					break;
+				}
+		}
+		if (overlap == children_n &&
+			children_n == parent->rss.queues_n)
+			return parent;
+		else if (overlap > 0)
+			goto error;
+	}
+	/* Exclude the cases when some QPs were created without RSS */
+	for (i = 0; i < children_n; ++i) {
+		struct rxq *rxq = (*priv->rxqs)[queues[i]];
+		if (rxq->qp)
+			goto error;
+	}
+	ret = priv_create_parent(priv, queues, children_n);
+	if (ret) {
+		rte_flow_error_set(error,
+				   ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "flow rule creation failure");
+		return NULL;
+	}
+	return LIST_FIRST(&priv->parents);
+
+error:
+	rte_flow_error_set(error,
+			   EEXIST,
+			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL,
+			   "sharing a queue between several"
+			   " RSS groups is not supported");
+	return NULL;
+}
+
+/**
  * Complete flow rule creation.
  *
  * @param priv
@@ -831,9 +975,41 @@ struct rte_flow_drop {
 	if (action->drop) {
 		qp = priv->flow_drop_queue->qp;
 	} else {
-		struct rxq *rxq = (*priv->rxqs)[action->queue_id];
-
-		qp = rxq->qp;
+		int ret;
+		unsigned int i;
+		struct rxq *rxq = NULL;
+		struct rxq *rxq_parent = NULL;
+
+		if (action->queues_n > 1) {
+			rxq_parent = priv_get_parent(priv, action->queues,
+						     action->queues_n, error);
+			if (!rxq_parent)
+				goto error;
+		}
+		for (i = 0; i < action->queues_n; ++i) {
+			rxq = (*priv->rxqs)[action->queues[i]];
+			/*
+			 * In isolated mode, creation of the ibv receive
+			 * queue is postponed until the first rte_flow
+			 * rule is applied to that queue.
+			 */
+			if (!rxq->qp) {
+				assert(priv->isolated);
+				ret = rxq_create_qp(rxq, rxq->elts_n,
+						    0, 0, rxq_parent);
+				if (ret) {
+					rxq_parent_cleanup(rxq_parent);
+					rte_flow_error_set(
+						error,
+						ENOMEM,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"flow rule creation failure");
+					goto error;
+				}
+			}
+		}
+		qp = action->queues_n > 1 ? rxq_parent->qp : rxq->qp;
 		rte_flow->qp = qp;
 	}
 	rte_flow->ibv_attr = ibv_attr;
@@ -909,11 +1085,22 @@ struct rte_flow_drop {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			action.queue = 1;
-			action.queue_id =
+			action.queues_n = 1;
+			action.queues[0] =
 				((const struct rte_flow_action_queue *)
 				 actions->conf)->index;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
 			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			unsigned int i;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				 actions->conf;
+
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 6afc57f..823d3b6 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -97,7 +97,8 @@ struct mlx4_flow {
 struct mlx4_flow_action {
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t queue:1; /**< Target is a receive queue. */
-	uint32_t queue_id; /**< Identifier of the queue. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indices to use. */
+	uint16_t queues_n; /**< Number of entries in queue[] */
 };
 
 int mlx4_priv_flow_start(struct priv *priv);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v2 3/3] app/testpmd: add isolated mode parameter
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (3 preceding siblings ...)
  2017-05-25 14:05 ` [PATCH v2 2/3] net/mlx4: support for the RSS flow action Vasily Philipov
@ 2017-05-25 14:05 ` Vasily Philipov
  2017-05-25 14:10 ` [PATCH v3 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (22 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-05-25 14:05 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

If the --isolated-mode parameter is given on the command line,
the rte_flow isolate action is applied to each port before
the device is configured.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 app/test-pmd/parameters.c |  3 +++
 app/test-pmd/testpmd.c    | 14 ++++++++++++++
 app/test-pmd/testpmd.h    |  1 +
 3 files changed, 18 insertions(+)

diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index fbe6284..e313871 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -623,6 +623,7 @@
 		{ "tx-queue-stats-mapping",	1, 0, 0 },
 		{ "rx-queue-stats-mapping",	1, 0, 0 },
 		{ "no-flush-rx",	0, 0, 0 },
+		{ "isolated-mode",	        0, 0, 0 },
 		{ "txpkts",			1, 0, 0 },
 		{ "disable-link-check",		0, 0, 0 },
 		{ "no-lsc-interrupt",		0, 0, 0 },
@@ -1081,6 +1082,8 @@
 				lsc_interrupt = 0;
 			if (!strcmp(lgopts[opt_idx].name, "no-rmv-interrupt"))
 				rmv_interrupt = 0;
+			if (!strcmp(lgopts[opt_idx].name, "isolated-mode"))
+				isolated_mode = 1;
 			if (!strcmp(lgopts[opt_idx].name, "print-event"))
 				if (parse_event_printing_config(optarg, 1)) {
 					rte_exit(EXIT_FAILURE,
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index d1041af..610e675 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -267,6 +267,11 @@ struct fwd_engine * fwd_engines[] = {
 uint8_t no_flush_rx = 0; /* flush by default */
 
 /*
+ * Flow API isolated mode.
+ */
+uint8_t isolated_mode;
+
+/*
  * Avoids to check link status when starting/stopping a port.
  */
 uint8_t no_link_check = 0; /* check by default */
@@ -1422,6 +1427,15 @@ static void eth_event_callback(uint8_t port_id,
 		if (port->need_reconfig > 0) {
 			port->need_reconfig = 0;
 
+			if (isolated_mode) {
+				int ret = port_flow_isolate(pi, 1);
+				if (ret) {
+					printf("Failed to apply isolated"
+					       " mode on port %d\n", pi);
+					return -1;
+				}
+			}
+
 			printf("Configuring Port %d (socket %u)\n", pi,
 					port->socket_id);
 			/* configure port */
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index e6c43ba..dbe9898 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -303,6 +303,7 @@ struct queue_stats_mappings {
 extern uint8_t  numa_support; /**< set by "--numa" parameter */
 extern uint16_t port_topology; /**< set by "--port-topology" parameter */
 extern uint8_t no_flush_rx; /**<set by "--no-flush-rx" parameter */
+extern uint8_t isolated_mode; /**<set by "--isolated-mode */
 extern uint8_t  mp_anon; /**< set by "--mp-anon" parameter */
 extern uint8_t no_link_check; /**<set by "--disable-link-check" parameter */
 extern volatile int test_done; /* stop packet forwarding when set to 1. */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v3 1/3] net/mlx4: implement isolated mode from flow API
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (4 preceding siblings ...)
  2017-05-25 14:05 ` [PATCH v2 3/3] app/testpmd: add isolated mode parameter Vasily Philipov
@ 2017-05-25 14:10 ` Vasily Philipov
  2017-05-25 14:10 ` [PATCH v3 2/3] net/mlx4: support for the RSS flow action Vasily Philipov
                   ` (21 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-05-25 14:10 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

The user must request isolated mode before device configuration;
the default RSS ring is not created in this case.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
The series depends on:

http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/
---
 drivers/net/mlx4/mlx4.c      | 431 ++++++++++++++++++++++++++++++-------------
 drivers/net/mlx4/mlx4.h      |  18 +-
 drivers/net/mlx4/mlx4_flow.c |  39 ++++
 drivers/net/mlx4/mlx4_flow.h |   4 +
 4 files changed, 364 insertions(+), 128 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index ec4419a..fc995c1 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -533,13 +533,96 @@ void priv_unlock(struct priv *priv)
 
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp);
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent);
 
 static void
 rxq_cleanup(struct rxq *rxq);
 
 /**
+ * Create RSS parent queue.
+ *
+ * The newly created structure will be at the head of the priv parents list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   queues indices array, if NULL use all Rx queues.
+ * @param children_n
+ *   The number of entries in queues[].
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+priv_create_parent(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n)
+{
+	int ret;
+	uint16_t i;
+	struct rxq *parent;
+
+	parent = rte_zmalloc("parent queue",
+			     sizeof(*parent),
+			     RTE_CACHE_LINE_SIZE);
+	if (!parent)
+		return -ENOMEM;
+	ret = rxq_setup(priv->dev, parent, 0, 0, 0,
+			NULL, NULL, children_n, NULL);
+	if (ret) {
+		rte_free(parent);
+		return -ret;
+	}
+	parent->rss.queues_n = children_n;
+	if (queues) {
+		for (i = 0; i < children_n; ++i)
+			parent->rss.queues[i] = queues[i];
+	} else {
+		/* the default RSS ring case */
+		assert(priv->rxqs_n == children_n);
+		for (i = 0; i < priv->rxqs_n; ++i)
+			parent->rss.queues[i] = i;
+	}
+	LIST_INSERT_HEAD(&priv->parents, parent, next);
+	return 0;
+}
+
+/**
+ * Cleanup RX queue parent structure.
+ *
+ * @param parent
+ *   RX queue parent structure.
+ */
+void
+rxq_parent_cleanup(struct rxq *parent)
+{
+	if (parent) {
+		LIST_REMOVE(parent, next);
+		rxq_cleanup(parent);
+		rte_free(parent);
+	}
+}
+
+/**
+ * Clean up parent structures from the parents list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_parents_list_cleanup(struct priv *priv)
+{
+	while (!LIST_EMPTY(&priv->parents)) {
+		struct rxq *parent = LIST_FIRST(&priv->parents);
+
+		rxq_parent_cleanup(parent);
+	}
+}
+
+/**
  * Ethernet device configuration.
  *
  * Prepare the driver for a given number of TX and RX queues.
@@ -569,7 +652,7 @@ void priv_unlock(struct priv *priv)
 	}
 	if (rxqs_n == priv->rxqs_n)
 		return 0;
-	if (!rte_is_power_of_2(rxqs_n)) {
+	if (!rte_is_power_of_2(rxqs_n) && !priv->isolated) {
 		unsigned n_active;
 
 		n_active = rte_align32pow2(rxqs_n + 1) >> 1;
@@ -588,7 +671,7 @@ void priv_unlock(struct priv *priv)
 		for (i = 0; (i != priv->rxqs_n); ++i)
 			if ((*priv->rxqs)[i] != NULL)
 				return EINVAL;
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parents_list_cleanup(priv);
 		priv->rss = 0;
 		priv->rxqs_n = 0;
 	}
@@ -613,7 +696,9 @@ void priv_unlock(struct priv *priv)
 	priv->rss = 1;
 	tmp = priv->rxqs_n;
 	priv->rxqs_n = rxqs_n;
-	ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, 0, NULL, NULL);
+	if (priv->isolated)
+		return 0;
+	ret = priv_create_parent(priv, NULL, priv->rxqs_n);
 	if (!ret)
 		return 0;
 	/* Failure, rollback. */
@@ -2499,11 +2584,12 @@ struct txq_mp2mr_mbuf_check_data {
 {
 	unsigned int i;
 
+	assert(!priv->isolated);
 	assert(mac_index < elemof(priv->mac));
 	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
 		return;
 	if (priv->rss) {
-		rxq_mac_addr_del(&priv->rxq_parent, mac_index);
+		rxq_mac_addr_del(LIST_FIRST(&priv->parents), mac_index);
 		goto end;
 	}
 	for (i = 0; (i != priv->dev->data->nb_rx_queues); ++i)
@@ -2570,7 +2656,7 @@ struct txq_mp2mr_mbuf_check_data {
 		goto end;
 	}
 	if (priv->rss) {
-		ret = rxq_mac_addr_add(&priv->rxq_parent, mac_index);
+		ret = rxq_mac_addr_add(LIST_FIRST(&priv->parents), mac_index);
 		if (ret)
 			return ret;
 		goto end;
@@ -2748,12 +2834,13 @@ struct txq_mp2mr_mbuf_check_data {
 						rxq->if_cq,
 						&params));
 	}
-	if (rxq->qp != NULL) {
+	if (rxq->qp != NULL && !rxq->priv->isolated) {
 		rxq_promiscuous_disable(rxq);
 		rxq_allmulticast_disable(rxq);
 		rxq_mac_addrs_del(rxq);
-		claim_zero(ibv_destroy_qp(rxq->qp));
 	}
+	if (rxq->qp != NULL)
+		claim_zero(ibv_destroy_qp(rxq->qp));
 	if (rxq->cq != NULL)
 		claim_zero(ibv_destroy_cq(rxq->cq));
 	if (rxq->rd != NULL) {
@@ -3330,15 +3417,18 @@ struct txq_mp2mr_mbuf_check_data {
  *   Completion queue to associate with QP.
  * @param desc
  *   Number of descriptors in QP (hint only).
- * @param parent
- *   If nonzero, create a parent QP, otherwise a child.
+ * @param children_n
+ *   Number of children when creating a parent QP, zero when creating a child.
+ * @param rxq_parent
+ *   Pointer for a parent in a child case, NULL otherwise.
  *
  * @return
  *   QP pointer or NULL in case of error.
  */
 static struct ibv_qp *
 rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
-		 int parent, struct ibv_exp_res_domain *rd)
+		 int children_n, struct ibv_exp_res_domain *rd,
+		 struct rxq *rxq_parent)
 {
 	struct ibv_exp_qp_init_attr attr = {
 		/* CQ to be associated with the send queue. */
@@ -3368,7 +3458,7 @@ struct txq_mp2mr_mbuf_check_data {
 	attr.max_inl_recv = priv->inl_recv_size,
 	attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV;
 #endif
-	if (parent) {
+	if (children_n > 0) {
 		attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
 		/* TSS isn't necessary. */
 		attr.qpg.parent_attrib.tss_child_count = 0;
@@ -3377,7 +3467,7 @@ struct txq_mp2mr_mbuf_check_data {
 		DEBUG("initializing parent RSS queue");
 	} else {
 		attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
-		attr.qpg.qpg_parent = priv->rxq_parent.qp;
+		attr.qpg.qpg_parent = rxq_parent->qp;
 		DEBUG("initializing child RSS queue");
 	}
 	return ibv_exp_create_qp(priv->ctx, &attr);
@@ -3413,13 +3503,7 @@ struct txq_mp2mr_mbuf_check_data {
 	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int err;
-	int parent = (rxq == &priv->rxq_parent);
 
-	if (parent) {
-		ERROR("%p: cannot rehash parent queue %p",
-		      (void *)dev, (void *)rxq);
-		return EINVAL;
-	}
 	mb_len = rte_pktmbuf_data_room_size(rxq->mp);
 	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
 	/* Number of descriptors and mbufs currently allocated. */
@@ -3451,7 +3535,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return 0;
 	}
 	/* Remove attached flows if RSS is disabled (no parent queue). */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_allmulticast_disable(&tmpl);
 		rxq_promiscuous_disable(&tmpl);
 		rxq_mac_addrs_del(&tmpl);
@@ -3464,6 +3548,8 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	/* From now on, any failure will render the queue unusable.
 	 * Reinitialize QP. */
+	if (!tmpl.qp)
+		goto skip_init;
 	mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
 	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
 	if (err) {
@@ -3471,12 +3557,6 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	}
-	err = ibv_resize_cq(tmpl.cq, desc_n);
-	if (err) {
-		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
-		assert(err > 0);
-		return err;
-	}
 	mod = (struct ibv_exp_qp_attr){
 		/* Move the QP to this state. */
 		.qp_state = IBV_QPS_INIT,
@@ -3485,9 +3565,6 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	err = ibv_exp_modify_qp(tmpl.qp, &mod,
 				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
 				 IBV_EXP_QP_PORT));
 	if (err) {
 		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
@@ -3495,8 +3572,15 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	};
+skip_init:
+	err = ibv_resize_cq(tmpl.cq, desc_n);
+	if (err) {
+		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
+		assert(err > 0);
+		return err;
+	}
 	/* Reconfigure flows. Do not care for errors. */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_mac_addrs_add(&tmpl);
 		if (priv->promisc)
 			rxq_promiscuous_enable(&tmpl);
@@ -3562,6 +3646,8 @@ struct txq_mp2mr_mbuf_check_data {
 	rxq->elts_n = 0;
 	rte_free(rxq->elts.sp);
 	rxq->elts.sp = NULL;
+	if (!tmpl.qp)
+		goto skip_rtr;
 	/* Post WRs. */
 	err = ibv_post_recv(tmpl.qp,
 			    (tmpl.sp ?
@@ -3589,6 +3675,116 @@ struct txq_mp2mr_mbuf_check_data {
 }
 
 /**
+ * Create verbs QP resources associated with a rxq.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param inactive
+ *   If true, the queue is disabled because its index is higher or
+ *   equal to the real number of queues, which must be a power of 2.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure for a child in RSS case,
+ *   NULL for parent.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent)
+{
+	int ret;
+	struct ibv_exp_qp_attr mod;
+	struct ibv_exp_query_intf_params params;
+	enum ibv_exp_query_intf_status status;
+	struct ibv_recv_wr *bad_wr;
+	int parent = (children_n > 0);
+	struct priv *priv = rxq->priv;
+
+#ifdef RSS_SUPPORT
+	if (priv->rss && !inactive && (rxq_parent || parent))
+		rxq->qp = rxq_setup_qp_rss(priv, rxq->cq, desc,
+					   children_n, rxq->rd,
+					   rxq_parent);
+	else
+#endif /* RSS_SUPPORT */
+		rxq->qp = rxq_setup_qp(priv, rxq->cq, desc, rxq->rd);
+	if (rxq->qp == NULL) {
+		ret = (errno ? errno : EINVAL);
+		ERROR("QP creation failure: %s",
+		      strerror(ret));
+		return ret;
+	}
+	mod = (struct ibv_exp_qp_attr){
+		/* Move the QP to this state. */
+		.qp_state = IBV_QPS_INIT,
+		/* Primary port number. */
+		.port_num = priv->port
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod,
+				(IBV_EXP_QP_STATE |
+#ifdef RSS_SUPPORT
+				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
+#endif /* RSS_SUPPORT */
+				 IBV_EXP_QP_PORT));
+	if (ret) {
+		ERROR("QP state to IBV_QPS_INIT failed: %s",
+		      strerror(ret));
+		return ret;
+	}
+	if (!priv->isolated && (parent || !priv->rss))  {
+		/* Configure MAC and broadcast addresses. */
+		ret = rxq_mac_addrs_add(rxq);
+		if (ret) {
+			ERROR("QP flow attachment failed: %s",
+			      strerror(ret));
+			return ret;
+		}
+	}
+	if (!parent) {
+		ret = ibv_post_recv(rxq->qp,
+				    (rxq->sp ?
+				     &(*rxq->elts.sp)[0].wr :
+				     &(*rxq->elts.no_sp)[0].wr),
+				    &bad_wr);
+		if (ret) {
+			ERROR("ibv_post_recv() failed for WR %p: %s",
+			      (void *)bad_wr,
+			      strerror(ret));
+			return ret;
+		}
+	}
+	mod = (struct ibv_exp_qp_attr){
+		.qp_state = IBV_QPS_RTR
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod, IBV_EXP_QP_STATE);
+	if (ret) {
+		ERROR("QP state to IBV_QPS_RTR failed: %s",
+		      strerror(ret));
+		return ret;
+	}
+	params = (struct ibv_exp_query_intf_params){
+		.intf_scope = IBV_EXP_INTF_GLOBAL,
+		.intf = IBV_EXP_INTF_QP_BURST,
+		.obj = rxq->qp,
+	};
+	rxq->if_qp = ibv_exp_query_intf(priv->ctx, &params, &status);
+	if (rxq->if_qp == NULL) {
+		ERROR("QP interface family query failed with status %d",
+		      status);
+		return errno;
+	}
+	return 0;
+}
+
+/**
  * Configure a RX queue.
  *
  * @param dev
@@ -3606,14 +3802,21 @@ struct txq_mp2mr_mbuf_check_data {
  *   Thresholds parameters.
  * @param mp
  *   Memory pool for buffer allocations.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure (or NULL) in a child case,
+ *   NULL for parent.
  *
  * @return
  *   0 on success, errno value on failure.
  */
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp)
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct rxq tmpl = {
@@ -3621,17 +3824,15 @@ struct txq_mp2mr_mbuf_check_data {
 		.mp = mp,
 		.socket = socket
 	};
-	struct ibv_exp_qp_attr mod;
 	union {
 		struct ibv_exp_query_intf_params params;
 		struct ibv_exp_cq_init_attr cq;
 		struct ibv_exp_res_domain_init_attr rd;
 	} attr;
 	enum ibv_exp_query_intf_status status;
-	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int ret = 0;
-	int parent = (rxq == &priv->rxq_parent);
+	int parent = (children_n > 0);
 
 	(void)conf; /* Thresholds configuration (ignored). */
 	/*
@@ -3711,45 +3912,6 @@ struct txq_mp2mr_mbuf_check_data {
 	      priv->device_attr.max_qp_wr);
 	DEBUG("priv->device_attr.max_sge is %d",
 	      priv->device_attr.max_sge);
-#ifdef RSS_SUPPORT
-	if (priv->rss && !inactive)
-		tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
-					   tmpl.rd);
-	else
-#endif /* RSS_SUPPORT */
-		tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
-	if (tmpl.qp == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: QP creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	mod = (struct ibv_exp_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod,
-				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
-				 IBV_EXP_QP_PORT));
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	if ((parent) || (!priv->rss))  {
-		/* Configure MAC and broadcast addresses. */
-		ret = rxq_mac_addrs_add(&tmpl);
-		if (ret) {
-			ERROR("%p: QP flow attachment failed: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
 	/* Allocate descriptors for RX queues, except for the RSS parent. */
 	if (parent)
 		goto skip_alloc;
@@ -3760,29 +3922,14 @@ struct txq_mp2mr_mbuf_check_data {
 	if (ret) {
 		ERROR("%p: RXQ allocation failed: %s",
 		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	ret = ibv_post_recv(tmpl.qp,
-			    (tmpl.sp ?
-			     &(*tmpl.elts.sp)[0].wr :
-			     &(*tmpl.elts.no_sp)[0].wr),
-			    &bad_wr);
-	if (ret) {
-		ERROR("%p: ibv_post_recv() failed for WR %p: %s",
-		      (void *)dev,
-		      (void *)bad_wr,
-		      strerror(ret));
-		goto error;
+		return ret;
 	}
 skip_alloc:
-	mod = (struct ibv_exp_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
+	if (parent || rxq_parent || !priv->rss) {
+		ret = rxq_create_qp(&tmpl, desc, inactive,
+				    children_n, rxq_parent);
+		if (ret)
+			goto error;
 	}
 	/* Save port ID. */
 	tmpl.port_id = dev->data->port_id;
@@ -3794,21 +3941,11 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
 	if (tmpl.if_cq == NULL) {
+		ret = EINVAL;
 		ERROR("%p: CQ interface family query failed with status %d",
 		      (void *)dev, status);
 		goto error;
 	}
-	attr.params = (struct ibv_exp_query_intf_params){
-		.intf_scope = IBV_EXP_INTF_GLOBAL,
-		.intf = IBV_EXP_INTF_QP_BURST,
-		.obj = tmpl.qp,
-	};
-	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
-	if (tmpl.if_qp == NULL) {
-		ERROR("%p: QP interface family query failed with status %d",
-		      (void *)dev, status);
-		goto error;
-	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
 	rxq_cleanup(rxq);
@@ -3846,6 +3983,7 @@ struct txq_mp2mr_mbuf_check_data {
 		    unsigned int socket, const struct rte_eth_rxconf *conf,
 		    struct rte_mempool *mp)
 {
+	struct rxq *parent;
 	struct priv *priv = dev->data->dev_private;
 	struct rxq *rxq = (*priv->rxqs)[idx];
 	int inactive = 0;
@@ -3880,9 +4018,16 @@ struct txq_mp2mr_mbuf_check_data {
 			return -ENOMEM;
 		}
 	}
-	if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
-		inactive = 1;
-	ret = rxq_setup(dev, rxq, desc, socket, inactive, conf, mp);
+	if (priv->rss && !priv->isolated) {
+		/* The list consists of the single default one. */
+		parent = LIST_FIRST(&priv->parents);
+		if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
+			inactive = 1;
+	} else {
+		parent = NULL;
+	}
+	ret = rxq_setup(dev, rxq, desc, socket,
+			inactive, conf, mp, 0, parent);
 	if (ret)
 		rte_free(rxq);
 	else {
@@ -3919,7 +4064,6 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	priv = rxq->priv;
 	priv_lock(priv);
-	assert(rxq != &priv->rxq_parent);
 	for (i = 0; (i != priv->rxqs_n); ++i)
 		if ((*priv->rxqs)[i] == rxq) {
 			DEBUG("%p: removing RX queue %p from list",
@@ -3970,8 +4114,11 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	if (priv->rss) {
-		rxq = &priv->rxq_parent;
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4053,8 +4200,11 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
 	priv->started = 0;
-	if (priv->rss) {
-		rxq = &priv->rxq_parent;
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4188,7 +4338,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->txqs = NULL;
 	}
 	if (priv->rss)
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parents_list_cleanup(priv);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
 		claim_zero(ibv_dealloc_pd(priv->pd));
@@ -4481,6 +4631,8 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated)
+		goto end;
 	DEBUG("%p: removing MAC address from index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4514,6 +4666,12 @@ struct txq_mp2mr_mbuf_check_data {
 		return -ENOTSUP;
 	(void)vmdq;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot add MAC address, "
+		      "device is in isolated mode", (void *)dev);
+		re = EPERM;
+		goto end;
+	}
 	DEBUG("%p: adding MAC address at index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4561,6 +4719,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable promiscuous, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->promisc) {
 		priv_unlock(priv);
 		return;
@@ -4569,7 +4733,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_promiscuous_enable(&priv->rxq_parent);
+		ret = rxq_promiscuous_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4609,12 +4773,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
-	if (!priv->promisc) {
+	if (!priv->promisc || priv->isolated) {
 		priv_unlock(priv);
 		return;
 	}
 	if (priv->rss) {
-		rxq_promiscuous_disable(&priv->rxq_parent);
+		rxq_promiscuous_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -4641,6 +4805,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable allmulticast, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->allmulti) {
 		priv_unlock(priv);
 		return;
@@ -4649,7 +4819,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_allmulticast_enable(&priv->rxq_parent);
+		ret = rxq_allmulticast_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4693,8 +4863,8 @@ struct txq_mp2mr_mbuf_check_data {
 		priv_unlock(priv);
 		return;
 	}
-	if (priv->rss) {
-		rxq_allmulticast_disable(&priv->rxq_parent);
+	if (priv->rss && !priv->isolated) {
+		rxq_allmulticast_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -4832,7 +5002,7 @@ struct txq_mp2mr_mbuf_check_data {
 		}
 		/* Reenable non-RSS queue attributes. No need to check
 		 * for errors at this stage. */
-		if (!priv->rss) {
+		if (!priv->rss && !priv->isolated) {
 			rxq_mac_addrs_add(rxq);
 			if (priv->promisc)
 				rxq_promiscuous_enable(rxq);
@@ -5003,7 +5173,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5011,7 +5181,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 1;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
@@ -5025,7 +5195,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5033,7 +5203,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 0;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
@@ -5067,6 +5237,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return -E_RTE_SECONDARY;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot set vlan filter, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return -EINVAL;
+	}
 	ret = vlan_filter_set(dev, vlan_id, on);
 	priv_unlock(priv);
 	assert(ret >= 0);
@@ -5079,6 +5255,7 @@ struct txq_mp2mr_mbuf_check_data {
 	.destroy = mlx4_flow_destroy,
 	.flush = mlx4_flow_flush,
 	.query = NULL,
+	.isolate = mlx4_flow_isolate,
 };
 
 /**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 9a3bae9..b5fe1b4 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -219,6 +219,7 @@ struct rxq_elt {
 
 /* RX queue descriptor. */
 struct rxq {
+	LIST_ENTRY(rxq) next; /* Used by parent queue only */
 	struct priv *priv; /* Back pointer to private data. */
 	struct rte_mempool *mp; /* Memory Pool for allocations. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
@@ -246,6 +247,10 @@ struct rxq {
 	struct mlx4_rxq_stats stats; /* RX queue counters. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	struct ibv_exp_res_domain *rd; /* Resource Domain. */
+	struct {
+		uint16_t queues_n;
+		uint16_t queues[RTE_MAX_QUEUES_PER_PORT];
+	} rss;
 };
 
 /* TX element. */
@@ -334,12 +339,12 @@ struct priv {
 	unsigned int rss:1; /* RSS is enabled. */
 	unsigned int vf:1; /* This is a VF device. */
 	unsigned int pending_alarm:1; /* An alarm is pending. */
+	unsigned int isolated:1; /* Toggle isolated mode. */
 #ifdef INLINE_RECV
 	unsigned int inl_recv_size; /* Inline recv size */
 #endif
 	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
 	/* RX/TX queues. */
-	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
 	struct rxq *(*rxqs)[]; /* RX queues. */
@@ -348,10 +353,21 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	LIST_HEAD(mlx4_flows, rte_flow) flows;
 	struct rte_intr_conf intr_conf; /* Active interrupt configuration. */
+	LIST_HEAD(mlx4_parents, rxq) parents;
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
 void priv_lock(struct priv *priv);
 void priv_unlock(struct priv *priv);
 
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent);
+
+void
+rxq_parent_cleanup(struct rxq *parent);
+
 #endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index edfac03..3fd2716 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -957,6 +957,45 @@ struct rte_flow *
 }
 
 /**
+ * @see rte_flow_isolate()
+ *
+ * Must be done before calling dev_configure().
+ *
+ * RSS action is possible only if this mode was requested.
+ *
+ * @param dev
+ *   Pointer to the ethernet device structure.
+ * @param enable
+ *   Nonzero to enter isolated mode, attempt to leave it otherwise.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative value on error.
+ */
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	if (priv->rxqs) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "isolated mode must be set"
+				   " before configuring the device");
+		priv_unlock(priv);
+		return -rte_errno;
+	}
+	priv->isolated = !!enable;
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
  * Destroy a flow.
  *
  * @param priv
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 12a293e..6afc57f 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -83,6 +83,10 @@ struct rte_flow *
 int
 mlx4_flow_flush(struct rte_eth_dev *dev,
 		struct rte_flow_error *error);
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error);
 
 /** Structure to pass to the conversion function. */
 struct mlx4_flow {
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v3 2/3] net/mlx4: support for the RSS flow action
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (5 preceding siblings ...)
  2017-05-25 14:10 ` [PATCH v3 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
@ 2017-05-25 14:10 ` Vasily Philipov
  2017-05-25 14:10 ` [PATCH v3 3/3] app/testpmd: add isolated mode parameter Vasily Philipov
                   ` (20 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-05-25 14:10 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Isolated mode must be enabled to use the RSS flow action.
The number of queues in an RSS ring must be a power of 2.
Sharing a queue between several RSS rings is not supported.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      |   2 +-
 drivers/net/mlx4/mlx4.h      |   5 ++
 drivers/net/mlx4/mlx4_flow.c | 195 ++++++++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx4/mlx4_flow.h |   3 +-
 4 files changed, 199 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index fc995c1..b51fef4 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -556,7 +556,7 @@ void priv_unlock(struct priv *priv)
  * @return
  *   0 on success, negative errno value on failure.
  */
-static int
+int
 priv_create_parent(struct priv *priv,
 		   uint16_t queues[],
 		   uint16_t children_n)
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index b5fe1b4..e95e3b5 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -370,4 +370,9 @@ struct priv {
 void
 rxq_parent_cleanup(struct rxq *parent);
 
+int
+priv_create_parent(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n);
+
 #endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 3fd2716..8bc911e 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -112,6 +112,7 @@ struct rte_flow_drop {
 static const enum rte_flow_action_type valid_actions[] = {
 	RTE_FLOW_ACTION_TYPE_DROP,
 	RTE_FLOW_ACTION_TYPE_QUEUE,
+	RTE_FLOW_ACTION_TYPE_RSS,
 	RTE_FLOW_ACTION_TYPE_END,
 };
 
@@ -672,6 +673,76 @@ struct rte_flow_drop {
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
 			action.queue = 1;
+			action.queues_n = 1;
+			action.queues[0] = queue->index;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			int i;
+			int ierr;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				actions->conf;
+
+			if (!priv->hw_rss) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used with "
+					   "the current configuration");
+				return -rte_errno;
+			}
+			if (!priv->isolated) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used without "
+					   "isolated mode");
+				return -rte_errno;
+			}
+			if (!rte_is_power_of_2(rss->num)) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "should be power of two");
+				return -rte_errno;
+			}
+			if (priv->max_rss_tbl_sz < rss->num) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "is too large");
+				return -rte_errno;
+			}
+			/* checking indexes array */
+			ierr = 0;
+			for (i = 0; i < rss->num; ++i) {
+				int j;
+				if (rss->queue[i] >= priv->rxqs_n)
+					ierr = 1;
+				/*
+				 * Prevent the user from specifying
+				 * the same queue twice in the RSS array.
+				 */
+				for (j = i + 1; j < rss->num && !ierr; ++j)
+					if (rss->queue[j] == rss->queue[i])
+						ierr = 1;
+				if (ierr) {
+					rte_flow_error_set(
+						error,
+						ENOTSUP,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"RSS action only supports "
+						"unique queue indices "
+						"in a list");
+					return -rte_errno;
+				}
+			}
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			goto exit_action_not_supported;
 		}
@@ -797,6 +868,79 @@ struct rte_flow_drop {
 }
 
 /**
+ * Get RSS parent rxq structure for given queues.
+ *
+ * Creates a new one or returns an existing one.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   queues indices array, NULL in default RSS case.
+ * @param children_n
+ *   the size of queues array.
+ *
+ * @return
+ *   Pointer to a parent rxq structure, NULL on failure.
+ */
+static struct rxq *
+priv_get_parent(struct priv *priv,
+		uint16_t queues[],
+		uint16_t children_n,
+		struct rte_flow_error *error)
+{
+	int ret;
+	unsigned int i;
+	struct rxq *parent;
+
+	for (parent = LIST_FIRST(&priv->parents);
+	     parent;
+	     parent = LIST_NEXT(parent, next)) {
+		unsigned int overlap = 0;
+
+		for (i = 0; i < children_n; ++i) {
+			unsigned int j;
+
+			for (j = 0; j < parent->rss.queues_n; ++j)
+				if (i != j &&
+				    parent->rss.queues[j] == queues[i]) {
+					goto error;
+				} else if (parent->rss.queues[j] == queues[i]) {
+					++overlap;
+					break;
+				}
+		}
+		if (overlap == children_n &&
+			children_n == parent->rss.queues_n)
+			return parent;
+		else if (overlap > 0)
+			goto error;
+	}
+	/* Exclude the cases when some QPs were created without RSS */
+	for (i = 0; i < children_n; ++i) {
+		struct rxq *rxq = (*priv->rxqs)[queues[i]];
+		if (rxq->qp)
+			goto error;
+	}
+	ret = priv_create_parent(priv, queues, children_n);
+	if (ret) {
+		rte_flow_error_set(error,
+				   ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "flow rule creation failure");
+		return NULL;
+	}
+	return LIST_FIRST(&priv->parents);
+
+error:
+	rte_flow_error_set(error,
+			   EEXIST,
+			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL,
+			   "sharing a queue between several"
+			   " RSS groups is not supported");
+	return NULL;
+}
+
+/**
  * Complete flow rule creation.
  *
  * @param priv
@@ -831,9 +975,41 @@ struct rte_flow_drop {
 	if (action->drop) {
 		qp = priv->flow_drop_queue->qp;
 	} else {
-		struct rxq *rxq = (*priv->rxqs)[action->queue_id];
-
-		qp = rxq->qp;
+		int ret;
+		unsigned int i;
+		struct rxq *rxq = NULL;
+		struct rxq *rxq_parent = NULL;
+
+		if (action->queues_n > 1) {
+			rxq_parent = priv_get_parent(priv, action->queues,
+						     action->queues_n, error);
+			if (!rxq_parent)
+				goto error;
+		}
+		for (i = 0; i < action->queues_n; ++i) {
+			rxq = (*priv->rxqs)[action->queues[i]];
+			/*
+			 * In isolated mode, creation of the ibv receive
+			 * queue is postponed until the first rte_flow
+			 * rule is applied to that queue.
+			 */
+			if (!rxq->qp) {
+				assert(priv->isolated);
+				ret = rxq_create_qp(rxq, rxq->elts_n,
+						    0, 0, rxq_parent);
+				if (ret) {
+					rxq_parent_cleanup(rxq_parent);
+					rte_flow_error_set(
+						error,
+						ENOMEM,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"flow rule creation failure");
+					goto error;
+				}
+			}
+		}
+		qp = action->queues_n > 1 ? rxq_parent->qp : rxq->qp;
 		rte_flow->qp = qp;
 	}
 	rte_flow->ibv_attr = ibv_attr;
@@ -909,11 +1085,22 @@ struct rte_flow_drop {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			action.queue = 1;
-			action.queue_id =
+			action.queues_n = 1;
+			action.queues[0] =
 				((const struct rte_flow_action_queue *)
 				 actions->conf)->index;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
 			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			unsigned int i;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				 actions->conf;
+
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 6afc57f..823d3b6 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -97,7 +97,8 @@ struct mlx4_flow {
 struct mlx4_flow_action {
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t queue:1; /**< Target is a receive queue. */
-	uint32_t queue_id; /**< Identifier of the queue. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indices to use. */
+	uint16_t queues_n; /**< Number of entries in queues[]. */
 };
 
 int mlx4_priv_flow_start(struct priv *priv);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v3 3/3] app/testpmd: add isolated mode parameter
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (6 preceding siblings ...)
  2017-05-25 14:10 ` [PATCH v3 2/3] net/mlx4: support for the RSS flow action Vasily Philipov
@ 2017-05-25 14:10 ` Vasily Philipov
  2017-06-04 13:34 ` [PATCH v4 1/4] net/mlx4: RSS parent queues new method maintenance Vasily Philipov
                   ` (19 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-05-25 14:10 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

If the --isolated-mode parameter is given on the command line,
the rte_flow isolate action is applied to each port before
the device is configured.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 app/test-pmd/parameters.c |  3 +++
 app/test-pmd/testpmd.c    | 14 ++++++++++++++
 app/test-pmd/testpmd.h    |  1 +
 3 files changed, 18 insertions(+)

diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index fbe6284..e313871 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -623,6 +623,7 @@
 		{ "tx-queue-stats-mapping",	1, 0, 0 },
 		{ "rx-queue-stats-mapping",	1, 0, 0 },
 		{ "no-flush-rx",	0, 0, 0 },
+		{ "isolated-mode",	        0, 0, 0 },
 		{ "txpkts",			1, 0, 0 },
 		{ "disable-link-check",		0, 0, 0 },
 		{ "no-lsc-interrupt",		0, 0, 0 },
@@ -1081,6 +1082,8 @@
 				lsc_interrupt = 0;
 			if (!strcmp(lgopts[opt_idx].name, "no-rmv-interrupt"))
 				rmv_interrupt = 0;
+			if (!strcmp(lgopts[opt_idx].name, "isolated-mode"))
+				isolated_mode = 1;
 			if (!strcmp(lgopts[opt_idx].name, "print-event"))
 				if (parse_event_printing_config(optarg, 1)) {
 					rte_exit(EXIT_FAILURE,
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index d1041af..610e675 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -267,6 +267,11 @@ struct fwd_engine * fwd_engines[] = {
 uint8_t no_flush_rx = 0; /* flush by default */
 
 /*
+ * Flow API isolated mode.
+ */
+uint8_t isolated_mode;
+
+/*
  * Avoids to check link status when starting/stopping a port.
  */
 uint8_t no_link_check = 0; /* check by default */
@@ -1422,6 +1427,15 @@ static void eth_event_callback(uint8_t port_id,
 		if (port->need_reconfig > 0) {
 			port->need_reconfig = 0;
 
+			if (isolated_mode) {
+				int ret = port_flow_isolate(pi, 1);
+				if (ret) {
+					printf("Failed to apply isolated"
+					       " mode on port %d\n", pi);
+					return -1;
+				}
+			}
+
 			printf("Configuring Port %d (socket %u)\n", pi,
 					port->socket_id);
 			/* configure port */
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index e6c43ba..dbe9898 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -303,6 +303,7 @@ struct queue_stats_mappings {
 extern uint8_t  numa_support; /**< set by "--numa" parameter */
 extern uint16_t port_topology; /**< set by "--port-topology" parameter */
 extern uint8_t no_flush_rx; /**<set by "--no-flush-rx" parameter */
+extern uint8_t isolated_mode; /**<set by "--isolated-mode" parameter */
 extern uint8_t  mp_anon; /**< set by "--mp-anon" parameter */
 extern uint8_t no_link_check; /**<set by "--disable-link-check" parameter */
 extern volatile int test_done; /* stop packet forwarding when set to 1. */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v4 1/4] net/mlx4: RSS parent queues new method maintenance
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (7 preceding siblings ...)
  2017-05-25 14:10 ` [PATCH v3 3/3] app/testpmd: add isolated mode parameter Vasily Philipov
@ 2017-06-04 13:34 ` Vasily Philipov
  2017-06-04 13:35 ` [PATCH v4 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (18 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-06-04 13:34 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Insert each newly created parent queue into a list, and keep that list
in the private structure.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
The series depends on:

http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/
---
 drivers/net/mlx4/mlx4.c | 375 +++++++++++++++++++++++++++++++++---------------
 drivers/net/mlx4/mlx4.h |  17 ++-
 2 files changed, 273 insertions(+), 119 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index ec4419a..2fdc889 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -533,13 +533,94 @@ void priv_unlock(struct priv *priv)
 
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp);
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent);
 
 static void
 rxq_cleanup(struct rxq *rxq);
 
 /**
+ * Create RSS parent queue.
+ *
+ * The newly created structure is placed at the head of priv parents list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   queues indices array, if NULL use all Rx queues.
+ * @param children_n
+ *   The number of entries in queues[].
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+priv_create_parent(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n)
+{
+	int ret;
+	uint16_t i;
+	struct rxq *parent;
+
+	parent = rte_zmalloc("parent queue",
+			     sizeof(*parent),
+			     RTE_CACHE_LINE_SIZE);
+	if (!parent)
+		return -ENOMEM;
+	ret = rxq_setup(priv->dev, parent, 0, 0, 0,
+			NULL, NULL, children_n, NULL);
+	if (ret) {
+		rte_free(parent);
+		return -ret;
+	}
+	parent->rss.queues_n = children_n;
+	if (queues) {
+		for (i = 0; i < children_n; ++i)
+			parent->rss.queues[i] = queues[i];
+	} else {
+		/* the default RSS ring case */
+		assert(priv->rxqs_n == children_n);
+		for (i = 0; i < priv->rxqs_n; ++i)
+			parent->rss.queues[i] = i;
+	}
+	LIST_INSERT_HEAD(&priv->parents, parent, next);
+	return 0;
+}
+
+/**
+ * Cleanup RX queue parent structure.
+ *
+ * @param parent
+ *   RX queue parent structure.
+ */
+void
+rxq_parent_cleanup(struct rxq *parent)
+{
+	LIST_REMOVE(parent, next);
+	rxq_cleanup(parent);
+	rte_free(parent);
+}
+
+/**
+ * Clean up parent structures from the parents list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_parents_list_cleanup(struct priv *priv)
+{
+	while (!LIST_EMPTY(&priv->parents)) {
+		struct rxq *parent = LIST_FIRST(&priv->parents);
+
+		rxq_parent_cleanup(parent);
+	}
+}
+
+/**
  * Ethernet device configuration.
  *
  * Prepare the driver for a given number of TX and RX queues.
@@ -588,7 +669,7 @@ void priv_unlock(struct priv *priv)
 		for (i = 0; (i != priv->rxqs_n); ++i)
 			if ((*priv->rxqs)[i] != NULL)
 				return EINVAL;
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parents_list_cleanup(priv);
 		priv->rss = 0;
 		priv->rxqs_n = 0;
 	}
@@ -613,7 +694,7 @@ void priv_unlock(struct priv *priv)
 	priv->rss = 1;
 	tmp = priv->rxqs_n;
 	priv->rxqs_n = rxqs_n;
-	ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, 0, NULL, NULL);
+	ret = priv_create_parent(priv, NULL, priv->rxqs_n);
 	if (!ret)
 		return 0;
 	/* Failure, rollback. */
@@ -2503,7 +2584,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
 		return;
 	if (priv->rss) {
-		rxq_mac_addr_del(&priv->rxq_parent, mac_index);
+		rxq_mac_addr_del(LIST_FIRST(&priv->parents), mac_index);
 		goto end;
 	}
 	for (i = 0; (i != priv->dev->data->nb_rx_queues); ++i)
@@ -2570,7 +2651,7 @@ struct txq_mp2mr_mbuf_check_data {
 		goto end;
 	}
 	if (priv->rss) {
-		ret = rxq_mac_addr_add(&priv->rxq_parent, mac_index);
+		ret = rxq_mac_addr_add(LIST_FIRST(&priv->parents), mac_index);
 		if (ret)
 			return ret;
 		goto end;
@@ -2752,8 +2833,9 @@ struct txq_mp2mr_mbuf_check_data {
 		rxq_promiscuous_disable(rxq);
 		rxq_allmulticast_disable(rxq);
 		rxq_mac_addrs_del(rxq);
-		claim_zero(ibv_destroy_qp(rxq->qp));
 	}
+	if (rxq->qp != NULL)
+		claim_zero(ibv_destroy_qp(rxq->qp));
 	if (rxq->cq != NULL)
 		claim_zero(ibv_destroy_cq(rxq->cq));
 	if (rxq->rd != NULL) {
@@ -3330,15 +3412,18 @@ struct txq_mp2mr_mbuf_check_data {
  *   Completion queue to associate with QP.
  * @param desc
  *   Number of descriptors in QP (hint only).
- * @param parent
- *   If nonzero, create a parent QP, otherwise a child.
+ * @param children_n
+ *   If nonzero, a number of children for parent QP and zero for a child.
+ * @param rxq_parent
+ *   Pointer for a parent in a child case, NULL otherwise.
  *
  * @return
  *   QP pointer or NULL in case of error.
  */
 static struct ibv_qp *
 rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
-		 int parent, struct ibv_exp_res_domain *rd)
+		 int children_n, struct ibv_exp_res_domain *rd,
+		 struct rxq *rxq_parent)
 {
 	struct ibv_exp_qp_init_attr attr = {
 		/* CQ to be associated with the send queue. */
@@ -3368,7 +3453,7 @@ struct txq_mp2mr_mbuf_check_data {
 	attr.max_inl_recv = priv->inl_recv_size,
 	attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV;
 #endif
-	if (parent) {
+	if (children_n > 0) {
 		attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
 		/* TSS isn't necessary. */
 		attr.qpg.parent_attrib.tss_child_count = 0;
@@ -3377,7 +3462,7 @@ struct txq_mp2mr_mbuf_check_data {
 		DEBUG("initializing parent RSS queue");
 	} else {
 		attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
-		attr.qpg.qpg_parent = priv->rxq_parent.qp;
+		attr.qpg.qpg_parent = rxq_parent->qp;
 		DEBUG("initializing child RSS queue");
 	}
 	return ibv_exp_create_qp(priv->ctx, &attr);
@@ -3413,13 +3498,7 @@ struct txq_mp2mr_mbuf_check_data {
 	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int err;
-	int parent = (rxq == &priv->rxq_parent);
 
-	if (parent) {
-		ERROR("%p: cannot rehash parent queue %p",
-		      (void *)dev, (void *)rxq);
-		return EINVAL;
-	}
 	mb_len = rte_pktmbuf_data_room_size(rxq->mp);
 	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
 	/* Number of descriptors and mbufs currently allocated. */
@@ -3464,6 +3543,8 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	/* From now on, any failure will render the queue unusable.
 	 * Reinitialize QP. */
+	if (!tmpl.qp)
+		goto skip_init;
 	mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
 	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
 	if (err) {
@@ -3471,12 +3552,6 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	}
-	err = ibv_resize_cq(tmpl.cq, desc_n);
-	if (err) {
-		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
-		assert(err > 0);
-		return err;
-	}
 	mod = (struct ibv_exp_qp_attr){
 		/* Move the QP to this state. */
 		.qp_state = IBV_QPS_INIT,
@@ -3485,9 +3560,6 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	err = ibv_exp_modify_qp(tmpl.qp, &mod,
 				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
 				 IBV_EXP_QP_PORT));
 	if (err) {
 		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
@@ -3495,6 +3567,13 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	};
+skip_init:
+	err = ibv_resize_cq(tmpl.cq, desc_n);
+	if (err) {
+		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
+		assert(err > 0);
+		return err;
+	}
 	/* Reconfigure flows. Do not care for errors. */
 	if (!priv->rss) {
 		rxq_mac_addrs_add(&tmpl);
@@ -3562,6 +3641,8 @@ struct txq_mp2mr_mbuf_check_data {
 	rxq->elts_n = 0;
 	rte_free(rxq->elts.sp);
 	rxq->elts.sp = NULL;
+	if (!tmpl.qp)
+		goto skip_rtr;
 	/* Post WRs. */
 	err = ibv_post_recv(tmpl.qp,
 			    (tmpl.sp ?
@@ -3589,6 +3670,116 @@ struct txq_mp2mr_mbuf_check_data {
 }
 
 /**
+ * Create verbs QP resources associated with a rxq.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param inactive
+ *   If true, the queue is disabled because its index is higher or
+ *   equal to the real number of queues, which must be a power of 2.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure for a child in RSS case,
+ *   NULL for parent.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent)
+{
+	int ret;
+	struct ibv_exp_qp_attr mod;
+	struct ibv_exp_query_intf_params params;
+	enum ibv_exp_query_intf_status status;
+	struct ibv_recv_wr *bad_wr;
+	int parent = (children_n > 0);
+	struct priv *priv = rxq->priv;
+
+#ifdef RSS_SUPPORT
+	if (priv->rss && !inactive && (rxq_parent || parent))
+		rxq->qp = rxq_setup_qp_rss(priv, rxq->cq, desc,
+					   children_n, rxq->rd,
+					   rxq_parent);
+	else
+#endif /* RSS_SUPPORT */
+		rxq->qp = rxq_setup_qp(priv, rxq->cq, desc, rxq->rd);
+	if (rxq->qp == NULL) {
+		ret = (errno ? errno : EINVAL);
+		ERROR("QP creation failure: %s",
+		      strerror(ret));
+		return ret;
+	}
+	mod = (struct ibv_exp_qp_attr){
+		/* Move the QP to this state. */
+		.qp_state = IBV_QPS_INIT,
+		/* Primary port number. */
+		.port_num = priv->port
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod,
+				(IBV_EXP_QP_STATE |
+#ifdef RSS_SUPPORT
+				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
+#endif /* RSS_SUPPORT */
+				 IBV_EXP_QP_PORT));
+	if (ret) {
+		ERROR("QP state to IBV_QPS_INIT failed: %s",
+		      strerror(ret));
+		return ret;
+	}
+	if (parent || !priv->rss) {
+		/* Configure MAC and broadcast addresses. */
+		ret = rxq_mac_addrs_add(rxq);
+		if (ret) {
+			ERROR("QP flow attachment failed: %s",
+			      strerror(ret));
+			return ret;
+		}
+	}
+	if (!parent) {
+		ret = ibv_post_recv(rxq->qp,
+				    (rxq->sp ?
+				     &(*rxq->elts.sp)[0].wr :
+				     &(*rxq->elts.no_sp)[0].wr),
+				    &bad_wr);
+		if (ret) {
+			ERROR("ibv_post_recv() failed for WR %p: %s",
+			      (void *)bad_wr,
+			      strerror(ret));
+			return ret;
+		}
+	}
+	mod = (struct ibv_exp_qp_attr){
+		.qp_state = IBV_QPS_RTR
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod, IBV_EXP_QP_STATE);
+	if (ret) {
+		ERROR("QP state to IBV_QPS_RTR failed: %s",
+		      strerror(ret));
+		return ret;
+	}
+	params = (struct ibv_exp_query_intf_params){
+		.intf_scope = IBV_EXP_INTF_GLOBAL,
+		.intf = IBV_EXP_INTF_QP_BURST,
+		.obj = rxq->qp,
+	};
+	rxq->if_qp = ibv_exp_query_intf(priv->ctx, &params, &status);
+	if (rxq->if_qp == NULL) {
+		ERROR("QP interface family query failed with status %d",
+		      status);
+		return errno;
+	}
+	return 0;
+}
+
+/**
  * Configure a RX queue.
  *
  * @param dev
@@ -3606,14 +3797,21 @@ struct txq_mp2mr_mbuf_check_data {
  *   Thresholds parameters.
  * @param mp
  *   Memory pool for buffer allocations.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure (or NULL) in a child case,
+ *   NULL for parent.
  *
  * @return
  *   0 on success, errno value on failure.
  */
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp)
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct rxq tmpl = {
@@ -3621,17 +3819,15 @@ struct txq_mp2mr_mbuf_check_data {
 		.mp = mp,
 		.socket = socket
 	};
-	struct ibv_exp_qp_attr mod;
 	union {
 		struct ibv_exp_query_intf_params params;
 		struct ibv_exp_cq_init_attr cq;
 		struct ibv_exp_res_domain_init_attr rd;
 	} attr;
 	enum ibv_exp_query_intf_status status;
-	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int ret = 0;
-	int parent = (rxq == &priv->rxq_parent);
+	int parent = (children_n > 0);
 
 	(void)conf; /* Thresholds configuration (ignored). */
 	/*
@@ -3711,45 +3907,6 @@ struct txq_mp2mr_mbuf_check_data {
 	      priv->device_attr.max_qp_wr);
 	DEBUG("priv->device_attr.max_sge is %d",
 	      priv->device_attr.max_sge);
-#ifdef RSS_SUPPORT
-	if (priv->rss && !inactive)
-		tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
-					   tmpl.rd);
-	else
-#endif /* RSS_SUPPORT */
-		tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
-	if (tmpl.qp == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: QP creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	mod = (struct ibv_exp_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod,
-				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
-				 IBV_EXP_QP_PORT));
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	if ((parent) || (!priv->rss))  {
-		/* Configure MAC and broadcast addresses. */
-		ret = rxq_mac_addrs_add(&tmpl);
-		if (ret) {
-			ERROR("%p: QP flow attachment failed: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
 	/* Allocate descriptors for RX queues, except for the RSS parent. */
 	if (parent)
 		goto skip_alloc;
@@ -3760,29 +3917,14 @@ struct txq_mp2mr_mbuf_check_data {
 	if (ret) {
 		ERROR("%p: RXQ allocation failed: %s",
 		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	ret = ibv_post_recv(tmpl.qp,
-			    (tmpl.sp ?
-			     &(*tmpl.elts.sp)[0].wr :
-			     &(*tmpl.elts.no_sp)[0].wr),
-			    &bad_wr);
-	if (ret) {
-		ERROR("%p: ibv_post_recv() failed for WR %p: %s",
-		      (void *)dev,
-		      (void *)bad_wr,
-		      strerror(ret));
-		goto error;
+		return ret;
 	}
 skip_alloc:
-	mod = (struct ibv_exp_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
+	if (parent || rxq_parent || !priv->rss) {
+		ret = rxq_create_qp(&tmpl, desc, inactive,
+				    children_n, rxq_parent);
+		if (ret)
+			goto error;
 	}
 	/* Save port ID. */
 	tmpl.port_id = dev->data->port_id;
@@ -3794,21 +3936,11 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
 	if (tmpl.if_cq == NULL) {
+		ret = EINVAL;
 		ERROR("%p: CQ interface family query failed with status %d",
 		      (void *)dev, status);
 		goto error;
 	}
-	attr.params = (struct ibv_exp_query_intf_params){
-		.intf_scope = IBV_EXP_INTF_GLOBAL,
-		.intf = IBV_EXP_INTF_QP_BURST,
-		.obj = tmpl.qp,
-	};
-	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
-	if (tmpl.if_qp == NULL) {
-		ERROR("%p: QP interface family query failed with status %d",
-		      (void *)dev, status);
-		goto error;
-	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
 	rxq_cleanup(rxq);
@@ -3846,6 +3978,7 @@ struct txq_mp2mr_mbuf_check_data {
 		    unsigned int socket, const struct rte_eth_rxconf *conf,
 		    struct rte_mempool *mp)
 {
+	struct rxq *parent;
 	struct priv *priv = dev->data->dev_private;
 	struct rxq *rxq = (*priv->rxqs)[idx];
 	int inactive = 0;
@@ -3880,9 +4013,16 @@ struct txq_mp2mr_mbuf_check_data {
 			return -ENOMEM;
 		}
 	}
-	if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
-		inactive = 1;
-	ret = rxq_setup(dev, rxq, desc, socket, inactive, conf, mp);
+	if (priv->rss) {
+		/* The list consists of the single default one. */
+		parent = LIST_FIRST(&priv->parents);
+		if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
+			inactive = 1;
+	} else {
+		parent = NULL;
+	}
+	ret = rxq_setup(dev, rxq, desc, socket,
+			inactive, conf, mp, 0, parent);
 	if (ret)
 		rte_free(rxq);
 	else {
@@ -3919,7 +4059,6 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	priv = rxq->priv;
 	priv_lock(priv);
-	assert(rxq != &priv->rxq_parent);
 	for (i = 0; (i != priv->rxqs_n); ++i)
 		if ((*priv->rxqs)[i] == rxq) {
 			DEBUG("%p: removing RX queue %p from list",
@@ -3971,7 +4110,7 @@ struct txq_mp2mr_mbuf_check_data {
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
 	if (priv->rss) {
-		rxq = &priv->rxq_parent;
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4054,7 +4193,7 @@ struct txq_mp2mr_mbuf_check_data {
 	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
 	priv->started = 0;
 	if (priv->rss) {
-		rxq = &priv->rxq_parent;
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4188,7 +4327,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->txqs = NULL;
 	}
 	if (priv->rss)
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parents_list_cleanup(priv);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
 		claim_zero(ibv_dealloc_pd(priv->pd));
@@ -4569,7 +4708,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_promiscuous_enable(&priv->rxq_parent);
+		ret = rxq_promiscuous_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4614,7 +4753,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	}
 	if (priv->rss) {
-		rxq_promiscuous_disable(&priv->rxq_parent);
+		rxq_promiscuous_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -4649,7 +4788,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_allmulticast_enable(&priv->rxq_parent);
+		ret = rxq_allmulticast_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4694,7 +4833,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	}
 	if (priv->rss) {
-		rxq_allmulticast_disable(&priv->rxq_parent);
+		rxq_allmulticast_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -5003,7 +5142,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5011,7 +5150,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 1;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
@@ -5025,7 +5164,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5033,7 +5172,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 0;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 9a3bae9..fd24888 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -219,6 +219,7 @@ struct rxq_elt {
 
 /* RX queue descriptor. */
 struct rxq {
+	LIST_ENTRY(rxq) next; /* Used by parent queue only */
 	struct priv *priv; /* Back pointer to private data. */
 	struct rte_mempool *mp; /* Memory Pool for allocations. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
@@ -246,6 +247,10 @@ struct rxq {
 	struct mlx4_rxq_stats stats; /* RX queue counters. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	struct ibv_exp_res_domain *rd; /* Resource Domain. */
+	struct {
+		uint16_t queues_n;
+		uint16_t queues[RTE_MAX_QUEUES_PER_PORT];
+	} rss;
 };
 
 /* TX element. */
@@ -339,7 +344,6 @@ struct priv {
 #endif
 	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
 	/* RX/TX queues. */
-	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
 	struct rxq *(*rxqs)[]; /* RX queues. */
@@ -348,10 +352,21 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	LIST_HEAD(mlx4_flows, rte_flow) flows;
 	struct rte_intr_conf intr_conf; /* Active interrupt configuration. */
+	LIST_HEAD(mlx4_parents, rxq) parents;
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
 void priv_lock(struct priv *priv);
 void priv_unlock(struct priv *priv);
 
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent);
+
+void
+rxq_parent_cleanup(struct rxq *parent);
+
 #endif /* RTE_PMD_MLX4_H_ */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v4 2/4] net/mlx4: implement isolated mode from flow API
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (8 preceding siblings ...)
  2017-06-04 13:34 ` [PATCH v4 1/4] net/mlx4: RSS parent queues new method maintenance Vasily Philipov
@ 2017-06-04 13:35 ` Vasily Philipov
  2017-06-04 13:35 ` [PATCH v4 3/4] net/mlx4: support for the RSS flow action Vasily Philipov
                   ` (17 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-06-04 13:35 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

The user must request isolated mode before device configuration;
the default RSS ring is not created in this case.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 58 +++++++++++++++++++++++++++++++++++---------
 drivers/net/mlx4/mlx4.h      |  1 +
 drivers/net/mlx4/mlx4_flow.c | 39 +++++++++++++++++++++++++++++
 drivers/net/mlx4/mlx4_flow.h |  5 ++++
 4 files changed, 92 insertions(+), 11 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 2fdc889..9cf2064 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -650,7 +650,7 @@ void priv_unlock(struct priv *priv)
 	}
 	if (rxqs_n == priv->rxqs_n)
 		return 0;
-	if (!rte_is_power_of_2(rxqs_n)) {
+	if (!rte_is_power_of_2(rxqs_n) && !priv->isolated) {
 		unsigned n_active;
 
 		n_active = rte_align32pow2(rxqs_n + 1) >> 1;
@@ -694,6 +694,8 @@ void priv_unlock(struct priv *priv)
 	priv->rss = 1;
 	tmp = priv->rxqs_n;
 	priv->rxqs_n = rxqs_n;
+	if (priv->isolated)
+		return 0;
 	ret = priv_create_parent(priv, NULL, priv->rxqs_n);
 	if (!ret)
 		return 0;
@@ -2580,6 +2582,7 @@ struct txq_mp2mr_mbuf_check_data {
 {
 	unsigned int i;
 
+	assert(!priv->isolated);
 	assert(mac_index < elemof(priv->mac));
 	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
 		return;
@@ -2829,7 +2832,7 @@ struct txq_mp2mr_mbuf_check_data {
 						rxq->if_cq,
 						&params));
 	}
-	if (rxq->qp != NULL) {
+	if (rxq->qp != NULL && !rxq->priv->isolated) {
 		rxq_promiscuous_disable(rxq);
 		rxq_allmulticast_disable(rxq);
 		rxq_mac_addrs_del(rxq);
@@ -3530,7 +3533,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return 0;
 	}
 	/* Remove attached flows if RSS is disabled (no parent queue). */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_allmulticast_disable(&tmpl);
 		rxq_promiscuous_disable(&tmpl);
 		rxq_mac_addrs_del(&tmpl);
@@ -3575,7 +3578,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return err;
 	}
 	/* Reconfigure flows. Do not care for errors. */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_mac_addrs_add(&tmpl);
 		if (priv->promisc)
 			rxq_promiscuous_enable(&tmpl);
@@ -3734,7 +3737,7 @@ struct txq_mp2mr_mbuf_check_data {
 		      strerror(ret));
 		return ret;
 	}
-	if (parent || !priv->rss) {
+	if (!priv->isolated && (parent || !priv->rss)) {
 		/* Configure MAC and broadcast addresses. */
 		ret = rxq_mac_addrs_add(rxq);
 		if (ret) {
@@ -4013,7 +4016,7 @@ struct txq_mp2mr_mbuf_check_data {
 			return -ENOMEM;
 		}
 	}
-	if (priv->rss) {
+	if (priv->rss && !priv->isolated) {
 		/* The list consists of the single default one. */
 		parent = LIST_FIRST(&priv->parents);
 		if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
@@ -4109,7 +4112,10 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	if (priv->rss) {
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
 		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
@@ -4192,7 +4198,10 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
 	priv->started = 0;
-	if (priv->rss) {
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
 		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
@@ -4620,6 +4629,8 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated)
+		goto end;
 	DEBUG("%p: removing MAC address from index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4653,6 +4664,12 @@ struct txq_mp2mr_mbuf_check_data {
 		return -ENOTSUP;
 	(void)vmdq;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot add MAC address, "
+		      "device is in isolated mode", (void *)dev);
+		re = EPERM;
+		goto end;
+	}
 	DEBUG("%p: adding MAC address at index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4700,6 +4717,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable promiscuous, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->promisc) {
 		priv_unlock(priv);
 		return;
@@ -4748,7 +4771,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
-	if (!priv->promisc) {
+	if (!priv->promisc || priv->isolated) {
 		priv_unlock(priv);
 		return;
 	}
@@ -4780,6 +4803,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable allmulticast, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->allmulti) {
 		priv_unlock(priv);
 		return;
@@ -4832,7 +4861,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv_unlock(priv);
 		return;
 	}
-	if (priv->rss) {
+	if (priv->rss && !priv->isolated) {
 		rxq_allmulticast_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
@@ -4971,7 +5000,7 @@ struct txq_mp2mr_mbuf_check_data {
 		}
 		/* Reenable non-RSS queue attributes. No need to check
 		 * for errors at this stage. */
-		if (!priv->rss) {
+		if (!priv->rss && !priv->isolated) {
 			rxq_mac_addrs_add(rxq);
 			if (priv->promisc)
 				rxq_promiscuous_enable(rxq);
@@ -5206,6 +5235,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return -E_RTE_SECONDARY;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot set vlan filter, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return -EINVAL;
+	}
 	ret = vlan_filter_set(dev, vlan_id, on);
 	priv_unlock(priv);
 	assert(ret >= 0);
@@ -5218,6 +5253,7 @@ struct txq_mp2mr_mbuf_check_data {
 	.destroy = mlx4_flow_destroy,
 	.flush = mlx4_flow_flush,
 	.query = NULL,
+	.isolate = mlx4_flow_isolate,
 };
 
 /**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index fd24888..b5fe1b4 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -339,6 +339,7 @@ struct priv {
 	unsigned int rss:1; /* RSS is enabled. */
 	unsigned int vf:1; /* This is a VF device. */
 	unsigned int pending_alarm:1; /* An alarm is pending. */
+	unsigned int isolated:1; /* Toggle isolated mode. */
 #ifdef INLINE_RECV
 	unsigned int inl_recv_size; /* Inline recv size */
 #endif
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index edfac03..3fd2716 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -957,6 +957,45 @@ struct rte_flow *
 }
 
 /**
+ * @see rte_flow_isolate()
+ *
+ * Must be done before calling dev_configure().
+ *
+ * RSS action is possible only if this mode was requested.
+ *
+ * @param dev
+ *   Pointer to the ethernet device structure.
+ * @param enable
+ *   Nonzero to enter isolated mode, attempt to leave it otherwise.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative value on error.
+ */
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	if (priv->rxqs) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "isolated mode must be set"
+				   " before configuring the device");
+		priv_unlock(priv);
+		return -rte_errno;
+	}
+	priv->isolated = !!enable;
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
  * Destroy a flow.
  *
  * @param priv
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 12a293e..4d007da 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -90,6 +90,11 @@ struct mlx4_flow {
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 };
 
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error);
+
 struct mlx4_flow_action {
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t queue:1; /**< Target is a receive queue. */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v4 3/4] net/mlx4: support for the RSS flow action
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (9 preceding siblings ...)
  2017-06-04 13:35 ` [PATCH v4 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
@ 2017-06-04 13:35 ` Vasily Philipov
  2017-06-04 13:35 ` [PATCH v4 4/4] app/testpmd: add isolated mode parameter Vasily Philipov
                   ` (16 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-06-04 13:35 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Isolated mode must be enabled.
The number of queues in an RSS ring must be a power of 2.
Sharing a queue between several RSS rings is not supported.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      |  21 +++--
 drivers/net/mlx4/mlx4.h      |   5 ++
 drivers/net/mlx4/mlx4_flow.c | 197 ++++++++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx4/mlx4_flow.h |   3 +-
 4 files changed, 211 insertions(+), 15 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 9cf2064..fa7b768 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -554,9 +554,9 @@ void priv_unlock(struct priv *priv)
  *   The number of entries in queues[].
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   Pointer to a parent rxq structure, NULL on failure.
  */
-static int
+struct rxq *
 priv_create_parent(struct priv *priv,
 		   uint16_t queues[],
 		   uint16_t children_n)
@@ -568,13 +568,15 @@ void priv_unlock(struct priv *priv)
 	parent = rte_zmalloc("parent queue",
 			     sizeof(*parent),
 			     RTE_CACHE_LINE_SIZE);
-	if (!parent)
-		return -ENOMEM;
+	if (!parent) {
+		ERROR("cannot allocate memory for RSS parent queue");
+		return NULL;
+	}
 	ret = rxq_setup(priv->dev, parent, 0, 0, 0,
 			NULL, NULL, children_n, NULL);
 	if (ret) {
 		rte_free(parent);
-		return -ret;
+		return NULL;
 	}
 	parent->rss.queues_n = children_n;
 	if (queues) {
@@ -587,7 +589,7 @@ void priv_unlock(struct priv *priv)
 			parent->rss.queues[i] = i;
 	}
 	LIST_INSERT_HEAD(&priv->parents, parent, next);
-	return 0;
+	return parent;
 }
 
 /**
@@ -639,7 +641,6 @@ void priv_unlock(struct priv *priv)
 	unsigned int rxqs_n = dev->data->nb_rx_queues;
 	unsigned int txqs_n = dev->data->nb_tx_queues;
 	unsigned int tmp;
-	int ret;
 
 	priv->rxqs = (void *)dev->data->rx_queues;
 	priv->txqs = (void *)dev->data->tx_queues;
@@ -696,14 +697,12 @@ void priv_unlock(struct priv *priv)
 	priv->rxqs_n = rxqs_n;
 	if (priv->isolated)
 		return 0;
-	ret = priv_create_parent(priv, NULL, priv->rxqs_n);
-	if (!ret)
+	if (priv_create_parent(priv, NULL, priv->rxqs_n))
 		return 0;
 	/* Failure, rollback. */
 	priv->rss = 0;
 	priv->rxqs_n = tmp;
-	assert(ret > 0);
-	return ret;
+	return ENOMEM;
 }
 
 /**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index b5fe1b4..f45e017 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -370,4 +370,9 @@ struct priv {
 void
 rxq_parent_cleanup(struct rxq *parent);
 
+struct rxq *
+priv_create_parent(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n);
+
 #endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 3fd2716..9c0fba1 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -112,6 +112,7 @@ struct rte_flow_drop {
 static const enum rte_flow_action_type valid_actions[] = {
 	RTE_FLOW_ACTION_TYPE_DROP,
 	RTE_FLOW_ACTION_TYPE_QUEUE,
+	RTE_FLOW_ACTION_TYPE_RSS,
 	RTE_FLOW_ACTION_TYPE_END,
 };
 
@@ -672,6 +673,76 @@ struct rte_flow_drop {
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
 			action.queue = 1;
+			action.queues_n = 1;
+			action.queues[0] = queue->index;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			int i;
+			int ierr;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				actions->conf;
+
+			if (!priv->hw_rss) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used with "
+					   "the current configuration");
+				return -rte_errno;
+			}
+			if (!priv->isolated) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used without "
+					   "isolated mode");
+				return -rte_errno;
+			}
+			if (!rte_is_power_of_2(rss->num)) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "should be power of two");
+				return -rte_errno;
+			}
+			if (priv->max_rss_tbl_sz < rss->num) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "is too large");
+				return -rte_errno;
+			}
+			/* checking indexes array */
+			ierr = 0;
+			for (i = 0; i < rss->num; ++i) {
+				int j;
+				if (rss->queue[i] >= priv->rxqs_n)
+					ierr = 1;
+				/*
+				 * Prevent the user from specifying
+				 * the same queue twice in the RSS array.
+				 */
+				for (j = i + 1; j < rss->num && !ierr; ++j)
+					if (rss->queue[j] == rss->queue[i])
+						ierr = 1;
+				if (ierr) {
+					rte_flow_error_set(
+						error,
+						ENOTSUP,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"RSS action only supports "
+						"unique queue indices "
+						"in a list");
+					return -rte_errno;
+				}
+			}
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			goto exit_action_not_supported;
 		}
@@ -797,6 +868,82 @@ struct rte_flow_drop {
 }
 
 /**
+ * Get RSS parent rxq structure for given queues.
+ *
+ * Creates a new one or returns an existing one.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   queues indices array, NULL in default RSS case.
+ * @param children_n
+ *   the size of queues array.
+ *
+ * @return
+ *   Pointer to a parent rxq structure, NULL on failure.
+ */
+static struct rxq *
+priv_get_parent(struct priv *priv,
+		uint16_t queues[],
+		uint16_t children_n,
+		struct rte_flow_error *error)
+{
+	unsigned int i;
+	struct rxq *parent;
+
+	for (parent = LIST_FIRST(&priv->parents);
+	     parent;
+	     parent = LIST_NEXT(parent, next)) {
+		unsigned int same = 0;
+		unsigned int overlap = 0;
+
+		/*
+		 * Find out whether an appropriate parent queue already exists
+		 * and can be reused, otherwise make sure there are no overlaps.
+		 */
+		for (i = 0; i < children_n; ++i) {
+			unsigned int j;
+
+			for (j = 0; j < parent->rss.queues_n; ++j) {
+				if (parent->rss.queues[j] != queues[i])
+					continue;
+				++overlap;
+				if (i == j)
+					++same;
+			}
+		}
+		if (same == children_n &&
+			children_n == parent->rss.queues_n)
+			return parent;
+		else if (overlap)
+			goto error;
+	}
+	/* Exclude the cases when some QPs were created without RSS */
+	for (i = 0; i < children_n; ++i) {
+		struct rxq *rxq = (*priv->rxqs)[queues[i]];
+		if (rxq->qp)
+			goto error;
+	}
+	parent = priv_create_parent(priv, queues, children_n);
+	if (!parent) {
+		rte_flow_error_set(error,
+				   ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "flow rule creation failure");
+		return NULL;
+	}
+	return parent;
+
+error:
+	rte_flow_error_set(error,
+			   EEXIST,
+			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL,
+			   "sharing a queue between several"
+			   " RSS groups is not supported");
+	return NULL;
+}
+
+/**
  * Complete flow rule creation.
  *
  * @param priv
@@ -819,6 +966,7 @@ struct rte_flow_drop {
 {
 	struct ibv_qp *qp;
 	struct rte_flow *rte_flow;
+	struct rxq *rxq_parent = NULL;
 
 	assert(priv->pd);
 	assert(priv->ctx);
@@ -831,9 +979,39 @@ struct rte_flow_drop {
 	if (action->drop) {
 		qp = priv->flow_drop_queue->qp;
 	} else {
-		struct rxq *rxq = (*priv->rxqs)[action->queue_id];
+		int ret;
+		unsigned int i;
+		struct rxq *rxq = NULL;
 
-		qp = rxq->qp;
+		if (action->queues_n > 1) {
+			rxq_parent = priv_get_parent(priv, action->queues,
+						     action->queues_n, error);
+			if (!rxq_parent)
+				goto error;
+		}
+		for (i = 0; i < action->queues_n; ++i) {
+			rxq = (*priv->rxqs)[action->queues[i]];
+			/*
+			 * In isolated mode, creation of the ibv
+			 * receive queue is postponed until the first
+			 * rte_flow rule is applied to that queue.
+			 */
+			if (!rxq->qp) {
+				assert(priv->isolated);
+				ret = rxq_create_qp(rxq, rxq->elts_n,
+						    0, 0, rxq_parent);
+				if (ret) {
+					rte_flow_error_set(
+						error,
+						ENOMEM,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"flow rule creation failure");
+					goto error;
+				}
+			}
+		}
+		qp = action->queues_n > 1 ? rxq_parent->qp : rxq->qp;
 		rte_flow->qp = qp;
 	}
 	rte_flow->ibv_attr = ibv_attr;
@@ -846,6 +1024,8 @@ struct rte_flow_drop {
 	return rte_flow;
 
 error:
+	if (rxq_parent)
+		rxq_parent_cleanup(rxq_parent);
 	rte_free(rte_flow);
 	return NULL;
 }
@@ -909,11 +1089,22 @@ struct rte_flow_drop {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			action.queue = 1;
-			action.queue_id =
+			action.queues_n = 1;
+			action.queues[0] =
 				((const struct rte_flow_action_queue *)
 				 actions->conf)->index;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
 			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			unsigned int i;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				 actions->conf;
+
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 4d007da..beabcf2 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -98,7 +98,8 @@ struct mlx4_flow {
 struct mlx4_flow_action {
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t queue:1; /**< Target is a receive queue. */
-	uint32_t queue_id; /**< Identifier of the queue. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indices to use. */
+	uint16_t queues_n; /**< Number of entries in queues[]. */
 };
 
 int mlx4_priv_flow_start(struct priv *priv);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v4 4/4] app/testpmd: add isolated mode parameter
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (10 preceding siblings ...)
  2017-06-04 13:35 ` [PATCH v4 3/4] net/mlx4: support for the RSS flow action Vasily Philipov
@ 2017-06-04 13:35 ` Vasily Philipov
  2017-06-20  1:26   ` Wu, Jingjing
  2017-06-28 14:03 ` [PATCH v5 1/4] net/mlx4: RSS parent queues new method maintenance Vasily Philipov
                   ` (15 subsequent siblings)
  27 siblings, 1 reply; 51+ messages in thread
From: Vasily Philipov @ 2017-06-04 13:35 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

If the --isolated-mode parameter is given on the command line,
the rte_flow isolate action is applied on each port before
the device is configured.

Providing this parameter requests isolated mode from the flow API on all
ports at initialization time in order to ensure that all traffic is
received through the configured flow rules only (see flow command).

This automatically discards ports that do not support this mode.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 app/test-pmd/parameters.c |  3 +++
 app/test-pmd/testpmd.c    | 14 ++++++++++++++
 app/test-pmd/testpmd.h    |  1 +
 3 files changed, 18 insertions(+)

diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index fbe6284..e313871 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -623,6 +623,7 @@
 		{ "tx-queue-stats-mapping",	1, 0, 0 },
 		{ "rx-queue-stats-mapping",	1, 0, 0 },
 		{ "no-flush-rx",	0, 0, 0 },
+		{ "isolated-mode",	        0, 0, 0 },
 		{ "txpkts",			1, 0, 0 },
 		{ "disable-link-check",		0, 0, 0 },
 		{ "no-lsc-interrupt",		0, 0, 0 },
@@ -1081,6 +1082,8 @@
 				lsc_interrupt = 0;
 			if (!strcmp(lgopts[opt_idx].name, "no-rmv-interrupt"))
 				rmv_interrupt = 0;
+			if (!strcmp(lgopts[opt_idx].name, "isolated-mode"))
+				isolated_mode = 1;
 			if (!strcmp(lgopts[opt_idx].name, "print-event"))
 				if (parse_event_printing_config(optarg, 1)) {
 					rte_exit(EXIT_FAILURE,
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index d1041af..610e675 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -267,6 +267,11 @@ struct fwd_engine * fwd_engines[] = {
 uint8_t no_flush_rx = 0; /* flush by default */
 
 /*
+ * Flow API isolated mode.
+ */
+uint8_t isolated_mode;
+
+/*
  * Avoids to check link status when starting/stopping a port.
  */
 uint8_t no_link_check = 0; /* check by default */
@@ -1422,6 +1427,15 @@ static void eth_event_callback(uint8_t port_id,
 		if (port->need_reconfig > 0) {
 			port->need_reconfig = 0;
 
+			if (isolated_mode) {
+				int ret = port_flow_isolate(pi, 1);
+				if (ret) {
+					printf("Failed to apply isolated"
+					       " mode on port %d\n", pi);
+					return -1;
+				}
+			}
+
 			printf("Configuring Port %d (socket %u)\n", pi,
 					port->socket_id);
 			/* configure port */
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index e6c43ba..dbe9898 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -303,6 +303,7 @@ struct queue_stats_mappings {
 extern uint8_t  numa_support; /**< set by "--numa" parameter */
 extern uint16_t port_topology; /**< set by "--port-topology" parameter */
 extern uint8_t no_flush_rx; /**<set by "--no-flush-rx" parameter */
+extern uint8_t isolated_mode; /**<set by "--isolated-mode" parameter */
 extern uint8_t  mp_anon; /**< set by "--mp-anon" parameter */
 extern uint8_t no_link_check; /**<set by "--disable-link-check" parameter */
 extern volatile int test_done; /* stop packet forwarding when set to 1. */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: [PATCH v4 4/4] app/testpmd: add isolated mode parameter
  2017-06-04 13:35 ` [PATCH v4 4/4] app/testpmd: add isolated mode parameter Vasily Philipov
@ 2017-06-20  1:26   ` Wu, Jingjing
  2017-06-21  9:43     ` Vasily Philipov
  0 siblings, 1 reply; 51+ messages in thread
From: Wu, Jingjing @ 2017-06-20  1:26 UTC (permalink / raw)
  To: Vasily Philipov, dev; +Cc: Adrien Mazarguil, Nelio Laranjeiro


> +/*
>   * Avoids to check link status when starting/stopping a port.
>   */
>  uint8_t no_link_check = 0; /* check by default */
> @@ -1422,6 +1427,15 @@ static void eth_event_callback(uint8_t port_id,
>  		if (port->need_reconfig > 0) {
>  			port->need_reconfig = 0;
> 
> +			if (isolated_mode) {
> +				int ret = port_flow_isolate(pi, 1);
> +				if (ret) {
> +					printf("Failed to apply isolated"
> +					       " mode on port %d\n", pi);
> +					return -1;
> +				}
> +			}
> +
Should it block the app startup if isolated-mode setting fails?

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v4 4/4] app/testpmd: add isolated mode parameter
  2017-06-20  1:26   ` Wu, Jingjing
@ 2017-06-21  9:43     ` Vasily Philipov
  2017-06-22  1:13       ` Wu, Jingjing
  0 siblings, 1 reply; 51+ messages in thread
From: Vasily Philipov @ 2017-06-21  9:43 UTC (permalink / raw)
  To: Wu, Jingjing, dev; +Cc: Adrien Mazarguil, Nélio Laranjeiro



> -----Original Message-----
> From: Wu, Jingjing [mailto:jingjing.wu@intel.com]
> Sent: Tuesday, June 20, 2017 04:27
> To: Vasily Philipov <vasilyf@mellanox.com>; dev@dpdk.org
> Cc: Adrien Mazarguil <adrien.mazarguil@6wind.com>; Nélio Laranjeiro
> <nelio.laranjeiro@6wind.com>
> Subject: RE: [dpdk-dev] [PATCH v4 4/4] app/testpmd: add isolated mode
> parameter
> 
> 
> > +/*
> >   * Avoids to check link status when starting/stopping a port.
> >   */
> >  uint8_t no_link_check = 0; /* check by default */ @@ -1422,6 +1427,15
> > @@ static void eth_event_callback(uint8_t port_id,
> >  		if (port->need_reconfig > 0) {
> >  			port->need_reconfig = 0;
> >
> > +			if (isolated_mode) {
> > +				int ret = port_flow_isolate(pi, 1);
> > +				if (ret) {
> > +					printf("Failed to apply isolated"
> > +					       " mode on port %d\n", pi);
> > +					return -1;
> > +				}
> > +			}
> > +
> Should it block the app startup if isolated-mode setting fails?

if isolated mode cannot be enabled on any port, that port cannot be initialized
and that causes testpmd to quit, at least it won't go against the user's wishes

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v4 4/4] app/testpmd: add isolated mode parameter
  2017-06-21  9:43     ` Vasily Philipov
@ 2017-06-22  1:13       ` Wu, Jingjing
  2017-06-26  5:53         ` Vasily Philipov
  2017-06-27  8:28         ` Thomas Monjalon
  0 siblings, 2 replies; 51+ messages in thread
From: Wu, Jingjing @ 2017-06-22  1:13 UTC (permalink / raw)
  To: Vasily Philipov, dev; +Cc: Adrien Mazarguil, Nélio Laranjeiro



> -----Original Message-----
> From: Vasily Philipov [mailto:vasilyf@mellanox.com]
> Sent: Wednesday, June 21, 2017 5:44 PM
> To: Wu, Jingjing <jingjing.wu@intel.com>; dev@dpdk.org
> Cc: Adrien Mazarguil <adrien.mazarguil@6wind.com>; Nélio Laranjeiro
> <nelio.laranjeiro@6wind.com>
> Subject: RE: [dpdk-dev] [PATCH v4 4/4] app/testpmd: add isolated mode
> parameter
> 
> 
> 
> > -----Original Message-----
> > From: Wu, Jingjing [mailto:jingjing.wu@intel.com]
> > Sent: Tuesday, June 20, 2017 04:27
> > To: Vasily Philipov <vasilyf@mellanox.com>; dev@dpdk.org
> > Cc: Adrien Mazarguil <adrien.mazarguil@6wind.com>; Nélio Laranjeiro
> > <nelio.laranjeiro@6wind.com>
> > Subject: RE: [dpdk-dev] [PATCH v4 4/4] app/testpmd: add isolated mode
> > parameter
> >
> >
> > > +/*
> > >   * Avoids to check link status when starting/stopping a port.
> > >   */
> > >  uint8_t no_link_check = 0; /* check by default */ @@ -1422,6
> > > +1427,15 @@ static void eth_event_callback(uint8_t port_id,
> > >  		if (port->need_reconfig > 0) {
> > >  			port->need_reconfig = 0;
> > >
> > > +			if (isolated_mode) {
> > > +				int ret = port_flow_isolate(pi, 1);
> > > +				if (ret) {
> > > +					printf("Failed to apply isolated"
> > > +					       " mode on port %d\n", pi);
> > > +					return -1;
> > > +				}
> > > +			}
> > > +
> > Should it block the app startup if isolated-mode setting fails?
> 
> if isolated mode cannot be enabled on any port, that port cannot be initialized
> and that causes testpmd to quit, at least it won't go against the user's wishes

If so, I prefer the isolated_mode to be port's argument but not global one.
How about to add a command to configure the isolate mode?

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v4 4/4] app/testpmd: add isolated mode parameter
  2017-06-22  1:13       ` Wu, Jingjing
@ 2017-06-26  5:53         ` Vasily Philipov
  2017-06-27  8:28         ` Thomas Monjalon
  1 sibling, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-06-26  5:53 UTC (permalink / raw)
  To: Wu, Jingjing, dev; +Cc: Adrien Mazarguil, Nélio Laranjeiro



> -----Original Message-----
> From: Wu, Jingjing [mailto:jingjing.wu@intel.com]
> Sent: Thursday, June 22, 2017 04:13
> To: Vasily Philipov <vasilyf@mellanox.com>; dev@dpdk.org
> Cc: Adrien Mazarguil <adrien.mazarguil@6wind.com>; Nélio Laranjeiro
> <nelio.laranjeiro@6wind.com>	
> Subject: RE: [dpdk-dev] [PATCH v4 4/4] app/testpmd: add isolated mode
> parameter
> 
> 
> 
> > -----Original Message-----
> > From: Vasily Philipov [mailto:vasilyf@mellanox.com]
> > Sent: Wednesday, June 21, 2017 5:44 PM
> > To: Wu, Jingjing <jingjing.wu@intel.com>; dev@dpdk.org
> > Cc: Adrien Mazarguil <adrien.mazarguil@6wind.com>; Nélio Laranjeiro
> > <nelio.laranjeiro@6wind.com>
> > Subject: RE: [dpdk-dev] [PATCH v4 4/4] app/testpmd: add isolated mode
> > parameter
> >
> >
> >
> > > -----Original Message-----
> > > From: Wu, Jingjing [mailto:jingjing.wu@intel.com]
> > > Sent: Tuesday, June 20, 2017 04:27
> > > To: Vasily Philipov <vasilyf@mellanox.com>; dev@dpdk.org
> > > Cc: Adrien Mazarguil <adrien.mazarguil@6wind.com>; Nélio Laranjeiro
> > > <nelio.laranjeiro@6wind.com>
> > > Subject: RE: [dpdk-dev] [PATCH v4 4/4] app/testpmd: add isolated
> > > mode parameter
> > >
> > >
> > > > +/*
> > > >   * Avoids to check link status when starting/stopping a port.
> > > >   */
> > > >  uint8_t no_link_check = 0; /* check by default */ @@ -1422,6
> > > > +1427,15 @@ static void eth_event_callback(uint8_t port_id,
> > > >  		if (port->need_reconfig > 0) {
> > > >  			port->need_reconfig = 0;
> > > >
> > > > +			if (isolated_mode) {
> > > > +				int ret = port_flow_isolate(pi, 1);
> > > > +				if (ret) {
> > > > +					printf("Failed to apply isolated"
> > > > +					       " mode on port %d\n", pi);
> > > > +					return -1;
> > > > +				}
> > > > +			}
> > > > +
> > > Should it block the app startup if isolated-mode setting fails?
> >
> > if isolated mode cannot be enabled on any port, that port cannot be
> > initialized and that causes testpmd to quit, at least it won't go
> > against the user's wishes
> 
> If so, I prefer the isolated_mode to be port's argument but not global one.

Could you please provide some example of such argument - I will take it as a reference...


> How about to add a command to configure the isolate mode?

This already exists - just type "flow isolate {port_id} {boolean}" at the testpmd prompt.
Please see at:

http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v4 4/4] app/testpmd: add isolated mode parameter
  2017-06-22  1:13       ` Wu, Jingjing
  2017-06-26  5:53         ` Vasily Philipov
@ 2017-06-27  8:28         ` Thomas Monjalon
  2017-06-29  5:52           ` Wu, Jingjing
  1 sibling, 1 reply; 51+ messages in thread
From: Thomas Monjalon @ 2017-06-27  8:28 UTC (permalink / raw)
  To: Wu, Jingjing
  Cc: dev, Vasily Philipov, Adrien Mazarguil, Nélio Laranjeiro

22/06/2017 03:13, Wu, Jingjing:
> From: Vasily Philipov [mailto:vasilyf@mellanox.com]
> > From: Wu, Jingjing [mailto:jingjing.wu@intel.com]
> > >
> > > > +/*
> > > >   * Avoids to check link status when starting/stopping a port.
> > > >   */
> > > >  uint8_t no_link_check = 0; /* check by default */ @@ -1422,6
> > > > +1427,15 @@ static void eth_event_callback(uint8_t port_id,
> > > >  		if (port->need_reconfig > 0) {
> > > >  			port->need_reconfig = 0;
> > > >
> > > > +			if (isolated_mode) {
> > > > +				int ret = port_flow_isolate(pi, 1);
> > > > +				if (ret) {
> > > > +					printf("Failed to apply isolated"
> > > > +					       " mode on port %d\n", pi);
> > > > +					return -1;
> > > > +				}
> > > > +			}
> > > > +
> > > Should it block the app startup if isolated-mode setting fails?
> > 
> > if isolated mode cannot be enabled on any port, that port cannot be initialized
> > and that causes testpmd to quit, at least it won't go against the user's wishes
> 
> If so, I prefer the isolated_mode to be port's argument but not global one.
> How about to add a command to configure the isolate mode?

There is already a command to configure isolate mode per-port:
	http://dpdk.org/patch/25320
	http://dpdk.org/doc/guides/testpmd_app_ug/testpmd_funcs.html#flow-syntax

I think it does not make sense to replicate this per-port command in
command line parameters.
All the other parameters are global:
	http://dpdk.org/doc/guides/testpmd_app_ug/run_app.html#testpmd-command-line-options
The idea here is to have a global isolate mode with a general option.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [PATCH v5 1/4] net/mlx4: RSS parent queues new method maintenance
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (11 preceding siblings ...)
  2017-06-04 13:35 ` [PATCH v4 4/4] app/testpmd: add isolated mode parameter Vasily Philipov
@ 2017-06-28 14:03 ` Vasily Philipov
  2017-06-29 16:51   ` Adrien Mazarguil
  2017-06-28 14:03 ` [PATCH v5 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (14 subsequent siblings)
  27 siblings, 1 reply; 51+ messages in thread
From: Vasily Philipov @ 2017-06-28 14:03 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Insert each newly created parent queue into a list that is kept in
the private structure.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
The series depends on:

http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/
---
 drivers/net/mlx4/mlx4.c | 377 +++++++++++++++++++++++++++++++++---------------
 drivers/net/mlx4/mlx4.h |  17 ++-
 2 files changed, 274 insertions(+), 120 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index ec4419a..9f3c746 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -533,13 +533,94 @@ void priv_unlock(struct priv *priv)
 
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp);
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent);
 
 static void
 rxq_cleanup(struct rxq *rxq);
 
 /**
+ * Create RSS parent queue.
+ *
+ * The newly created structure is placed at the head of the priv parents list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   queues indices array, if NULL use all Rx queues.
+ * @param children_n
+ *   The number of entries in queues[].
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+priv_create_parent(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n)
+{
+	struct rxq *parent = rte_zmalloc("parent queue", sizeof(*parent),
+					 RTE_CACHE_LINE_SIZE);
+	uint16_t idx;
+	int err;
+
+	if (parent == NULL)
+		return -ENOMEM;
+	/* Set the queue up as a parent of children_n children. */
+	err = rxq_setup(priv->dev, parent, 0, 0, 0,
+			NULL, NULL, children_n, NULL);
+	if (err != 0) {
+		rte_free(parent);
+		return -err;
+	}
+	parent->rss.queues_n = children_n;
+	if (queues == NULL) {
+		/* Default RSS ring: every Rx queue, in index order. */
+		assert(priv->rxqs_n == children_n);
+		for (idx = 0; idx < priv->rxqs_n; ++idx)
+			parent->rss.queues[idx] = idx;
+	} else {
+		for (idx = 0; idx < children_n; ++idx)
+			parent->rss.queues[idx] = queues[idx];
+	}
+	/* The newest parent goes to the head of the parents list. */
+	LIST_INSERT_HEAD(&priv->parents, parent, next);
+	return 0;
+}
+
+/**
+ * Cleanup RX queue parent structure.
+ *
+ * @param parent
+ *   RX queue parent structure.
+ */
+void
+rxq_parent_cleanup(struct rxq *parent)
+{
+	LIST_REMOVE(parent, next); /* Unlink before the structure is freed. */
+	rxq_cleanup(parent);
+	rte_free(parent); /* The parent structure itself is released here. */
+}
+
+/**
+ * Clean up parent structures from the parents list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_parents_list_cleanup(struct priv *priv)
+{
+	struct rxq *parent;
+
+	/* rxq_parent_cleanup() unlinks the entry, so the head advances. */
+	while ((parent = LIST_FIRST(&priv->parents)) != NULL)
+		rxq_parent_cleanup(parent);
+}
+
+/**
  * Ethernet device configuration.
  *
  * Prepare the driver for a given number of TX and RX queues.
@@ -588,7 +669,7 @@ void priv_unlock(struct priv *priv)
 		for (i = 0; (i != priv->rxqs_n); ++i)
 			if ((*priv->rxqs)[i] != NULL)
 				return EINVAL;
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parents_list_cleanup(priv);
 		priv->rss = 0;
 		priv->rxqs_n = 0;
 	}
@@ -613,7 +694,7 @@ void priv_unlock(struct priv *priv)
 	priv->rss = 1;
 	tmp = priv->rxqs_n;
 	priv->rxqs_n = rxqs_n;
-	ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, 0, NULL, NULL);
+	ret = priv_create_parent(priv, NULL, priv->rxqs_n);
 	if (!ret)
 		return 0;
 	/* Failure, rollback. */
@@ -2503,7 +2584,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
 		return;
 	if (priv->rss) {
-		rxq_mac_addr_del(&priv->rxq_parent, mac_index);
+		rxq_mac_addr_del(LIST_FIRST(&priv->parents), mac_index);
 		goto end;
 	}
 	for (i = 0; (i != priv->dev->data->nb_rx_queues); ++i)
@@ -2570,7 +2651,7 @@ struct txq_mp2mr_mbuf_check_data {
 		goto end;
 	}
 	if (priv->rss) {
-		ret = rxq_mac_addr_add(&priv->rxq_parent, mac_index);
+		ret = rxq_mac_addr_add(LIST_FIRST(&priv->parents), mac_index);
 		if (ret)
 			return ret;
 		goto end;
@@ -2752,8 +2833,9 @@ struct txq_mp2mr_mbuf_check_data {
 		rxq_promiscuous_disable(rxq);
 		rxq_allmulticast_disable(rxq);
 		rxq_mac_addrs_del(rxq);
-		claim_zero(ibv_destroy_qp(rxq->qp));
 	}
+	if (rxq->qp != NULL)
+		claim_zero(ibv_destroy_qp(rxq->qp));
 	if (rxq->cq != NULL)
 		claim_zero(ibv_destroy_cq(rxq->cq));
 	if (rxq->rd != NULL) {
@@ -3330,15 +3412,18 @@ struct txq_mp2mr_mbuf_check_data {
  *   Completion queue to associate with QP.
  * @param desc
  *   Number of descriptors in QP (hint only).
- * @param parent
- *   If nonzero, create a parent QP, otherwise a child.
+ * @param children_n
+ *   If nonzero, a number of children for parent QP and zero for a child.
+ * @param rxq_parent
+ *   Pointer for a parent in a child case, NULL otherwise.
  *
  * @return
  *   QP pointer or NULL in case of error.
  */
 static struct ibv_qp *
 rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
-		 int parent, struct ibv_exp_res_domain *rd)
+		 int children_n, struct ibv_exp_res_domain *rd,
+		 struct rxq *rxq_parent)
 {
 	struct ibv_exp_qp_init_attr attr = {
 		/* CQ to be associated with the send queue. */
@@ -3368,16 +3453,16 @@ struct txq_mp2mr_mbuf_check_data {
 	attr.max_inl_recv = priv->inl_recv_size,
 	attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV;
 #endif
-	if (parent) {
+	if (children_n > 0) {
 		attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
 		/* TSS isn't necessary. */
 		attr.qpg.parent_attrib.tss_child_count = 0;
 		attr.qpg.parent_attrib.rss_child_count =
-			rte_align32pow2(priv->rxqs_n + 1) >> 1;
+			rte_align32pow2(children_n + 1) >> 1;
 		DEBUG("initializing parent RSS queue");
 	} else {
 		attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
-		attr.qpg.qpg_parent = priv->rxq_parent.qp;
+		attr.qpg.qpg_parent = rxq_parent->qp;
 		DEBUG("initializing child RSS queue");
 	}
 	return ibv_exp_create_qp(priv->ctx, &attr);
@@ -3413,13 +3498,7 @@ struct txq_mp2mr_mbuf_check_data {
 	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int err;
-	int parent = (rxq == &priv->rxq_parent);
 
-	if (parent) {
-		ERROR("%p: cannot rehash parent queue %p",
-		      (void *)dev, (void *)rxq);
-		return EINVAL;
-	}
 	mb_len = rte_pktmbuf_data_room_size(rxq->mp);
 	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
 	/* Number of descriptors and mbufs currently allocated. */
@@ -3464,6 +3543,8 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	/* From now on, any failure will render the queue unusable.
 	 * Reinitialize QP. */
+	if (!tmpl.qp)
+		goto skip_init;
 	mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
 	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
 	if (err) {
@@ -3471,12 +3552,6 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	}
-	err = ibv_resize_cq(tmpl.cq, desc_n);
-	if (err) {
-		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
-		assert(err > 0);
-		return err;
-	}
 	mod = (struct ibv_exp_qp_attr){
 		/* Move the QP to this state. */
 		.qp_state = IBV_QPS_INIT,
@@ -3485,9 +3560,6 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	err = ibv_exp_modify_qp(tmpl.qp, &mod,
 				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
 				 IBV_EXP_QP_PORT));
 	if (err) {
 		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
@@ -3495,6 +3567,13 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	};
+skip_init:
+	err = ibv_resize_cq(tmpl.cq, desc_n);
+	if (err) {
+		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
+		assert(err > 0);
+		return err;
+	}
 	/* Reconfigure flows. Do not care for errors. */
 	if (!priv->rss) {
 		rxq_mac_addrs_add(&tmpl);
@@ -3562,6 +3641,8 @@ struct txq_mp2mr_mbuf_check_data {
 	rxq->elts_n = 0;
 	rte_free(rxq->elts.sp);
 	rxq->elts.sp = NULL;
+	if (!tmpl.qp)
+		goto skip_rtr;
 	/* Post WRs. */
 	err = ibv_post_recv(tmpl.qp,
 			    (tmpl.sp ?
@@ -3589,6 +3670,116 @@ struct txq_mp2mr_mbuf_check_data {
 }
 
 /**
+ * Create verbs QP resources associated with a rxq.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param inactive
+ *   If true, the queue is disabled because its index is higher or
+ *   equal to the real number of queues, which must be a power of 2.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure for a child in RSS case,
+ *   NULL for parent.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent)
+{
+	int ret;
+	struct ibv_exp_qp_attr mod;
+	struct ibv_exp_query_intf_params params;
+	enum ibv_exp_query_intf_status status;
+	struct ibv_recv_wr *bad_wr;
+	/* A nonzero children count marks this queue as an RSS parent. */
+	int parent = (children_n > 0);
+	struct priv *priv = rxq->priv;
+
+#ifdef RSS_SUPPORT
+	if (priv->rss && !inactive && (rxq_parent || parent))
+		rxq->qp = rxq_setup_qp_rss(priv, rxq->cq, desc,
+					   children_n, rxq->rd,
+					   rxq_parent);
+	else
+#endif /* RSS_SUPPORT */
+		rxq->qp = rxq_setup_qp(priv, rxq->cq, desc, rxq->rd);
+	if (rxq->qp == NULL) {
+		ret = (errno ? errno : EINVAL);
+		ERROR("QP creation failure: %s", strerror(ret));
+		return ret;
+	}
+	mod = (struct ibv_exp_qp_attr){
+		/* Move the QP to this state. */
+		.qp_state = IBV_QPS_INIT,
+		/* Primary port number. */
+		.port_num = priv->port
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod,
+				(IBV_EXP_QP_STATE |
+#ifdef RSS_SUPPORT
+				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
+#endif /* RSS_SUPPORT */
+				 IBV_EXP_QP_PORT));
+	if (ret) {
+		ERROR("QP state to IBV_QPS_INIT failed: %s", strerror(ret));
+		return ret;
+	}
+	if (parent || !priv->rss) {
+		/* Configure MAC and broadcast addresses. */
+		ret = rxq_mac_addrs_add(rxq);
+		if (ret) {
+			ERROR("QP flow attachment failed: %s",
+			      strerror(ret));
+			return ret;
+		}
+	}
+	/* Receive WRs are posted for non-parent queues only. */
+	if (!parent) {
+		ret = ibv_post_recv(rxq->qp,
+				    (rxq->sp ?
+				     &(*rxq->elts.sp)[0].wr :
+				     &(*rxq->elts.no_sp)[0].wr),
+				    &bad_wr);
+		if (ret) {
+			ERROR("ibv_post_recv() failed for WR %p: %s",
+			      (void *)bad_wr,
+			      strerror(ret));
+			return ret;
+		}
+	}
+	mod = (struct ibv_exp_qp_attr){
+		.qp_state = IBV_QPS_RTR
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod, IBV_EXP_QP_STATE);
+	if (ret) {
+		ERROR("QP state to IBV_QPS_RTR failed: %s", strerror(ret));
+		return ret;
+	}
+	params = (struct ibv_exp_query_intf_params){
+		.intf_scope = IBV_EXP_INTF_GLOBAL,
+		.intf = IBV_EXP_INTF_QP_BURST,
+		.obj = rxq->qp,
+	};
+	rxq->if_qp = ibv_exp_query_intf(priv->ctx, &params, &status);
+	if (rxq->if_qp == NULL) {
+		ERROR("QP interface family query failed with status %d",
+		      status);
+		/* Never report success here, even when errno is unset. */
+		return (errno ? errno : EINVAL);
+	}
+	return 0;
+}
+
+/**
  * Configure a RX queue.
  *
  * @param dev
@@ -3606,14 +3797,21 @@ struct txq_mp2mr_mbuf_check_data {
  *   Thresholds parameters.
  * @param mp
  *   Memory pool for buffer allocations.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure (or NULL) in a child case,
+ *   NULL for parent.
  *
  * @return
  *   0 on success, errno value on failure.
  */
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp)
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct rxq tmpl = {
@@ -3621,17 +3819,15 @@ struct txq_mp2mr_mbuf_check_data {
 		.mp = mp,
 		.socket = socket
 	};
-	struct ibv_exp_qp_attr mod;
 	union {
 		struct ibv_exp_query_intf_params params;
 		struct ibv_exp_cq_init_attr cq;
 		struct ibv_exp_res_domain_init_attr rd;
 	} attr;
 	enum ibv_exp_query_intf_status status;
-	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int ret = 0;
-	int parent = (rxq == &priv->rxq_parent);
+	int parent = (children_n > 0);
 
 	(void)conf; /* Thresholds configuration (ignored). */
 	/*
@@ -3711,45 +3907,6 @@ struct txq_mp2mr_mbuf_check_data {
 	      priv->device_attr.max_qp_wr);
 	DEBUG("priv->device_attr.max_sge is %d",
 	      priv->device_attr.max_sge);
-#ifdef RSS_SUPPORT
-	if (priv->rss && !inactive)
-		tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
-					   tmpl.rd);
-	else
-#endif /* RSS_SUPPORT */
-		tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
-	if (tmpl.qp == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: QP creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	mod = (struct ibv_exp_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod,
-				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
-				 IBV_EXP_QP_PORT));
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	if ((parent) || (!priv->rss))  {
-		/* Configure MAC and broadcast addresses. */
-		ret = rxq_mac_addrs_add(&tmpl);
-		if (ret) {
-			ERROR("%p: QP flow attachment failed: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
 	/* Allocate descriptors for RX queues, except for the RSS parent. */
 	if (parent)
 		goto skip_alloc;
@@ -3760,29 +3917,14 @@ struct txq_mp2mr_mbuf_check_data {
 	if (ret) {
 		ERROR("%p: RXQ allocation failed: %s",
 		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	ret = ibv_post_recv(tmpl.qp,
-			    (tmpl.sp ?
-			     &(*tmpl.elts.sp)[0].wr :
-			     &(*tmpl.elts.no_sp)[0].wr),
-			    &bad_wr);
-	if (ret) {
-		ERROR("%p: ibv_post_recv() failed for WR %p: %s",
-		      (void *)dev,
-		      (void *)bad_wr,
-		      strerror(ret));
-		goto error;
+		return ret;
 	}
 skip_alloc:
-	mod = (struct ibv_exp_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
+	if (parent || rxq_parent || !priv->rss) {
+		ret = rxq_create_qp(&tmpl, desc, inactive,
+				    children_n, rxq_parent);
+		if (ret)
+			goto error;
 	}
 	/* Save port ID. */
 	tmpl.port_id = dev->data->port_id;
@@ -3794,21 +3936,11 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
 	if (tmpl.if_cq == NULL) {
+		ret = EINVAL;
 		ERROR("%p: CQ interface family query failed with status %d",
 		      (void *)dev, status);
 		goto error;
 	}
-	attr.params = (struct ibv_exp_query_intf_params){
-		.intf_scope = IBV_EXP_INTF_GLOBAL,
-		.intf = IBV_EXP_INTF_QP_BURST,
-		.obj = tmpl.qp,
-	};
-	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
-	if (tmpl.if_qp == NULL) {
-		ERROR("%p: QP interface family query failed with status %d",
-		      (void *)dev, status);
-		goto error;
-	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
 	rxq_cleanup(rxq);
@@ -3846,6 +3978,7 @@ struct txq_mp2mr_mbuf_check_data {
 		    unsigned int socket, const struct rte_eth_rxconf *conf,
 		    struct rte_mempool *mp)
 {
+	struct rxq *parent;
 	struct priv *priv = dev->data->dev_private;
 	struct rxq *rxq = (*priv->rxqs)[idx];
 	int inactive = 0;
@@ -3880,9 +4013,16 @@ struct txq_mp2mr_mbuf_check_data {
 			return -ENOMEM;
 		}
 	}
-	if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
-		inactive = 1;
-	ret = rxq_setup(dev, rxq, desc, socket, inactive, conf, mp);
+	if (priv->rss) {
+		/* The list consists of the single default one. */
+		parent = LIST_FIRST(&priv->parents);
+		if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
+			inactive = 1;
+	} else {
+		parent = NULL;
+	}
+	ret = rxq_setup(dev, rxq, desc, socket,
+			inactive, conf, mp, 0, parent);
 	if (ret)
 		rte_free(rxq);
 	else {
@@ -3919,7 +4059,6 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	priv = rxq->priv;
 	priv_lock(priv);
-	assert(rxq != &priv->rxq_parent);
 	for (i = 0; (i != priv->rxqs_n); ++i)
 		if ((*priv->rxqs)[i] == rxq) {
 			DEBUG("%p: removing RX queue %p from list",
@@ -3971,7 +4110,7 @@ struct txq_mp2mr_mbuf_check_data {
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
 	if (priv->rss) {
-		rxq = &priv->rxq_parent;
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4054,7 +4193,7 @@ struct txq_mp2mr_mbuf_check_data {
 	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
 	priv->started = 0;
 	if (priv->rss) {
-		rxq = &priv->rxq_parent;
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4188,7 +4327,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->txqs = NULL;
 	}
 	if (priv->rss)
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parents_list_cleanup(priv);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
 		claim_zero(ibv_dealloc_pd(priv->pd));
@@ -4569,7 +4708,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_promiscuous_enable(&priv->rxq_parent);
+		ret = rxq_promiscuous_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4614,7 +4753,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	}
 	if (priv->rss) {
-		rxq_promiscuous_disable(&priv->rxq_parent);
+		rxq_promiscuous_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -4649,7 +4788,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_allmulticast_enable(&priv->rxq_parent);
+		ret = rxq_allmulticast_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4694,7 +4833,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	}
 	if (priv->rss) {
-		rxq_allmulticast_disable(&priv->rxq_parent);
+		rxq_allmulticast_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -5003,7 +5142,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5011,7 +5150,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 1;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
@@ -5025,7 +5164,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5033,7 +5172,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 0;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 9a3bae9..fd24888 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -219,6 +219,7 @@ struct rxq_elt {
 
 /* RX queue descriptor. */
 struct rxq {
+	LIST_ENTRY(rxq) next; /* Used by parent queue only */
 	struct priv *priv; /* Back pointer to private data. */
 	struct rte_mempool *mp; /* Memory Pool for allocations. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
@@ -246,6 +247,10 @@ struct rxq {
 	struct mlx4_rxq_stats stats; /* RX queue counters. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	struct ibv_exp_res_domain *rd; /* Resource Domain. */
+	struct {
+		uint16_t queues_n;
+		uint16_t queues[RTE_MAX_QUEUES_PER_PORT];
+	} rss;
 };
 
 /* TX element. */
@@ -339,7 +344,6 @@ struct priv {
 #endif
 	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
 	/* RX/TX queues. */
-	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
 	struct rxq *(*rxqs)[]; /* RX queues. */
@@ -348,10 +352,21 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	LIST_HEAD(mlx4_flows, rte_flow) flows;
 	struct rte_intr_conf intr_conf; /* Active interrupt configuration. */
+	LIST_HEAD(mlx4_parents, rxq) parents;
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
 void priv_lock(struct priv *priv);
 void priv_unlock(struct priv *priv);
 
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent);
+
+void
+rxq_parent_cleanup(struct rxq *parent);
+
 #endif /* RTE_PMD_MLX4_H_ */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v5 2/4] net/mlx4: implement isolated mode from flow API
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (12 preceding siblings ...)
  2017-06-28 14:03 ` [PATCH v5 1/4] net/mlx4: RSS parent queues new method maintenance Vasily Philipov
@ 2017-06-28 14:03 ` Vasily Philipov
  2017-06-29 16:52   ` Adrien Mazarguil
  2017-06-28 14:03 ` [PATCH v5 3/4] net/mlx4: support for the RSS flow action Vasily Philipov
                   ` (13 subsequent siblings)
  27 siblings, 1 reply; 51+ messages in thread
From: Vasily Philipov @ 2017-06-28 14:03 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

The user must request isolated mode before the device is configured;
in this mode the default RSS ring is not created.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 58 +++++++++++++++++++++++++++++++++++---------
 drivers/net/mlx4/mlx4.h      |  1 +
 drivers/net/mlx4/mlx4_flow.c | 39 +++++++++++++++++++++++++++++
 drivers/net/mlx4/mlx4_flow.h |  5 ++++
 4 files changed, 92 insertions(+), 11 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 9f3c746..22fa7c6 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -650,7 +650,7 @@ void priv_unlock(struct priv *priv)
 	}
 	if (rxqs_n == priv->rxqs_n)
 		return 0;
-	if (!rte_is_power_of_2(rxqs_n)) {
+	if (!rte_is_power_of_2(rxqs_n) && !priv->isolated) {
 		unsigned n_active;
 
 		n_active = rte_align32pow2(rxqs_n + 1) >> 1;
@@ -694,6 +694,8 @@ void priv_unlock(struct priv *priv)
 	priv->rss = 1;
 	tmp = priv->rxqs_n;
 	priv->rxqs_n = rxqs_n;
+	if (priv->isolated)
+		return 0;
 	ret = priv_create_parent(priv, NULL, priv->rxqs_n);
 	if (!ret)
 		return 0;
@@ -2580,6 +2582,7 @@ struct txq_mp2mr_mbuf_check_data {
 {
 	unsigned int i;
 
+	assert(!priv->isolated);
 	assert(mac_index < elemof(priv->mac));
 	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
 		return;
@@ -2829,7 +2832,7 @@ struct txq_mp2mr_mbuf_check_data {
 						rxq->if_cq,
 						&params));
 	}
-	if (rxq->qp != NULL) {
+	if (rxq->qp != NULL && !rxq->priv->isolated) {
 		rxq_promiscuous_disable(rxq);
 		rxq_allmulticast_disable(rxq);
 		rxq_mac_addrs_del(rxq);
@@ -3530,7 +3533,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return 0;
 	}
 	/* Remove attached flows if RSS is disabled (no parent queue). */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_allmulticast_disable(&tmpl);
 		rxq_promiscuous_disable(&tmpl);
 		rxq_mac_addrs_del(&tmpl);
@@ -3575,7 +3578,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return err;
 	}
 	/* Reconfigure flows. Do not care for errors. */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_mac_addrs_add(&tmpl);
 		if (priv->promisc)
 			rxq_promiscuous_enable(&tmpl);
@@ -3734,7 +3737,7 @@ struct txq_mp2mr_mbuf_check_data {
 		      strerror(ret));
 		return ret;
 	}
-	if (parent || !priv->rss) {
+	if (!priv->isolated && (parent || !priv->rss)) {
 		/* Configure MAC and broadcast addresses. */
 		ret = rxq_mac_addrs_add(rxq);
 		if (ret) {
@@ -4013,7 +4016,7 @@ struct txq_mp2mr_mbuf_check_data {
 			return -ENOMEM;
 		}
 	}
-	if (priv->rss) {
+	if (priv->rss && !priv->isolated) {
 		/* The list consists of the single default one. */
 		parent = LIST_FIRST(&priv->parents);
 		if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
@@ -4109,7 +4112,10 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	if (priv->rss) {
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
 		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
@@ -4192,7 +4198,10 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
 	priv->started = 0;
-	if (priv->rss) {
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
 		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
@@ -4620,6 +4629,8 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated)
+		goto end;
 	DEBUG("%p: removing MAC address from index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4653,6 +4664,12 @@ struct txq_mp2mr_mbuf_check_data {
 		return -ENOTSUP;
 	(void)vmdq;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot add MAC address, "
+		      "device is in isolated mode", (void *)dev);
+		re = EPERM;
+		goto end;
+	}
 	DEBUG("%p: adding MAC address at index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4700,6 +4717,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable promiscuous, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->promisc) {
 		priv_unlock(priv);
 		return;
@@ -4748,7 +4771,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
-	if (!priv->promisc) {
+	if (!priv->promisc || priv->isolated) {
 		priv_unlock(priv);
 		return;
 	}
@@ -4780,6 +4803,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable allmulticast, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->allmulti) {
 		priv_unlock(priv);
 		return;
@@ -4832,7 +4861,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv_unlock(priv);
 		return;
 	}
-	if (priv->rss) {
+	if (priv->rss && !priv->isolated) {
 		rxq_allmulticast_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
@@ -4971,7 +5000,7 @@ struct txq_mp2mr_mbuf_check_data {
 		}
 		/* Reenable non-RSS queue attributes. No need to check
 		 * for errors at this stage. */
-		if (!priv->rss) {
+		if (!priv->rss && !priv->isolated) {
 			rxq_mac_addrs_add(rxq);
 			if (priv->promisc)
 				rxq_promiscuous_enable(rxq);
@@ -5206,6 +5235,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return -E_RTE_SECONDARY;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot set vlan filter, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return -EINVAL;
+	}
 	ret = vlan_filter_set(dev, vlan_id, on);
 	priv_unlock(priv);
 	assert(ret >= 0);
@@ -5218,6 +5253,7 @@ struct txq_mp2mr_mbuf_check_data {
 	.destroy = mlx4_flow_destroy,
 	.flush = mlx4_flow_flush,
 	.query = NULL,
+	.isolate = mlx4_flow_isolate,
 };
 
 /**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index fd24888..b5fe1b4 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -339,6 +339,7 @@ struct priv {
 	unsigned int rss:1; /* RSS is enabled. */
 	unsigned int vf:1; /* This is a VF device. */
 	unsigned int pending_alarm:1; /* An alarm is pending. */
+	unsigned int isolated:1; /* Toggle isolated mode. */
 #ifdef INLINE_RECV
 	unsigned int inl_recv_size; /* Inline recv size */
 #endif
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index edfac03..3fd2716 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -957,6 +957,45 @@ struct rte_flow *
 }
 
 /**
+ * @see rte_flow_isolate()
+ *
+ * Must be done before calling dev_configure().
+ *
+ * RSS action is possible only if this mode was requested.
+ *
+ * @param dev
+ *   Pointer to the ethernet device structure.
+ * @param enable
+ *   Nonzero to enter isolated mode, attempt to leave it otherwise.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative value on error.
+ */
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	if (priv->rxqs) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "isolated mode must be set"
+				   " before configuring the device");
+		priv_unlock(priv);
+		return -rte_errno;
+	}
+	priv->isolated = !!enable;
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
  * Destroy a flow.
  *
  * @param priv
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 12a293e..4d007da 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -90,6 +90,11 @@ struct mlx4_flow {
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 };
 
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error);
+
 struct mlx4_flow_action {
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t queue:1; /**< Target is a receive queue. */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v5 3/4] net/mlx4: support for the RSS flow action
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (13 preceding siblings ...)
  2017-06-28 14:03 ` [PATCH v5 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
@ 2017-06-28 14:03 ` Vasily Philipov
  2017-06-29 16:53   ` Adrien Mazarguil
  2017-06-28 14:03 ` [PATCH v5 4/4] app/testpmd: add isolated mode parameter Vasily Philipov
                   ` (12 subsequent siblings)
  27 siblings, 1 reply; 51+ messages in thread
From: Vasily Philipov @ 2017-06-28 14:03 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Isolated mode must be enabled.
The number of queues in an RSS ring must be a power of 2.
Sharing a queue between several RSS rings is not supported.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      |  21 +++--
 drivers/net/mlx4/mlx4.h      |   5 ++
 drivers/net/mlx4/mlx4_flow.c | 197 ++++++++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx4/mlx4_flow.h |   3 +-
 4 files changed, 211 insertions(+), 15 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 22fa7c6..6ab7241 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -554,9 +554,9 @@ void priv_unlock(struct priv *priv)
  *   The number of entries in queues[].
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   Pointer to a parent rxq structure, NULL on failure.
  */
-static int
+struct rxq *
 priv_create_parent(struct priv *priv,
 		   uint16_t queues[],
 		   uint16_t children_n)
@@ -568,13 +568,15 @@ void priv_unlock(struct priv *priv)
 	parent = rte_zmalloc("parent queue",
 			     sizeof(*parent),
 			     RTE_CACHE_LINE_SIZE);
-	if (!parent)
-		return -ENOMEM;
+	if (!parent) {
+		ERROR("cannot allocate memory for RSS parent queue");
+		return NULL;
+	}
 	ret = rxq_setup(priv->dev, parent, 0, 0, 0,
 			NULL, NULL, children_n, NULL);
 	if (ret) {
 		rte_free(parent);
-		return -ret;
+		return NULL;
 	}
 	parent->rss.queues_n = children_n;
 	if (queues) {
@@ -587,7 +589,7 @@ void priv_unlock(struct priv *priv)
 			parent->rss.queues[i] = i;
 	}
 	LIST_INSERT_HEAD(&priv->parents, parent, next);
-	return 0;
+	return parent;
 }
 
 /**
@@ -639,7 +641,6 @@ void priv_unlock(struct priv *priv)
 	unsigned int rxqs_n = dev->data->nb_rx_queues;
 	unsigned int txqs_n = dev->data->nb_tx_queues;
 	unsigned int tmp;
-	int ret;
 
 	priv->rxqs = (void *)dev->data->rx_queues;
 	priv->txqs = (void *)dev->data->tx_queues;
@@ -696,14 +697,12 @@ void priv_unlock(struct priv *priv)
 	priv->rxqs_n = rxqs_n;
 	if (priv->isolated)
 		return 0;
-	ret = priv_create_parent(priv, NULL, priv->rxqs_n);
-	if (!ret)
+	if (priv_create_parent(priv, NULL, priv->rxqs_n))
 		return 0;
 	/* Failure, rollback. */
 	priv->rss = 0;
 	priv->rxqs_n = tmp;
-	assert(ret > 0);
-	return ret;
+	return ENOMEM;
 }
 
 /**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index b5fe1b4..f45e017 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -370,4 +370,9 @@ struct priv {
 void
 rxq_parent_cleanup(struct rxq *parent);
 
+struct rxq *
+priv_create_parent(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n);
+
 #endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 3fd2716..9c0fba1 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -112,6 +112,7 @@ struct rte_flow_drop {
 static const enum rte_flow_action_type valid_actions[] = {
 	RTE_FLOW_ACTION_TYPE_DROP,
 	RTE_FLOW_ACTION_TYPE_QUEUE,
+	RTE_FLOW_ACTION_TYPE_RSS,
 	RTE_FLOW_ACTION_TYPE_END,
 };
 
@@ -672,6 +673,76 @@ struct rte_flow_drop {
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
 			action.queue = 1;
+			action.queues_n = 1;
+			action.queues[0] = queue->index;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			int i;
+			int ierr;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				actions->conf;
+
+			if (!priv->hw_rss) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used with "
+					   "the current configuration");
+				return -rte_errno;
+			}
+			if (!priv->isolated) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used without "
+					   "isolated mode");
+				return -rte_errno;
+			}
+			if (!rte_is_power_of_2(rss->num)) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "should be power of two");
+				return -rte_errno;
+			}
+			if (priv->max_rss_tbl_sz < rss->num) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "is too large");
+				return -rte_errno;
+			}
+			/* checking indexes array */
+			ierr = 0;
+			for (i = 0; i < rss->num; ++i) {
+				int j;
+				if (rss->queue[i] >= priv->rxqs_n)
+					ierr = 1;
+				/*
+				 * Prevent the user from specifying
+				 * the same queue twice in the RSS array.
+				 */
+				for (j = i + 1; j < rss->num && !ierr; ++j)
+					if (rss->queue[j] == rss->queue[i])
+						ierr = 1;
+				if (ierr) {
+					rte_flow_error_set(
+						error,
+						ENOTSUP,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"RSS action only supports "
+						"unique queue indices "
+						"in a list");
+					return -rte_errno;
+				}
+			}
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			goto exit_action_not_supported;
 		}
@@ -797,6 +868,82 @@ struct rte_flow_drop {
 }
 
 /**
+ * Get RSS parent rxq structure for given queues.
+ *
+ * Creates a new one or returns an existing one.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   queues indices array, NULL in default RSS case.
+ * @param children_n
+ *   the size of queues array.
+ *
+ * @return
+ *   Pointer to a parent rxq structure, NULL on failure.
+ */
+static struct rxq *
+priv_get_parent(struct priv *priv,
+		uint16_t queues[],
+		uint16_t children_n,
+		struct rte_flow_error *error)
+{
+	unsigned int i;
+	struct rxq *parent;
+
+	for (parent = LIST_FIRST(&priv->parents);
+	     parent;
+	     parent = LIST_NEXT(parent, next)) {
+		unsigned int same = 0;
+		unsigned int overlap = 0;
+
+		/*
+		 * Find out whether an appropriate parent queue already exists
+		 * and can be reused, otherwise make sure there are no overlaps.
+		 */
+		for (i = 0; i < children_n; ++i) {
+			unsigned int j;
+
+			for (j = 0; j < parent->rss.queues_n; ++j) {
+				if (parent->rss.queues[j] != queues[i])
+					continue;
+				++overlap;
+				if (i == j)
+					++same;
+			}
+		}
+		if (same == children_n &&
+			children_n == parent->rss.queues_n)
+			return parent;
+		else if (overlap)
+			goto error;
+	}
+	/* Exclude the cases when some QPs were created without RSS */
+	for (i = 0; i < children_n; ++i) {
+		struct rxq *rxq = (*priv->rxqs)[queues[i]];
+		if (rxq->qp)
+			goto error;
+	}
+	parent = priv_create_parent(priv, queues, children_n);
+	if (!parent) {
+		rte_flow_error_set(error,
+				   ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "flow rule creation failure");
+		return NULL;
+	}
+	return parent;
+
+error:
+	rte_flow_error_set(error,
+			   EEXIST,
+			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL,
+			   "sharing a queue between several"
+			   " RSS groups is not supported");
+	return NULL;
+}
+
+/**
  * Complete flow rule creation.
  *
  * @param priv
@@ -819,6 +966,7 @@ struct rte_flow_drop {
 {
 	struct ibv_qp *qp;
 	struct rte_flow *rte_flow;
+	struct rxq *rxq_parent = NULL;
 
 	assert(priv->pd);
 	assert(priv->ctx);
@@ -831,9 +979,39 @@ struct rte_flow_drop {
 	if (action->drop) {
 		qp = priv->flow_drop_queue->qp;
 	} else {
-		struct rxq *rxq = (*priv->rxqs)[action->queue_id];
+		int ret;
+		unsigned int i;
+		struct rxq *rxq = NULL;
 
-		qp = rxq->qp;
+		if (action->queues_n > 1) {
+			rxq_parent = priv_get_parent(priv, action->queues,
+						     action->queues_n, error);
+			if (!rxq_parent)
+				goto error;
+		}
+		for (i = 0; i < action->queues_n; ++i) {
+			rxq = (*priv->rxqs)[action->queues[i]];
+			/*
+			 * In case of isolated mode we postpone
+			 * ibv receive queue creation till the first
+			 * rte_flow rule will be applied on that queue.
+			 */
+			if (!rxq->qp) {
+				assert(priv->isolated);
+				ret = rxq_create_qp(rxq, rxq->elts_n,
+						    0, 0, rxq_parent);
+				if (ret) {
+					rte_flow_error_set(
+						error,
+						ENOMEM,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"flow rule creation failure");
+					goto error;
+				}
+			}
+		}
+		qp = action->queues_n > 1 ? rxq_parent->qp : rxq->qp;
 		rte_flow->qp = qp;
 	}
 	rte_flow->ibv_attr = ibv_attr;
@@ -846,6 +1024,8 @@ struct rte_flow_drop {
 	return rte_flow;
 
 error:
+	if (rxq_parent)
+		rxq_parent_cleanup(rxq_parent);
 	rte_free(rte_flow);
 	return NULL;
 }
@@ -909,11 +1089,22 @@ struct rte_flow_drop {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			action.queue = 1;
-			action.queue_id =
+			action.queues_n = 1;
+			action.queues[0] =
 				((const struct rte_flow_action_queue *)
 				 actions->conf)->index;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
 			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			unsigned int i;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				 actions->conf;
+
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 4d007da..beabcf2 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -98,7 +98,8 @@ struct mlx4_flow {
 struct mlx4_flow_action {
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t queue:1; /**< Target is a receive queue. */
-	uint32_t queue_id; /**< Identifier of the queue. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indices to use. */
+	uint16_t queues_n; /**< Number of entries in queue[] */
 };
 
 int mlx4_priv_flow_start(struct priv *priv);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v5 4/4] app/testpmd: add isolated mode parameter
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (14 preceding siblings ...)
  2017-06-28 14:03 ` [PATCH v5 3/4] net/mlx4: support for the RSS flow action Vasily Philipov
@ 2017-06-28 14:03 ` Vasily Philipov
  2017-06-29 16:53   ` Adrien Mazarguil
  2017-07-02 12:32 ` [PATCH v6 1/4] " Vasily Philipov
                   ` (11 subsequent siblings)
  27 siblings, 1 reply; 51+ messages in thread
From: Vasily Philipov @ 2017-06-28 14:03 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

If the --isolated-mode parameter is given on the command line,
the rte_flow isolate action is applied to each port before
the device is configured.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 app/test-pmd/parameters.c |  3 +++
 app/test-pmd/testpmd.c    | 14 ++++++++++++++
 app/test-pmd/testpmd.h    |  1 +
 3 files changed, 18 insertions(+)

diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index fbe6284..e313871 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -623,6 +623,7 @@
 		{ "tx-queue-stats-mapping",	1, 0, 0 },
 		{ "rx-queue-stats-mapping",	1, 0, 0 },
 		{ "no-flush-rx",	0, 0, 0 },
+		{ "isolated-mode",	        0, 0, 0 },
 		{ "txpkts",			1, 0, 0 },
 		{ "disable-link-check",		0, 0, 0 },
 		{ "no-lsc-interrupt",		0, 0, 0 },
@@ -1081,6 +1082,8 @@
 				lsc_interrupt = 0;
 			if (!strcmp(lgopts[opt_idx].name, "no-rmv-interrupt"))
 				rmv_interrupt = 0;
+			if (!strcmp(lgopts[opt_idx].name, "isolated-mode"))
+				isolated_mode = 1;
 			if (!strcmp(lgopts[opt_idx].name, "print-event"))
 				if (parse_event_printing_config(optarg, 1)) {
 					rte_exit(EXIT_FAILURE,
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index d1041af..610e675 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -267,6 +267,11 @@ struct fwd_engine * fwd_engines[] = {
 uint8_t no_flush_rx = 0; /* flush by default */
 
 /*
+ * Flow API isolated mode.
+ */
+uint8_t isolated_mode;
+
+/*
  * Avoids to check link status when starting/stopping a port.
  */
 uint8_t no_link_check = 0; /* check by default */
@@ -1422,6 +1427,15 @@ static void eth_event_callback(uint8_t port_id,
 		if (port->need_reconfig > 0) {
 			port->need_reconfig = 0;
 
+			if (isolated_mode) {
+				int ret = port_flow_isolate(pi, 1);
+				if (ret) {
+					printf("Failed to apply isolated"
+					       " mode on port %d\n", pi);
+					return -1;
+				}
+			}
+
 			printf("Configuring Port %d (socket %u)\n", pi,
 					port->socket_id);
 			/* configure port */
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index e6c43ba..dbe9898 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -303,6 +303,7 @@ struct queue_stats_mappings {
 extern uint8_t  numa_support; /**< set by "--numa" parameter */
 extern uint16_t port_topology; /**< set by "--port-topology" parameter */
 extern uint8_t no_flush_rx; /**<set by "--no-flush-rx" parameter */
+extern uint8_t isolated_mode; /**<set by "--isolated-mode */
 extern uint8_t  mp_anon; /**< set by "--mp-anon" parameter */
 extern uint8_t no_link_check; /**<set by "--disable-link-check" parameter */
 extern volatile int test_done; /* stop packet forwarding when set to 1. */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: [PATCH v4 4/4] app/testpmd: add isolated mode parameter
  2017-06-27  8:28         ` Thomas Monjalon
@ 2017-06-29  5:52           ` Wu, Jingjing
  0 siblings, 0 replies; 51+ messages in thread
From: Wu, Jingjing @ 2017-06-29  5:52 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Vasily Philipov, Adrien Mazarguil, Nélio Laranjeiro

> > > > Should it block the app startup if isolated-mode setting fails?
> > >
> > > if isolated mode cannot be enabled on any port, that port cannot be
> > > initialized and that causes testpmd to quit, at least it won't go
> > > against the user's wishes
> >
> > If so, I prefer the isolated_mode to be port's argument but not global one.
> > How about to add a command to configure the isolate mode?
> 
> There is already a command to configure isolate mode per-port:
> 	http://dpdk.org/patch/25320
> 	http://dpdk.org/doc/guides/testpmd_app_ug/testpmd_funcs.html#flow
> -syntax
> 
> I think it does not make sense to replicate this per-port command in command
> line parameters.
> All the other parameters are global:
> 	http://dpdk.org/doc/guides/testpmd_app_ug/run_app.html#testpmd-
> command-line-options
> The idea here is to have a global isolate mode with a general option.

OK. Thanks
Then I'm fine with it.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v5 1/4] net/mlx4: RSS parent queues new method maintenance
  2017-06-28 14:03 ` [PATCH v5 1/4] net/mlx4: RSS parent queues new method maintenance Vasily Philipov
@ 2017-06-29 16:51   ` Adrien Mazarguil
  0 siblings, 0 replies; 51+ messages in thread
From: Adrien Mazarguil @ 2017-06-29 16:51 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Nelio Laranjeiro

Hi Vasily,

You should rearrange this series more logically. Currently:

- 1/4 adds the ability to have multiple RSS parent QPs in mlx4.
- 2/4 adds rte_flow isolated mode support to mlx4 with the limitation that
      it must be enabled during init.
- 3/4 updates mlx4 to support the rte_flow RSS action with the limitation
      that it only works when isolated mode is also enabled.
- 4/4 adds a testpmd parameter to enable rte_flow isolated mode on all ports
      automatically during init.

Which makes it impossible to validate any of these commits with testpmd
before the last one. How about:

- 1/4 adds a testpmd parameter to enable rte_flow isolated mode on all ports
      automatically during init.
- 2/4 adds rte_flow isolated mode support to mlx4 with the limitation that
      it must be enabled during init.
- 3/4 adds the ability to have multiple RSS parent QPs in mlx4.
- 4/4 updates mlx4 to support the rte_flow RSS action with the limitation
      that it only works when isolated mode is also enabled.

Back to the current commit, please see below.

On Wed, Jun 28, 2017 at 05:03:54PM +0300, Vasily Philipov wrote:
> Insert just created parent queue in a list, keep the list in private
> structure.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>

This log message is not very helpful to people not familiar with the mlx4
PMD; it doesn't explain why it's done (think of the other contributors!).
Here's an alternative suggestion:

 net/mlx4: refactor RSS parent queue allocation

 A special "parent" queue must be allocated in addition to a group of
 standard Rx queues for RSS to work. This is done automatically outside
 of isolated mode by the PMD when applications request several Rx queues.

 Since each configured flow rule with the RSS action may target a different
 set of queues, the PMD must have the ability to dynamically allocate
 several parent queues, one per RSS group.

 Refactor RSS parent queue allocations (currently limited to a single
 parent) in preparation for flow API RSS action support.

> ---
> The series depends on:
> 
> http://dpdk.org/ml/archives/dev/2017-April/064327.html
> http://dpdk.org/dev/patchwork/patch/23741/

You can drop this part from subsequent iterations as it's now applied.

> ---
>  drivers/net/mlx4/mlx4.c | 377 +++++++++++++++++++++++++++++++++---------------
>  drivers/net/mlx4/mlx4.h |  17 ++-
>  2 files changed, 274 insertions(+), 120 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index ec4419a..9f3c746 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -533,13 +533,94 @@ void priv_unlock(struct priv *priv)
>  
>  static int
>  rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
> -	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
> -	  struct rte_mempool *mp);
> +	  unsigned int socket, int inactive,
> +	  const struct rte_eth_rxconf *conf,
> +	  struct rte_mempool *mp, int children_n,
> +	  struct rxq *rxq_parent);
>  
>  static void
>  rxq_cleanup(struct rxq *rxq);
>  
>  /**
> + * Create RSS parent queue.
> + *
> + * The new created strucutre will be on the head of priv parents list.

strucutre => structure

How about:

 The new parent is inserted in front of the list in the private
 structure.

> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param queues
> + *   queues indices array, if NULL use all Rx queues.

queues => Queues

(being pedantic and all)

> + * @param children_n
> + *   The number of entries in queues[].
> + *
> + * @return
> + *   0 on success, negative errno value on failure.
> + */
> +static int
> +priv_create_parent(struct priv *priv,
> +		   uint16_t queues[],
> +		   uint16_t children_n)

Please rename this function "priv_parent_create()" for consistency.

> +{
> +	int ret;
> +	uint16_t i;
> +	struct rxq *parent;
> +
> +	parent = rte_zmalloc("parent queue",
> +			     sizeof(*parent),
> +			     RTE_CACHE_LINE_SIZE);
> +	if (!parent)
> +		return -ENOMEM;
> +	ret = rxq_setup(priv->dev, parent, 0, 0, 0,
> +			NULL, NULL, children_n, NULL);
> +	if (ret) {
> +		rte_free(parent);
> +		return -ret;
> +	}
> +	parent->rss.queues_n = children_n;
> +	if (queues) {
> +		for (i = 0; i < children_n; ++i)
> +			parent->rss.queues[i] = queues[i];
> +	} else {
> +		/* the default RSS ring case */
> +		assert(priv->rxqs_n == children_n);
> +		for (i = 0; i < priv->rxqs_n; ++i)
> +			parent->rss.queues[i] = i;
> +	}
> +	LIST_INSERT_HEAD(&priv->parents, parent, next);
> +	return 0;
> +}

You should make this function return the new parent directly.

> +
> +/**
> + * Cleanup RX queue parent structure.

Cleanup => Clean up

> + *
> + * @param parexnt

parexnt => parent

> + *   RX queue parent structure.
> + */
> +void
> +rxq_parent_cleanup(struct rxq *parent)
> +{
> +	LIST_REMOVE(parent, next);
> +	rxq_cleanup(parent);
> +	rte_free(parent);
> +}
> +
> +/**
> + * Clean up parent structures from the parents list.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + */
> +static void
> +priv_parents_list_cleanup(struct priv *priv)

Please keep it under the same name space:

 priv_parent_list_cleanup()

> +{
> +	while (!LIST_EMPTY(&priv->parents)) {
> +		struct rxq *parent = LIST_FIRST(&priv->parents);
> +
> +		rxq_parent_cleanup(parent);

Relatively minor, this could be shortened as:

 rxq_parent_cleanup(LIST_FIRST(&priv->parents));

> +	}
> +}
> +
> +/**
>   * Ethernet device configuration.
>   *
>   * Prepare the driver for a given number of TX and RX queues.
> @@ -588,7 +669,7 @@ void priv_unlock(struct priv *priv)
>  		for (i = 0; (i != priv->rxqs_n); ++i)
>  			if ((*priv->rxqs)[i] != NULL)
>  				return EINVAL;
> -		rxq_cleanup(&priv->rxq_parent);
> +		priv_parents_list_cleanup(priv);
>  		priv->rss = 0;
>  		priv->rxqs_n = 0;
>  	}
> @@ -613,7 +694,7 @@ void priv_unlock(struct priv *priv)
>  	priv->rss = 1;
>  	tmp = priv->rxqs_n;
>  	priv->rxqs_n = rxqs_n;
> -	ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, 0, NULL, NULL);
> +	ret = priv_create_parent(priv, NULL, priv->rxqs_n);
>  	if (!ret)
>  		return 0;
>  	/* Failure, rollback. */
> @@ -2503,7 +2584,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
>  		return;
>  	if (priv->rss) {
> -		rxq_mac_addr_del(&priv->rxq_parent, mac_index);
> +		rxq_mac_addr_del(LIST_FIRST(&priv->parents), mac_index);
>  		goto end;
>  	}
>  	for (i = 0; (i != priv->dev->data->nb_rx_queues); ++i)
> @@ -2570,7 +2651,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		goto end;
>  	}
>  	if (priv->rss) {
> -		ret = rxq_mac_addr_add(&priv->rxq_parent, mac_index);
> +		ret = rxq_mac_addr_add(LIST_FIRST(&priv->parents), mac_index);
>  		if (ret)
>  			return ret;
>  		goto end;
> @@ -2752,8 +2833,9 @@ struct txq_mp2mr_mbuf_check_data {
>  		rxq_promiscuous_disable(rxq);
>  		rxq_allmulticast_disable(rxq);
>  		rxq_mac_addrs_del(rxq);
> -		claim_zero(ibv_destroy_qp(rxq->qp));
>  	}
> +	if (rxq->qp != NULL)
> +		claim_zero(ibv_destroy_qp(rxq->qp));
>  	if (rxq->cq != NULL)
>  		claim_zero(ibv_destroy_cq(rxq->cq));
>  	if (rxq->rd != NULL) {
> @@ -3330,15 +3412,18 @@ struct txq_mp2mr_mbuf_check_data {
>   *   Completion queue to associate with QP.
>   * @param desc
>   *   Number of descriptors in QP (hint only).
> - * @param parent
> - *   If nonzero, create a parent QP, otherwise a child.
> + * @param children_n
> + *   If nonzero, a number of children for parent QP and zero for a child.
> + * @param rxq_parent
> + *   Pointer for a parent in a child case, NULL otherwise.
>   *
>   * @return
>   *   QP pointer or NULL in case of error.
>   */
>  static struct ibv_qp *
>  rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
> -		 int parent, struct ibv_exp_res_domain *rd)
> +		 int children_n, struct ibv_exp_res_domain *rd,
> +		 struct rxq *rxq_parent)
>  {
>  	struct ibv_exp_qp_init_attr attr = {
>  		/* CQ to be associated with the send queue. */
> @@ -3368,16 +3453,16 @@ struct txq_mp2mr_mbuf_check_data {
>  	attr.max_inl_recv = priv->inl_recv_size,
>  	attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV;
>  #endif
> -	if (parent) {
> +	if (children_n > 0) {
>  		attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
>  		/* TSS isn't necessary. */
>  		attr.qpg.parent_attrib.tss_child_count = 0;
>  		attr.qpg.parent_attrib.rss_child_count =
> -			rte_align32pow2(priv->rxqs_n + 1) >> 1;
> +			rte_align32pow2(children_n + 1) >> 1;
>  		DEBUG("initializing parent RSS queue");
>  	} else {
>  		attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
> -		attr.qpg.qpg_parent = priv->rxq_parent.qp;
> +		attr.qpg.qpg_parent = rxq_parent->qp;
>  		DEBUG("initializing child RSS queue");
>  	}
>  	return ibv_exp_create_qp(priv->ctx, &attr);
> @@ -3413,13 +3498,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	struct ibv_recv_wr *bad_wr;
>  	unsigned int mb_len;
>  	int err;
> -	int parent = (rxq == &priv->rxq_parent);
>  
> -	if (parent) {
> -		ERROR("%p: cannot rehash parent queue %p",
> -		      (void *)dev, (void *)rxq);
> -		return EINVAL;
> -	}
>  	mb_len = rte_pktmbuf_data_room_size(rxq->mp);
>  	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
>  	/* Number of descriptors and mbufs currently allocated. */
> @@ -3464,6 +3543,8 @@ struct txq_mp2mr_mbuf_check_data {
>  	}
>  	/* From now on, any failure will render the queue unusable.
>  	 * Reinitialize QP. */
> +	if (!tmpl.qp)
> +		goto skip_init;
>  	mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
>  	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
>  	if (err) {
> @@ -3471,12 +3552,6 @@ struct txq_mp2mr_mbuf_check_data {
>  		assert(err > 0);
>  		return err;
>  	}
> -	err = ibv_resize_cq(tmpl.cq, desc_n);
> -	if (err) {
> -		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
> -		assert(err > 0);
> -		return err;
> -	}
>  	mod = (struct ibv_exp_qp_attr){
>  		/* Move the QP to this state. */
>  		.qp_state = IBV_QPS_INIT,
> @@ -3485,9 +3560,6 @@ struct txq_mp2mr_mbuf_check_data {
>  	};
>  	err = ibv_exp_modify_qp(tmpl.qp, &mod,
>  				(IBV_EXP_QP_STATE |
> -#ifdef RSS_SUPPORT
> -				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
> -#endif /* RSS_SUPPORT */
>  				 IBV_EXP_QP_PORT));
>  	if (err) {
>  		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
> @@ -3495,6 +3567,13 @@ struct txq_mp2mr_mbuf_check_data {
>  		assert(err > 0);
>  		return err;
>  	};
> +skip_init:
> +	err = ibv_resize_cq(tmpl.cq, desc_n);
> +	if (err) {
> +		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
> +		assert(err > 0);
> +		return err;
> +	}
>  	/* Reconfigure flows. Do not care for errors. */
>  	if (!priv->rss) {
>  		rxq_mac_addrs_add(&tmpl);
> @@ -3562,6 +3641,8 @@ struct txq_mp2mr_mbuf_check_data {
>  	rxq->elts_n = 0;
>  	rte_free(rxq->elts.sp);
>  	rxq->elts.sp = NULL;
> +	if (!tmpl.qp)
> +		goto skip_rtr;
>  	/* Post WRs. */
>  	err = ibv_post_recv(tmpl.qp,
>  			    (tmpl.sp ?
> @@ -3589,6 +3670,116 @@ struct txq_mp2mr_mbuf_check_data {
>  }
>  
>  /**
> + * Create verbs QP resources associated with a rxq.
> + *
> + * @param rxq
> + *   Pointer to RX queue structure.
> + * @param desc
> + *   Number of descriptors to configure in queue.
> + * @param inactive
> + *   If true, the queue is disabled because its index is higher or
> + *   equal to the real number of queues, which must be a power of 2.
> + * @param children_n
> + *   The number of children in a parent case, zero for a child.
> + * @param rxq_parent
> + *   The pointer to a parent RX structure for a child in RSS case,
> + *   NULL for parent.
> + *
> + * @return
> + *   0 on success, errno value on failure.
> + */
> +int
> +rxq_create_qp(struct rxq *rxq,
> +	      uint16_t desc,
> +	      int inactive,
> +	      int children_n,
> +	      struct rxq *rxq_parent)
> +{
> +	int ret;
> +	struct ibv_exp_qp_attr mod;
> +	struct ibv_exp_query_intf_params params;
> +	enum ibv_exp_query_intf_status status;
> +	struct ibv_recv_wr *bad_wr;
> +	int parent = (children_n > 0);
> +	struct priv *priv = rxq->priv;
> +
> +#ifdef RSS_SUPPORT
> +	if (priv->rss && !inactive && (rxq_parent || parent))
> +		rxq->qp = rxq_setup_qp_rss(priv, rxq->cq, desc,
> +					   children_n, rxq->rd,
> +					   rxq_parent);
> +	else
> +#endif /* RSS_SUPPORT */
> +		rxq->qp = rxq_setup_qp(priv, rxq->cq, desc, rxq->rd);
> +	if (rxq->qp == NULL) {
> +		ret = (errno ? errno : EINVAL);
> +		ERROR("QP creation failure: %s",
> +		      strerror(ret));
> +		return ret;
> +	}
> +	mod = (struct ibv_exp_qp_attr){
> +		/* Move the QP to this state. */
> +		.qp_state = IBV_QPS_INIT,
> +		/* Primary port number. */
> +		.port_num = priv->port
> +	};
> +	ret = ibv_exp_modify_qp(rxq->qp, &mod,
> +				(IBV_EXP_QP_STATE |
> +#ifdef RSS_SUPPORT
> +				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
> +#endif /* RSS_SUPPORT */
> +				 IBV_EXP_QP_PORT));
> +	if (ret) {
> +		ERROR("QP state to IBV_QPS_INIT failed: %s",
> +		      strerror(ret));
> +		return ret;
> +	}
> +	if (parent || !priv->rss) {
> +		/* Configure MAC and broadcast addresses. */
> +		ret = rxq_mac_addrs_add(rxq);
> +		if (ret) {
> +			ERROR("QP flow attachment failed: %s",
> +			      strerror(ret));
> +			return ret;
> +		}
> +	}
> +	if (!parent) {
> +		ret = ibv_post_recv(rxq->qp,
> +				    (rxq->sp ?
> +				     &(*rxq->elts.sp)[0].wr :
> +				     &(*rxq->elts.no_sp)[0].wr),
> +				    &bad_wr);
> +		if (ret) {
> +			ERROR("ibv_post_recv() failed for WR %p: %s",
> +			      (void *)bad_wr,
> +			      strerror(ret));
> +			return ret;
> +		}
> +	}
> +	mod = (struct ibv_exp_qp_attr){
> +		.qp_state = IBV_QPS_RTR
> +	};
> +	ret = ibv_exp_modify_qp(rxq->qp, &mod, IBV_EXP_QP_STATE);
> +	if (ret) {
> +		ERROR("QP state to IBV_QPS_RTR failed: %s",
> +		      strerror(ret));
> +		return ret;
> +	}
> +	params = (struct ibv_exp_query_intf_params){
> +		.intf_scope = IBV_EXP_INTF_GLOBAL,
> +		.intf = IBV_EXP_INTF_QP_BURST,
> +		.obj = rxq->qp,
> +	};
> +	rxq->if_qp = ibv_exp_query_intf(priv->ctx, &params, &status);
> +	if (rxq->if_qp == NULL) {
> +		ERROR("QP interface family query failed with status %d",
> +		      status);
> +		return errno;
> +	}
> +	return 0;
> +}
> +
> +/**
>   * Configure a RX queue.
>   *
>   * @param dev
> @@ -3606,14 +3797,21 @@ struct txq_mp2mr_mbuf_check_data {
>   *   Thresholds parameters.
>   * @param mp
>   *   Memory pool for buffer allocations.
> + * @param children_n
> + *   The number of children in a parent case, zero for a child.
> + * @param rxq_parent
> + *   The pointer to a parent RX structure (or NULL) in a child case,
> + *   NULL for parent.
>   *
>   * @return
>   *   0 on success, errno value on failure.
>   */
>  static int
>  rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
> -	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
> -	  struct rte_mempool *mp)
> +	  unsigned int socket, int inactive,
> +	  const struct rte_eth_rxconf *conf,
> +	  struct rte_mempool *mp, int children_n,
> +	  struct rxq *rxq_parent)
>  {
>  	struct priv *priv = dev->data->dev_private;
>  	struct rxq tmpl = {
> @@ -3621,17 +3819,15 @@ struct txq_mp2mr_mbuf_check_data {
>  		.mp = mp,
>  		.socket = socket
>  	};
> -	struct ibv_exp_qp_attr mod;
>  	union {
>  		struct ibv_exp_query_intf_params params;
>  		struct ibv_exp_cq_init_attr cq;
>  		struct ibv_exp_res_domain_init_attr rd;
>  	} attr;
>  	enum ibv_exp_query_intf_status status;
> -	struct ibv_recv_wr *bad_wr;
>  	unsigned int mb_len;
>  	int ret = 0;
> -	int parent = (rxq == &priv->rxq_parent);
> +	int parent = (children_n > 0);
>  
>  	(void)conf; /* Thresholds configuration (ignored). */
>  	/*
> @@ -3711,45 +3907,6 @@ struct txq_mp2mr_mbuf_check_data {
>  	      priv->device_attr.max_qp_wr);
>  	DEBUG("priv->device_attr.max_sge is %d",
>  	      priv->device_attr.max_sge);
> -#ifdef RSS_SUPPORT
> -	if (priv->rss && !inactive)
> -		tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
> -					   tmpl.rd);
> -	else
> -#endif /* RSS_SUPPORT */
> -		tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
> -	if (tmpl.qp == NULL) {
> -		ret = (errno ? errno : EINVAL);
> -		ERROR("%p: QP creation failure: %s",
> -		      (void *)dev, strerror(ret));
> -		goto error;
> -	}
> -	mod = (struct ibv_exp_qp_attr){
> -		/* Move the QP to this state. */
> -		.qp_state = IBV_QPS_INIT,
> -		/* Primary port number. */
> -		.port_num = priv->port
> -	};
> -	ret = ibv_exp_modify_qp(tmpl.qp, &mod,
> -				(IBV_EXP_QP_STATE |
> -#ifdef RSS_SUPPORT
> -				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
> -#endif /* RSS_SUPPORT */
> -				 IBV_EXP_QP_PORT));
> -	if (ret) {
> -		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
> -		      (void *)dev, strerror(ret));
> -		goto error;
> -	}
> -	if ((parent) || (!priv->rss))  {
> -		/* Configure MAC and broadcast addresses. */
> -		ret = rxq_mac_addrs_add(&tmpl);
> -		if (ret) {
> -			ERROR("%p: QP flow attachment failed: %s",
> -			      (void *)dev, strerror(ret));
> -			goto error;
> -		}
> -	}
>  	/* Allocate descriptors for RX queues, except for the RSS parent. */
>  	if (parent)
>  		goto skip_alloc;
> @@ -3760,29 +3917,14 @@ struct txq_mp2mr_mbuf_check_data {
>  	if (ret) {
>  		ERROR("%p: RXQ allocation failed: %s",
>  		      (void *)dev, strerror(ret));
> -		goto error;
> -	}
> -	ret = ibv_post_recv(tmpl.qp,
> -			    (tmpl.sp ?
> -			     &(*tmpl.elts.sp)[0].wr :
> -			     &(*tmpl.elts.no_sp)[0].wr),
> -			    &bad_wr);
> -	if (ret) {
> -		ERROR("%p: ibv_post_recv() failed for WR %p: %s",
> -		      (void *)dev,
> -		      (void *)bad_wr,
> -		      strerror(ret));
> -		goto error;
> +		return ret;
>  	}
>  skip_alloc:
> -	mod = (struct ibv_exp_qp_attr){
> -		.qp_state = IBV_QPS_RTR
> -	};
> -	ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
> -	if (ret) {
> -		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
> -		      (void *)dev, strerror(ret));
> -		goto error;
> +	if (parent || rxq_parent || !priv->rss) {
> +		ret = rxq_create_qp(&tmpl, desc, inactive,
> +				    children_n, rxq_parent);
> +		if (ret)
> +			goto error;
>  	}
>  	/* Save port ID. */
>  	tmpl.port_id = dev->data->port_id;
> @@ -3794,21 +3936,11 @@ struct txq_mp2mr_mbuf_check_data {
>  	};
>  	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
>  	if (tmpl.if_cq == NULL) {
> +		ret = EINVAL;
>  		ERROR("%p: CQ interface family query failed with status %d",
>  		      (void *)dev, status);
>  		goto error;
>  	}
> -	attr.params = (struct ibv_exp_query_intf_params){
> -		.intf_scope = IBV_EXP_INTF_GLOBAL,
> -		.intf = IBV_EXP_INTF_QP_BURST,
> -		.obj = tmpl.qp,
> -	};
> -	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
> -	if (tmpl.if_qp == NULL) {
> -		ERROR("%p: QP interface family query failed with status %d",
> -		      (void *)dev, status);
> -		goto error;
> -	}
>  	/* Clean up rxq in case we're reinitializing it. */
>  	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
>  	rxq_cleanup(rxq);
> @@ -3846,6 +3978,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		    unsigned int socket, const struct rte_eth_rxconf *conf,
>  		    struct rte_mempool *mp)
>  {
> +	struct rxq *parent;
>  	struct priv *priv = dev->data->dev_private;
>  	struct rxq *rxq = (*priv->rxqs)[idx];
>  	int inactive = 0;
> @@ -3880,9 +4013,16 @@ struct txq_mp2mr_mbuf_check_data {
>  			return -ENOMEM;
>  		}
>  	}
> -	if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
> -		inactive = 1;
> -	ret = rxq_setup(dev, rxq, desc, socket, inactive, conf, mp);
> +	if (priv->rss) {
> +		/* The list consists of the single default one. */
> +		parent = LIST_FIRST(&priv->parents);
> +		if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
> +			inactive = 1;
> +	} else {
> +		parent = NULL;
> +	}
> +	ret = rxq_setup(dev, rxq, desc, socket,
> +			inactive, conf, mp, 0, parent);
>  	if (ret)
>  		rte_free(rxq);
>  	else {
> @@ -3919,7 +4059,6 @@ struct txq_mp2mr_mbuf_check_data {
>  		return;
>  	priv = rxq->priv;
>  	priv_lock(priv);
> -	assert(rxq != &priv->rxq_parent);
>  	for (i = 0; (i != priv->rxqs_n); ++i)
>  		if ((*priv->rxqs)[i] == rxq) {
>  			DEBUG("%p: removing RX queue %p from list",
> @@ -3971,7 +4110,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
>  	priv->started = 1;
>  	if (priv->rss) {
> -		rxq = &priv->rxq_parent;
> +		rxq = LIST_FIRST(&priv->parents);
>  		r = 1;
>  	} else {
>  		rxq = (*priv->rxqs)[0];
> @@ -4054,7 +4193,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
>  	priv->started = 0;
>  	if (priv->rss) {
> -		rxq = &priv->rxq_parent;
> +		rxq = LIST_FIRST(&priv->parents);
>  		r = 1;
>  	} else {
>  		rxq = (*priv->rxqs)[0];
> @@ -4188,7 +4327,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		priv->txqs = NULL;
>  	}
>  	if (priv->rss)
> -		rxq_cleanup(&priv->rxq_parent);
> +		priv_parents_list_cleanup(priv);
>  	if (priv->pd != NULL) {
>  		assert(priv->ctx != NULL);
>  		claim_zero(ibv_dealloc_pd(priv->pd));
> @@ -4569,7 +4708,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	if (!priv->started)
>  		goto end;
>  	if (priv->rss) {
> -		ret = rxq_promiscuous_enable(&priv->rxq_parent);
> +		ret = rxq_promiscuous_enable(LIST_FIRST(&priv->parents));
>  		if (ret) {
>  			priv_unlock(priv);
>  			return;
> @@ -4614,7 +4753,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		return;
>  	}
>  	if (priv->rss) {
> -		rxq_promiscuous_disable(&priv->rxq_parent);
> +		rxq_promiscuous_disable(LIST_FIRST(&priv->parents));
>  		goto end;
>  	}
>  	for (i = 0; (i != priv->rxqs_n); ++i)
> @@ -4649,7 +4788,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	if (!priv->started)
>  		goto end;
>  	if (priv->rss) {
> -		ret = rxq_allmulticast_enable(&priv->rxq_parent);
> +		ret = rxq_allmulticast_enable(LIST_FIRST(&priv->parents));
>  		if (ret) {
>  			priv_unlock(priv);
>  			return;
> @@ -4694,7 +4833,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		return;
>  	}
>  	if (priv->rss) {
> -		rxq_allmulticast_disable(&priv->rxq_parent);
> +		rxq_allmulticast_disable(LIST_FIRST(&priv->parents));
>  		goto end;
>  	}
>  	for (i = 0; (i != priv->rxqs_n); ++i)
> @@ -5003,7 +5142,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		 * Rehashing flows in all RX queues is necessary.
>  		 */
>  		if (priv->rss)
> -			rxq_mac_addrs_del(&priv->rxq_parent);
> +			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
>  		else
>  			for (i = 0; (i != priv->rxqs_n); ++i)
>  				if ((*priv->rxqs)[i] != NULL)
> @@ -5011,7 +5150,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		priv->vlan_filter[j].enabled = 1;
>  		if (priv->started) {
>  			if (priv->rss)
> -				rxq_mac_addrs_add(&priv->rxq_parent);
> +				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
>  			else
>  				for (i = 0; (i != priv->rxqs_n); ++i) {
>  					if ((*priv->rxqs)[i] == NULL)
> @@ -5025,7 +5164,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		 * Rehashing flows in all RX queues is necessary.
>  		 */
>  		if (priv->rss)
> -			rxq_mac_addrs_del(&priv->rxq_parent);
> +			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
>  		else
>  			for (i = 0; (i != priv->rxqs_n); ++i)
>  				if ((*priv->rxqs)[i] != NULL)
> @@ -5033,7 +5172,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		priv->vlan_filter[j].enabled = 0;
>  		if (priv->started) {
>  			if (priv->rss)
> -				rxq_mac_addrs_add(&priv->rxq_parent);
> +				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
>  			else
>  				for (i = 0; (i != priv->rxqs_n); ++i) {
>  					if ((*priv->rxqs)[i] == NULL)
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index 9a3bae9..fd24888 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -219,6 +219,7 @@ struct rxq_elt {
>  
>  /* RX queue descriptor. */
>  struct rxq {
> +	LIST_ENTRY(rxq) next; /* Used by parent queue only */
>  	struct priv *priv; /* Back pointer to private data. */
>  	struct rte_mempool *mp; /* Memory Pool for allocations. */
>  	struct ibv_mr *mr; /* Memory Region (for mp). */
> @@ -246,6 +247,10 @@ struct rxq {
>  	struct mlx4_rxq_stats stats; /* RX queue counters. */
>  	unsigned int socket; /* CPU socket ID for allocations. */
>  	struct ibv_exp_res_domain *rd; /* Resource Domain. */
> +	struct {
> +		uint16_t queues_n;
> +		uint16_t queues[RTE_MAX_QUEUES_PER_PORT];
> +	} rss;
>  };
>  
>  /* TX element. */
> @@ -339,7 +344,6 @@ struct priv {
>  #endif
>  	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
>  	/* RX/TX queues. */
> -	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
>  	unsigned int rxqs_n; /* RX queues array size. */
>  	unsigned int txqs_n; /* TX queues array size. */
>  	struct rxq *(*rxqs)[]; /* RX queues. */
> @@ -348,10 +352,21 @@ struct priv {
>  	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
>  	LIST_HEAD(mlx4_flows, rte_flow) flows;
>  	struct rte_intr_conf intr_conf; /* Active interrupt configuration. */
> +	LIST_HEAD(mlx4_parents, rxq) parents;
>  	rte_spinlock_t lock; /* Lock for control functions. */
>  };
>  
>  void priv_lock(struct priv *priv);
>  void priv_unlock(struct priv *priv);
>  
> +int
> +rxq_create_qp(struct rxq *rxq,
> +	      uint16_t desc,
> +	      int inactive,
> +	      int children_n,
> +	      struct rxq *rxq_parent);
> +
> +void
> +rxq_parent_cleanup(struct rxq *parent);
> +
>  #endif /* RTE_PMD_MLX4_H_ */
> -- 
> 1.8.3.1
> 

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v5 2/4] net/mlx4: implement isolated mode from flow API
  2017-06-28 14:03 ` [PATCH v5 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
@ 2017-06-29 16:52   ` Adrien Mazarguil
  0 siblings, 0 replies; 51+ messages in thread
From: Adrien Mazarguil @ 2017-06-29 16:52 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Nelio Laranjeiro

On Wed, Jun 28, 2017 at 05:03:55PM +0300, Vasily Philipov wrote:
> The user must request isolated mode before device configuration,
> the default RSS ring isn't created in this case.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>

Minor nit, please see below.

> ---
>  drivers/net/mlx4/mlx4.c      | 58 +++++++++++++++++++++++++++++++++++---------
>  drivers/net/mlx4/mlx4.h      |  1 +
>  drivers/net/mlx4/mlx4_flow.c | 39 +++++++++++++++++++++++++++++
>  drivers/net/mlx4/mlx4_flow.h |  5 ++++
>  4 files changed, 92 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 9f3c746..22fa7c6 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -650,7 +650,7 @@ void priv_unlock(struct priv *priv)
>  	}
>  	if (rxqs_n == priv->rxqs_n)
>  		return 0;
> -	if (!rte_is_power_of_2(rxqs_n)) {
> +	if (!rte_is_power_of_2(rxqs_n) && !priv->isolated) {
>  		unsigned n_active;
>  
>  		n_active = rte_align32pow2(rxqs_n + 1) >> 1;
> @@ -694,6 +694,8 @@ void priv_unlock(struct priv *priv)
>  	priv->rss = 1;
>  	tmp = priv->rxqs_n;
>  	priv->rxqs_n = rxqs_n;
> +	if (priv->isolated)
> +		return 0;
>  	ret = priv_create_parent(priv, NULL, priv->rxqs_n);
>  	if (!ret)
>  		return 0;
> @@ -2580,6 +2582,7 @@ struct txq_mp2mr_mbuf_check_data {
>  {
>  	unsigned int i;
>  
> +	assert(!priv->isolated);
>  	assert(mac_index < elemof(priv->mac));
>  	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
>  		return;
> @@ -2829,7 +2832,7 @@ struct txq_mp2mr_mbuf_check_data {
>  						rxq->if_cq,
>  						&params));
>  	}
> -	if (rxq->qp != NULL) {
> +	if (rxq->qp != NULL && !rxq->priv->isolated) {
>  		rxq_promiscuous_disable(rxq);
>  		rxq_allmulticast_disable(rxq);
>  		rxq_mac_addrs_del(rxq);
> @@ -3530,7 +3533,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		return 0;
>  	}
>  	/* Remove attached flows if RSS is disabled (no parent queue). */
> -	if (!priv->rss) {
> +	if (!priv->rss && !priv->isolated) {
>  		rxq_allmulticast_disable(&tmpl);
>  		rxq_promiscuous_disable(&tmpl);
>  		rxq_mac_addrs_del(&tmpl);
> @@ -3575,7 +3578,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		return err;
>  	}
>  	/* Reconfigure flows. Do not care for errors. */
> -	if (!priv->rss) {
> +	if (!priv->rss && !priv->isolated) {
>  		rxq_mac_addrs_add(&tmpl);
>  		if (priv->promisc)
>  			rxq_promiscuous_enable(&tmpl);
> @@ -3734,7 +3737,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		      strerror(ret));
>  		return ret;
>  	}
> -	if (parent || !priv->rss) {
> +	if (!priv->isolated && (parent || !priv->rss)) {
>  		/* Configure MAC and broadcast addresses. */
>  		ret = rxq_mac_addrs_add(rxq);
>  		if (ret) {
> @@ -4013,7 +4016,7 @@ struct txq_mp2mr_mbuf_check_data {
>  			return -ENOMEM;
>  		}
>  	}
> -	if (priv->rss) {
> +	if (priv->rss && !priv->isolated) {
>  		/* The list consists of the single default one. */
>  		parent = LIST_FIRST(&priv->parents);
>  		if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
> @@ -4109,7 +4112,10 @@ struct txq_mp2mr_mbuf_check_data {
>  	}
>  	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
>  	priv->started = 1;
> -	if (priv->rss) {
> +	if (priv->isolated) {
> +		rxq = NULL;
> +		r = 1;
> +	} else if (priv->rss) {
>  		rxq = LIST_FIRST(&priv->parents);
>  		r = 1;
>  	} else {
> @@ -4192,7 +4198,10 @@ struct txq_mp2mr_mbuf_check_data {
>  	}
>  	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
>  	priv->started = 0;
> -	if (priv->rss) {
> +	if (priv->isolated) {
> +		rxq = NULL;
> +		r = 1;
> +	} else if (priv->rss) {
>  		rxq = LIST_FIRST(&priv->parents);
>  		r = 1;
>  	} else {
> @@ -4620,6 +4629,8 @@ struct txq_mp2mr_mbuf_check_data {
>  	if (mlx4_is_secondary())
>  		return;
>  	priv_lock(priv);
> +	if (priv->isolated)
> +		goto end;
>  	DEBUG("%p: removing MAC address from index %" PRIu32,
>  	      (void *)dev, index);
>  	/* Last array entry is reserved for broadcast. */
> @@ -4653,6 +4664,12 @@ struct txq_mp2mr_mbuf_check_data {
>  		return -ENOTSUP;
>  	(void)vmdq;
>  	priv_lock(priv);
> +	if (priv->isolated) {
> +		DEBUG("%p: cannot add MAC address, "
> +		      "device is in isolated mode", (void *)dev);
> +		re = EPERM;
> +		goto end;
> +	}
>  	DEBUG("%p: adding MAC address at index %" PRIu32,
>  	      (void *)dev, index);
>  	/* Last array entry is reserved for broadcast. */
> @@ -4700,6 +4717,12 @@ struct txq_mp2mr_mbuf_check_data {
>  	if (mlx4_is_secondary())
>  		return;
>  	priv_lock(priv);
> +	if (priv->isolated) {
> +		DEBUG("%p: cannot enable promiscuous, "
> +		      "device is in isolated mode", (void *)dev);
> +		priv_unlock(priv);
> +		return;
> +	}
>  	if (priv->promisc) {
>  		priv_unlock(priv);
>  		return;
> @@ -4748,7 +4771,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	if (mlx4_is_secondary())
>  		return;
>  	priv_lock(priv);
> -	if (!priv->promisc) {
> +	if (!priv->promisc || priv->isolated) {
>  		priv_unlock(priv);
>  		return;
>  	}
> @@ -4780,6 +4803,12 @@ struct txq_mp2mr_mbuf_check_data {
>  	if (mlx4_is_secondary())
>  		return;
>  	priv_lock(priv);
> +	if (priv->isolated) {
> +		DEBUG("%p: cannot enable allmulticast, "
> +		      "device is in isolated mode", (void *)dev);
> +		priv_unlock(priv);
> +		return;
> +	}
>  	if (priv->allmulti) {
>  		priv_unlock(priv);
>  		return;
> @@ -4832,7 +4861,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		priv_unlock(priv);
>  		return;
>  	}
> -	if (priv->rss) {
> +	if (priv->rss && !priv->isolated) {
>  		rxq_allmulticast_disable(LIST_FIRST(&priv->parents));
>  		goto end;
>  	}
> @@ -4971,7 +5000,7 @@ struct txq_mp2mr_mbuf_check_data {
>  		}
>  		/* Reenable non-RSS queue attributes. No need to check
>  		 * for errors at this stage. */
> -		if (!priv->rss) {
> +		if (!priv->rss && !priv->isolated) {
>  			rxq_mac_addrs_add(rxq);
>  			if (priv->promisc)
>  				rxq_promiscuous_enable(rxq);
> @@ -5206,6 +5235,12 @@ struct txq_mp2mr_mbuf_check_data {
>  	if (mlx4_is_secondary())
>  		return -E_RTE_SECONDARY;
>  	priv_lock(priv);
> +	if (priv->isolated) {
> +		DEBUG("%p: cannot set vlan filter, "
> +		      "device is in isolated mode", (void *)dev);
> +		priv_unlock(priv);
> +		return -EINVAL;
> +	}
>  	ret = vlan_filter_set(dev, vlan_id, on);
>  	priv_unlock(priv);
>  	assert(ret >= 0);
> @@ -5218,6 +5253,7 @@ struct txq_mp2mr_mbuf_check_data {
>  	.destroy = mlx4_flow_destroy,
>  	.flush = mlx4_flow_flush,
>  	.query = NULL,
> +	.isolate = mlx4_flow_isolate,
>  };
>  
>  /**
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index fd24888..b5fe1b4 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -339,6 +339,7 @@ struct priv {
>  	unsigned int rss:1; /* RSS is enabled. */
>  	unsigned int vf:1; /* This is a VF device. */
>  	unsigned int pending_alarm:1; /* An alarm is pending. */
> +	unsigned int isolated:1; /* Toggle isolated mode. */
>  #ifdef INLINE_RECV
>  	unsigned int inl_recv_size; /* Inline recv size */
>  #endif
> diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
> index edfac03..3fd2716 100644
> --- a/drivers/net/mlx4/mlx4_flow.c
> +++ b/drivers/net/mlx4/mlx4_flow.c
> @@ -957,6 +957,45 @@ struct rte_flow *
>  }
>  
>  /**
> + * @see rte_flow_isolate()
> + *
> + * Must be done before calling dev_configure().
> + *
> + * RSS action is possible only if this mode was requested.

This shouldn't be documented here but in the code that parses the RSS
action, where it's actually the case thanks to the associated check and
error message "RSS cannot be used without isolated mode".

You should just remove this line.

> + *
> + * @param dev
> + *   Pointer to the ethernet device structure.
> + * @param enable
> + *   Nonzero to enter isolated mode, attempt to leave it otherwise.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL. PMDs initialize this
> + *   structure in case of error only.
> + *
> + * @return
> + *   0 on success, a negative value on error.
> + */
> +int
> +mlx4_flow_isolate(struct rte_eth_dev *dev,
> +		  int enable,
> +		  struct rte_flow_error *error)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +
> +	priv_lock(priv);
> +	if (priv->rxqs) {
> +		rte_flow_error_set(error, ENOTSUP,
> +				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +				   NULL, "isolated mode must be set"
> +				   " before configuring the device");
> +		priv_unlock(priv);
> +		return -rte_errno;
> +	}
> +	priv->isolated = !!enable;
> +	priv_unlock(priv);
> +	return 0;
> +}
> +
> +/**
>   * Destroy a flow.
>   *
>   * @param priv
> diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
> index 12a293e..4d007da 100644
> --- a/drivers/net/mlx4/mlx4_flow.h
> +++ b/drivers/net/mlx4/mlx4_flow.h
> @@ -90,6 +90,11 @@ struct mlx4_flow {
>  	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
>  };
>  
> +int
> +mlx4_flow_isolate(struct rte_eth_dev *dev,
> +		  int enable,
> +		  struct rte_flow_error *error);
> +
>  struct mlx4_flow_action {
>  	uint32_t drop:1; /**< Target is a drop queue. */
>  	uint32_t queue:1; /**< Target is a receive queue. */
> -- 
> 1.8.3.1
> 

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v5 3/4] net/mlx4: support for the RSS flow action
  2017-06-28 14:03 ` [PATCH v5 3/4] net/mlx4: support for the RSS flow action Vasily Philipov
@ 2017-06-29 16:53   ` Adrien Mazarguil
  0 siblings, 0 replies; 51+ messages in thread
From: Adrien Mazarguil @ 2017-06-29 16:53 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Nelio Laranjeiro

On Wed, Jun 28, 2017 at 05:03:56PM +0300, Vasily Philipov wrote:
> The isolated mode should be enabled.
> The number of queues in RSS ring must be power of 2.
> The sharing a queue between several RSS rings is impossible.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>

Alternative suggestion for commit log:

 net/mlx4: support flow API RSS action

 This commit adds support for the flow API RSS action with the following
 limitations:

 - Only supported when isolated mode is enabled.
 - The number of queues specified by the action (rte_flow_action_rss.num)
   must be a power of two.
 - Each queue index can be specified at most once in the configuration
   (rte_flow_action_rss.queue[]).
 - Because a queue can be associated with a single RSS context, it cannot be
   targeted by multiple RSS actions simultaneously. 

A few more comments about this patch, see below.

> ---
>  drivers/net/mlx4/mlx4.c      |  21 +++--
>  drivers/net/mlx4/mlx4.h      |   5 ++
>  drivers/net/mlx4/mlx4_flow.c | 197 ++++++++++++++++++++++++++++++++++++++++++-
>  drivers/net/mlx4/mlx4_flow.h |   3 +-
>  4 files changed, 211 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 22fa7c6..6ab7241 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -554,9 +554,9 @@ void priv_unlock(struct priv *priv)
>   *   The number of entries in queues[].
>   *
>   * @return
> - *   0 on success, negative errno value on failure.
> + *   Pointer to a parent rxq structure, NULL on failure.
>   */
> -static int
> +struct rxq *
>  priv_create_parent(struct priv *priv,
>  		   uint16_t queues[],
>  		   uint16_t children_n)
> @@ -568,13 +568,15 @@ void priv_unlock(struct priv *priv)
>  	parent = rte_zmalloc("parent queue",
>  			     sizeof(*parent),
>  			     RTE_CACHE_LINE_SIZE);
> -	if (!parent)
> -		return -ENOMEM;
> +	if (!parent) {
> +		ERROR("cannot allocate memory for RSS parent queue");
> +		return NULL;
> +	}
>  	ret = rxq_setup(priv->dev, parent, 0, 0, 0,
>  			NULL, NULL, children_n, NULL);
>  	if (ret) {
>  		rte_free(parent);
> -		return -ret;
> +		return NULL;
>  	}
>  	parent->rss.queues_n = children_n;
>  	if (queues) {
> @@ -587,7 +589,7 @@ void priv_unlock(struct priv *priv)
>  			parent->rss.queues[i] = i;
>  	}
>  	LIST_INSERT_HEAD(&priv->parents, parent, next);
> -	return 0;
> +	return parent;
>  }
> 
>  /**
> @@ -639,7 +641,6 @@ void priv_unlock(struct priv *priv)
>  	unsigned int rxqs_n = dev->data->nb_rx_queues;
>  	unsigned int txqs_n = dev->data->nb_tx_queues;
>  	unsigned int tmp;
> -	int ret;
>  
>  	priv->rxqs = (void *)dev->data->rx_queues;
>  	priv->txqs = (void *)dev->data->tx_queues;
> @@ -696,14 +697,12 @@ void priv_unlock(struct priv *priv)
>  	priv->rxqs_n = rxqs_n;
>  	if (priv->isolated)
>  		return 0;
> -	ret = priv_create_parent(priv, NULL, priv->rxqs_n);
> -	if (!ret)
> +	if (priv_create_parent(priv, NULL, priv->rxqs_n))
>  		return 0;
>  	/* Failure, rollback. */
>  	priv->rss = 0;
>  	priv->rxqs_n = tmp;
> -	assert(ret > 0);
> -	return ret;
> +	return ENOMEM;
>  }

I think the above changes should be merged in the previous commit
("net/mlx4: RSS parent queues new method maintenance"). priv_create_parent()
can return the rxq pointer (or NULL in case of error) from the start.

>  /**
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index b5fe1b4..f45e017 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -370,4 +370,9 @@ struct priv {
>  void
>  rxq_parent_cleanup(struct rxq *parent);
>  
> +struct rxq *
> +priv_create_parent(struct priv *priv,
> +		   uint16_t queues[],
> +		   uint16_t children_n);
> +
>  #endif /* RTE_PMD_MLX4_H_ */
> diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
> index 3fd2716..9c0fba1 100644
> --- a/drivers/net/mlx4/mlx4_flow.c
> +++ b/drivers/net/mlx4/mlx4_flow.c
> @@ -112,6 +112,7 @@ struct rte_flow_drop {
>  static const enum rte_flow_action_type valid_actions[] = {
>  	RTE_FLOW_ACTION_TYPE_DROP,
>  	RTE_FLOW_ACTION_TYPE_QUEUE,
> +	RTE_FLOW_ACTION_TYPE_RSS,
>  	RTE_FLOW_ACTION_TYPE_END,
>  };
>  
> @@ -672,6 +673,76 @@ struct rte_flow_drop {
>  			if (!queue || (queue->index > (priv->rxqs_n - 1)))
>  				goto exit_action_not_supported;
>  			action.queue = 1;
> +			action.queues_n = 1;
> +			action.queues[0] = queue->index;
> +		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
> +			int i;
> +			int ierr;
> +			const struct rte_flow_action_rss *rss =
> +				(const struct rte_flow_action_rss *)
> +				actions->conf;
> +
> +			if (!priv->hw_rss) {
> +				rte_flow_error_set(error, ENOTSUP,
> +					   RTE_FLOW_ERROR_TYPE_ACTION,
> +					   actions,
> +					   "RSS cannot be used with "
> +					   "the current configuration");
> +				return -rte_errno;
> +			}
> +			if (!priv->isolated) {
> +				rte_flow_error_set(error, ENOTSUP,
> +					   RTE_FLOW_ERROR_TYPE_ACTION,
> +					   actions,
> +					   "RSS cannot be used without "
> +					   "isolated mode");
> +				return -rte_errno;
> +			}
> +			if (!rte_is_power_of_2(rss->num)) {
> +				rte_flow_error_set(error, ENOTSUP,
> +					   RTE_FLOW_ERROR_TYPE_ACTION,
> +					   actions,
> +					   "the number of queues "
> +					   "should be power of two");
> +				return -rte_errno;
> +			}
> +			if (priv->max_rss_tbl_sz < rss->num) {
> +				rte_flow_error_set(error, ENOTSUP,
> +					   RTE_FLOW_ERROR_TYPE_ACTION,
> +					   actions,
> +					   "the number of queues "
> +					   "is too large");
> +				return -rte_errno;
> +			}
> +			/* checking indexes array */
> +			ierr = 0;
> +			for (i = 0; i < rss->num; ++i) {
> +				int j;
> +				if (rss->queue[i] >= priv->rxqs_n)
> +					ierr = 1;
> +				/*
> +				 * Prevent the user from specifying
> +				 * the same queue twice in the RSS array.
> +				 */
> +				for (j = i + 1; j < rss->num && !ierr; ++j)
> +					if (rss->queue[j] == rss->queue[i])
> +						ierr = 1;
> +				if (ierr) {
> +					rte_flow_error_set(
> +						error,
> +						ENOTSUP,
> +						RTE_FLOW_ERROR_TYPE_HANDLE,
> +						NULL,
> +						"RSS action only supports "
> +						"unique queue indices "
> +						"in a list");
> +					return -rte_errno;
> +				}
> +			}
> +			action.queue = 1;
> +			action.queues_n = rss->num;
> +			for (i = 0; i < rss->num; ++i)
> +				action.queues[i] = rss->queue[i];
>  		} else {
>  			goto exit_action_not_supported;
>  		}
> @@ -797,6 +868,82 @@ struct rte_flow_drop {
>  }
>  
>  /**
> + * Get RSS parent rxq structure for given queues.
> + *
> + * Creates a new or returns an existed one.
> + *
> + * @param priv
> + *   Pointer to private structure.
> + * @param queues
> + *   queues indices array, NULL in default RSS case.
> + * @param children_n
> + *   the size of queues array.
> + *
> + * @return
> + *   Pointer to a parent rxq structure, NULL on failure.
> + */
> +static struct rxq *
> +priv_get_parent(struct priv *priv,

Please use a common prefix for all these functions for consistency; you
should rename it "priv_parent_get()".

> +		uint16_t queues[],
> +		uint16_t children_n,
> +		struct rte_flow_error *error)
> +{
> +	unsigned int i;
> +	struct rxq *parent;
> +
> +	for (parent = LIST_FIRST(&priv->parents);
> +	     parent;
> +	     parent = LIST_NEXT(parent, next)) {
> +		unsigned int same = 0;
> +		unsigned int overlap = 0;
> +
> +		/*
> +		 * Find out whether an appropriate parent queue already exists
> +		 * and can be reused, otherwise make sure there are no overlaps.
> +		 */
> +		for (i = 0; i < children_n; ++i) {
> +			unsigned int j;
> +
> +			for (j = 0; j < parent->rss.queues_n; ++j) {
> +				if (parent->rss.queues[j] != queues[i])
> +					continue;
> +				++overlap;
> +				if (i == j)
> +					++same;
> +			}
> +		}
> +		if (same == children_n &&
> +			children_n == parent->rss.queues_n)
> +			return parent;
> +		else if (overlap)
> +			goto error;
> +	}
> +	/* Exclude the cases when some QPs were created without RSS */
> +	for (i = 0; i < children_n; ++i) {
> +		struct rxq *rxq = (*priv->rxqs)[queues[i]];
> +		if (rxq->qp)
> +			goto error;
> +	}
> +	parent = priv_create_parent(priv, queues, children_n);
> +	if (!parent) {
> +		rte_flow_error_set(error,
> +				   ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +				   NULL, "flow rule creation failure");
> +		return NULL;
> +	}
> +	return parent;
> +
> +error:
> +	rte_flow_error_set(error,
> +			   EEXIST,
> +			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +			   NULL,
> +			   "sharing a queue between several"
> +			   " RSS groups is not supported");
> +	return NULL;
> +}
> +
> +/**
>   * Complete flow rule creation.
>   *
>   * @param priv
> @@ -819,6 +966,7 @@ struct rte_flow_drop {
>  {
>  	struct ibv_qp *qp;
>  	struct rte_flow *rte_flow;
> +	struct rxq *rxq_parent = NULL;
>  
>  	assert(priv->pd);
>  	assert(priv->ctx);
> @@ -831,9 +979,39 @@ struct rte_flow_drop {
>  	if (action->drop) {
>  		qp = priv->flow_drop_queue->qp;
>  	} else {
> -		struct rxq *rxq = (*priv->rxqs)[action->queue_id];
> +		int ret;
> +		unsigned int i;
> +		struct rxq *rxq = NULL;
>  
> -		qp = rxq->qp;
> +		if (action->queues_n > 1) {
> +			rxq_parent = priv_get_parent(priv, action->queues,
> +						     action->queues_n, error);
> +			if (!rxq_parent)
> +				goto error;
> +		}
> +		for (i = 0; i < action->queues_n; ++i) {
> +			rxq = (*priv->rxqs)[action->queues[i]];
> +			/*
> +			 * In case of isolated mode we postpone
> +			 * ibv receive queue creation till the first
> +			 * rte_flow rule will be applied on that queue.
> +			 */
> +			if (!rxq->qp) {
> +				assert(priv->isolated);
> +				ret = rxq_create_qp(rxq, rxq->elts_n,
> +						    0, 0, rxq_parent);
> +				if (ret) {
> +					rte_flow_error_set(
> +						error,
> +						ENOMEM,
> +						RTE_FLOW_ERROR_TYPE_HANDLE,
> +						NULL,
> +						"flow rule creation failure");
> +					goto error;
> +				}
> +			}
> +		}
> +		qp = action->queues_n > 1 ? rxq_parent->qp : rxq->qp;
>  		rte_flow->qp = qp;
>  	}
>  	rte_flow->ibv_attr = ibv_attr;
> @@ -846,6 +1024,8 @@ struct rte_flow_drop {
>  	return rte_flow;
>  
>  error:
> +	if (rxq_parent)
> +		rxq_parent_cleanup(rxq_parent);
>  	rte_free(rte_flow);
>  	return NULL;
>  }
> @@ -909,11 +1089,22 @@ struct rte_flow_drop {
>  			continue;
>  		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
>  			action.queue = 1;
> -			action.queue_id =
> +			action.queues_n = 1;
> +			action.queues[0] =
>  				((const struct rte_flow_action_queue *)
>  				 actions->conf)->index;
>  		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
>  			action.drop = 1;
> +		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
> +			unsigned int i;
> +			const struct rte_flow_action_rss *rss =
> +				(const struct rte_flow_action_rss *)
> +				 actions->conf;
> +
> +			action.queue = 1;
> +			action.queues_n = rss->num;
> +			for (i = 0; i < rss->num; ++i)
> +				action.queues[i] = rss->queue[i];
>  		} else {
>  			rte_flow_error_set(error, ENOTSUP,
>  					   RTE_FLOW_ERROR_TYPE_ACTION,
> diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
> index 4d007da..beabcf2 100644
> --- a/drivers/net/mlx4/mlx4_flow.h
> +++ b/drivers/net/mlx4/mlx4_flow.h
> @@ -98,7 +98,8 @@ struct mlx4_flow {
>  struct mlx4_flow_action {
>  	uint32_t drop:1; /**< Target is a drop queue. */
>  	uint32_t queue:1; /**< Target is a receive queue. */
> -	uint32_t queue_id; /**< Identifier of the queue. */
> +	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indices to use. */
> +	uint16_t queues_n; /**< Number of entries in queue[] */
>  };
>  
>  int mlx4_priv_flow_start(struct priv *priv);
> -- 
> 1.8.3.1

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v5 4/4] app/testpmd: add isolated mode parameter
  2017-06-28 14:03 ` [PATCH v5 4/4] app/testpmd: add isolated mode parameter Vasily Philipov
@ 2017-06-29 16:53   ` Adrien Mazarguil
  0 siblings, 0 replies; 51+ messages in thread
From: Adrien Mazarguil @ 2017-06-29 16:53 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Nelio Laranjeiro, Wu, Jingjing, Thomas Monjalon

On Wed, Jun 28, 2017 at 05:03:57PM +0300, Vasily Philipov wrote:
> In case if --isolated-mode parameter was added to the command line
> the rte flow isolate action will be applied on each port before
> the device configuring.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>

The patch itself looks OK but I think the commit log needs some
clarification, here's another suggestion:

 app/testpmd: add isolated mode parameter

 Providing this parameter requests flow API isolated mode on all ports at
 initialization time. It ensures all traffic is received through the
 configured flow rules only (see flow command).

 Ports that do not support this mode are automatically discarded.

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [PATCH v6 1/4] app/testpmd: add isolated mode parameter
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (15 preceding siblings ...)
  2017-06-28 14:03 ` [PATCH v5 4/4] app/testpmd: add isolated mode parameter Vasily Philipov
@ 2017-07-02 12:32 ` Vasily Philipov
  2017-07-02 12:32 ` [PATCH v6 2/4] net/mlx4: refactor RSS parent queue allocation Vasily Philipov
                   ` (10 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-07-02 12:32 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Providing this parameter requests flow API isolated mode on all ports at
initialization time. It ensures all traffic is received through the
configured flow rules only (see flow command).

Ports that do not support this mode are automatically discarded.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 app/test-pmd/parameters.c |  3 +++
 app/test-pmd/testpmd.c    | 14 ++++++++++++++
 app/test-pmd/testpmd.h    |  1 +
 3 files changed, 18 insertions(+)

diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index fbe6284..e313871 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -623,6 +623,7 @@
 		{ "tx-queue-stats-mapping",	1, 0, 0 },
 		{ "rx-queue-stats-mapping",	1, 0, 0 },
 		{ "no-flush-rx",	0, 0, 0 },
+		{ "isolated-mode",	        0, 0, 0 },
 		{ "txpkts",			1, 0, 0 },
 		{ "disable-link-check",		0, 0, 0 },
 		{ "no-lsc-interrupt",		0, 0, 0 },
@@ -1081,6 +1082,8 @@
 				lsc_interrupt = 0;
 			if (!strcmp(lgopts[opt_idx].name, "no-rmv-interrupt"))
 				rmv_interrupt = 0;
+			if (!strcmp(lgopts[opt_idx].name, "isolated-mode"))
+				isolated_mode = 1;
 			if (!strcmp(lgopts[opt_idx].name, "print-event"))
 				if (parse_event_printing_config(optarg, 1)) {
 					rte_exit(EXIT_FAILURE,
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index b3ad83b..864a2a8 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -270,6 +270,11 @@ struct fwd_engine * fwd_engines[] = {
 uint8_t no_flush_rx = 0; /* flush by default */
 
 /*
+ * Flow API isolated mode.
+ */
+uint8_t isolated_mode;
+
+/*
  * Avoids to check link status when starting/stopping a port.
  */
 uint8_t no_link_check = 0; /* check by default */
@@ -1425,6 +1430,15 @@ static int eth_event_callback(uint8_t port_id,
 		if (port->need_reconfig > 0) {
 			port->need_reconfig = 0;
 
+			if (isolated_mode) {
+				int ret = port_flow_isolate(pi, 1);
+				if (ret) {
+					printf("Failed to apply isolated"
+					       " mode on port %d\n", pi);
+					return -1;
+				}
+			}
+
 			printf("Configuring Port %d (socket %u)\n", pi,
 					port->socket_id);
 			/* configure port */
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 364502d..d5fc9ad 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -303,6 +303,7 @@ struct queue_stats_mappings {
 extern uint8_t  numa_support; /**< set by "--numa" parameter */
 extern uint16_t port_topology; /**< set by "--port-topology" parameter */
 extern uint8_t no_flush_rx; /**<set by "--no-flush-rx" parameter */
+extern uint8_t isolated_mode; /**<set by "--isolated-mode" parameter */
 extern uint8_t  mp_anon; /**< set by "--mp-anon" parameter */
 extern uint8_t no_link_check; /**<set by "--disable-link-check" parameter */
 extern volatile int test_done; /* stop packet forwarding when set to 1. */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v6 2/4] net/mlx4: refactor RSS parent queue allocation
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (16 preceding siblings ...)
  2017-07-02 12:32 ` [PATCH v6 1/4] " Vasily Philipov
@ 2017-07-02 12:32 ` Vasily Philipov
  2017-07-02 12:32 ` [PATCH v6 3/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (9 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-07-02 12:32 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

A special "parent" queue must be allocated in addition to a group of
standard Rx queues for RSS to work. This is done automatically outside of
isolated mode by the PMD when applications request several Rx queues.

Since each configured flow rule with the RSS action may target a different
set of queues, the PMD must have the ability to dynamically allocate
several parent queues, one per RSS group.

Refactor RSS parent queue allocations (currently limited to a single
parent) in preparation for flow API RSS action support.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c | 374 +++++++++++++++++++++++++++++++++---------------
 drivers/net/mlx4/mlx4.h |  17 ++-
 2 files changed, 271 insertions(+), 120 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 16cafae..96f88c6 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -552,13 +552,91 @@ void priv_unlock(struct priv *priv)
 
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp);
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent);
 
 static void
 rxq_cleanup(struct rxq *rxq);
 
 /**
+ * Create RSS parent queue.
+ *
+ * The new parent is inserted in front of the list in the private structure.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   Queues indices array, if NULL use all Rx queues.
+ * @param children_n
+ *   The number of entries in queues[].
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+priv_parent_create(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n)
+{
+	int ret;
+	uint16_t i;
+	struct rxq *parent;
+
+	parent = rte_zmalloc("parent queue",
+			     sizeof(*parent),
+			     RTE_CACHE_LINE_SIZE);
+	if (!parent)
+		return -ENOMEM;
+	ret = rxq_setup(priv->dev, parent, 0, 0, 0,
+			NULL, NULL, children_n, NULL);
+	if (ret) {
+		rte_free(parent);
+		return -ret;
+	}
+	parent->rss.queues_n = children_n;
+	if (queues) {
+		for (i = 0; i < children_n; ++i)
+			parent->rss.queues[i] = queues[i];
+	} else {
+		/* the default RSS ring case */
+		assert(priv->rxqs_n == children_n);
+		for (i = 0; i < priv->rxqs_n; ++i)
+			parent->rss.queues[i] = i;
+	}
+	LIST_INSERT_HEAD(&priv->parents, parent, next);
+	return 0;
+}
+
+/**
+ * Clean up RX queue parent structure.
+ *
+ * @param parent
+ *   RX queue parent structure.
+ */
+void
+rxq_parent_cleanup(struct rxq *parent)
+{
+	LIST_REMOVE(parent, next);
+	rxq_cleanup(parent);
+	rte_free(parent);
+}
+
+/**
+ * Clean up parent structures from the parent list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_parent_list_cleanup(struct priv *priv)
+{
+	while (!LIST_EMPTY(&priv->parents))
+		rxq_parent_cleanup(LIST_FIRST(&priv->parents));
+}
+
+/**
  * Ethernet device configuration.
  *
  * Prepare the driver for a given number of TX and RX queues.
@@ -607,7 +685,7 @@ void priv_unlock(struct priv *priv)
 		for (i = 0; (i != priv->rxqs_n); ++i)
 			if ((*priv->rxqs)[i] != NULL)
 				return EINVAL;
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parent_list_cleanup(priv);
 		priv->rss = 0;
 		priv->rxqs_n = 0;
 	}
@@ -632,7 +710,7 @@ void priv_unlock(struct priv *priv)
 	priv->rss = 1;
 	tmp = priv->rxqs_n;
 	priv->rxqs_n = rxqs_n;
-	ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, 0, NULL, NULL);
+	ret = priv_parent_create(priv, NULL, priv->rxqs_n);
 	if (!ret)
 		return 0;
 	/* Failure, rollback. */
@@ -2522,7 +2600,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
 		return;
 	if (priv->rss) {
-		rxq_mac_addr_del(&priv->rxq_parent, mac_index);
+		rxq_mac_addr_del(LIST_FIRST(&priv->parents), mac_index);
 		goto end;
 	}
 	for (i = 0; (i != priv->dev->data->nb_rx_queues); ++i)
@@ -2589,7 +2667,7 @@ struct txq_mp2mr_mbuf_check_data {
 		goto end;
 	}
 	if (priv->rss) {
-		ret = rxq_mac_addr_add(&priv->rxq_parent, mac_index);
+		ret = rxq_mac_addr_add(LIST_FIRST(&priv->parents), mac_index);
 		if (ret)
 			return ret;
 		goto end;
@@ -2771,8 +2849,9 @@ struct txq_mp2mr_mbuf_check_data {
 		rxq_promiscuous_disable(rxq);
 		rxq_allmulticast_disable(rxq);
 		rxq_mac_addrs_del(rxq);
-		claim_zero(ibv_destroy_qp(rxq->qp));
 	}
+	if (rxq->qp != NULL)
+		claim_zero(ibv_destroy_qp(rxq->qp));
 	if (rxq->cq != NULL)
 		claim_zero(ibv_destroy_cq(rxq->cq));
 	if (rxq->channel != NULL)
@@ -3351,15 +3430,18 @@ struct txq_mp2mr_mbuf_check_data {
  *   Completion queue to associate with QP.
  * @param desc
  *   Number of descriptors in QP (hint only).
- * @param parent
- *   If nonzero, create a parent QP, otherwise a child.
+ * @param children_n
+ *   Number of children when creating a parent QP, zero when creating a child.
+ * @param rxq_parent
+ *   Pointer to the parent queue when creating a child, NULL otherwise.
  *
  * @return
  *   QP pointer or NULL in case of error.
  */
 static struct ibv_qp *
 rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
-		 int parent, struct ibv_exp_res_domain *rd)
+		 int children_n, struct ibv_exp_res_domain *rd,
+		 struct rxq *rxq_parent)
 {
 	struct ibv_exp_qp_init_attr attr = {
 		/* CQ to be associated with the send queue. */
@@ -3389,16 +3471,16 @@ struct txq_mp2mr_mbuf_check_data {
 	attr.max_inl_recv = priv->inl_recv_size,
 	attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV;
 #endif
-	if (parent) {
+	if (children_n > 0) {
 		attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
 		/* TSS isn't necessary. */
 		attr.qpg.parent_attrib.tss_child_count = 0;
 		attr.qpg.parent_attrib.rss_child_count =
-			rte_align32pow2(priv->rxqs_n + 1) >> 1;
+			rte_align32pow2(children_n + 1) >> 1;
 		DEBUG("initializing parent RSS queue");
 	} else {
 		attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
-		attr.qpg.qpg_parent = priv->rxq_parent.qp;
+		attr.qpg.qpg_parent = rxq_parent->qp;
 		DEBUG("initializing child RSS queue");
 	}
 	return ibv_exp_create_qp(priv->ctx, &attr);
@@ -3434,13 +3516,7 @@ struct txq_mp2mr_mbuf_check_data {
 	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int err;
-	int parent = (rxq == &priv->rxq_parent);
 
-	if (parent) {
-		ERROR("%p: cannot rehash parent queue %p",
-		      (void *)dev, (void *)rxq);
-		return EINVAL;
-	}
 	mb_len = rte_pktmbuf_data_room_size(rxq->mp);
 	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
 	/* Number of descriptors and mbufs currently allocated. */
@@ -3485,6 +3561,8 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	/* From now on, any failure will render the queue unusable.
 	 * Reinitialize QP. */
+	if (!tmpl.qp)
+		goto skip_init;
 	mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
 	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
 	if (err) {
@@ -3492,12 +3570,6 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	}
-	err = ibv_resize_cq(tmpl.cq, desc_n);
-	if (err) {
-		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
-		assert(err > 0);
-		return err;
-	}
 	mod = (struct ibv_exp_qp_attr){
 		/* Move the QP to this state. */
 		.qp_state = IBV_QPS_INIT,
@@ -3506,9 +3578,6 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	err = ibv_exp_modify_qp(tmpl.qp, &mod,
 				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
 				 IBV_EXP_QP_PORT));
 	if (err) {
 		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
@@ -3516,6 +3585,13 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	};
+skip_init:
+	err = ibv_resize_cq(tmpl.cq, desc_n);
+	if (err) {
+		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
+		assert(err > 0);
+		return err;
+	}
 	/* Reconfigure flows. Do not care for errors. */
 	if (!priv->rss) {
 		rxq_mac_addrs_add(&tmpl);
@@ -3583,6 +3659,8 @@ struct txq_mp2mr_mbuf_check_data {
 	rxq->elts_n = 0;
 	rte_free(rxq->elts.sp);
 	rxq->elts.sp = NULL;
+	if (!tmpl.qp)
+		goto skip_rtr;
 	/* Post WRs. */
 	err = ibv_post_recv(tmpl.qp,
 			    (tmpl.sp ?
@@ -3610,6 +3688,116 @@ struct txq_mp2mr_mbuf_check_data {
 }
 
 /**
+ * Create verbs QP resources associated with a rxq.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param inactive
+ *   If true, the queue is disabled because its index is higher or
+ *   equal to the real number of queues, which must be a power of 2.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure for a child in RSS case,
+ *   NULL for parent.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent)
+{
+	int ret;
+	struct ibv_exp_qp_attr mod;
+	struct ibv_exp_query_intf_params params;
+	enum ibv_exp_query_intf_status status;
+	struct ibv_recv_wr *bad_wr;
+	int parent = (children_n > 0);
+	struct priv *priv = rxq->priv;
+
+#ifdef RSS_SUPPORT
+	if (priv->rss && !inactive && (rxq_parent || parent))
+		rxq->qp = rxq_setup_qp_rss(priv, rxq->cq, desc,
+					   children_n, rxq->rd,
+					   rxq_parent);
+	else
+#endif /* RSS_SUPPORT */
+		rxq->qp = rxq_setup_qp(priv, rxq->cq, desc, rxq->rd);
+	if (rxq->qp == NULL) {
+		ret = (errno ? errno : EINVAL);
+		ERROR("QP creation failure: %s",
+		      strerror(ret));
+		return ret;
+	}
+	mod = (struct ibv_exp_qp_attr){
+		/* Move the QP to this state. */
+		.qp_state = IBV_QPS_INIT,
+		/* Primary port number. */
+		.port_num = priv->port
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod,
+				(IBV_EXP_QP_STATE |
+#ifdef RSS_SUPPORT
+				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
+#endif /* RSS_SUPPORT */
+				 IBV_EXP_QP_PORT));
+	if (ret) {
+		ERROR("QP state to IBV_QPS_INIT failed: %s",
+		      strerror(ret));
+		return ret;
+	}
+	if (parent || !priv->rss) {
+		/* Configure MAC and broadcast addresses. */
+		ret = rxq_mac_addrs_add(rxq);
+		if (ret) {
+			ERROR("QP flow attachment failed: %s",
+			      strerror(ret));
+			return ret;
+		}
+	}
+	if (!parent) {
+		ret = ibv_post_recv(rxq->qp,
+				    (rxq->sp ?
+				     &(*rxq->elts.sp)[0].wr :
+				     &(*rxq->elts.no_sp)[0].wr),
+				    &bad_wr);
+		if (ret) {
+			ERROR("ibv_post_recv() failed for WR %p: %s",
+			      (void *)bad_wr,
+			      strerror(ret));
+			return ret;
+		}
+	}
+	mod = (struct ibv_exp_qp_attr){
+		.qp_state = IBV_QPS_RTR
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod, IBV_EXP_QP_STATE);
+	if (ret) {
+		ERROR("QP state to IBV_QPS_RTR failed: %s",
+		      strerror(ret));
+		return ret;
+	}
+	params = (struct ibv_exp_query_intf_params){
+		.intf_scope = IBV_EXP_INTF_GLOBAL,
+		.intf = IBV_EXP_INTF_QP_BURST,
+		.obj = rxq->qp,
+	};
+	rxq->if_qp = ibv_exp_query_intf(priv->ctx, &params, &status);
+	if (rxq->if_qp == NULL) {
+		ERROR("QP interface family query failed with status %d",
+		      status);
+		return errno;
+	}
+	return 0;
+}
+
+/**
  * Configure a RX queue.
  *
  * @param dev
@@ -3627,14 +3815,21 @@ struct txq_mp2mr_mbuf_check_data {
  *   Thresholds parameters.
  * @param mp
  *   Memory pool for buffer allocations.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure (or NULL) in a child case,
+ *   NULL for parent.
  *
  * @return
  *   0 on success, errno value on failure.
  */
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp)
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct rxq tmpl = {
@@ -3642,17 +3837,15 @@ struct txq_mp2mr_mbuf_check_data {
 		.mp = mp,
 		.socket = socket
 	};
-	struct ibv_exp_qp_attr mod;
 	union {
 		struct ibv_exp_query_intf_params params;
 		struct ibv_exp_cq_init_attr cq;
 		struct ibv_exp_res_domain_init_attr rd;
 	} attr;
 	enum ibv_exp_query_intf_status status;
-	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int ret = 0;
-	int parent = (rxq == &priv->rxq_parent);
+	int parent = (children_n > 0);
 
 	(void)conf; /* Thresholds configuration (ignored). */
 	/*
@@ -3743,45 +3936,6 @@ struct txq_mp2mr_mbuf_check_data {
 	      priv->device_attr.max_qp_wr);
 	DEBUG("priv->device_attr.max_sge is %d",
 	      priv->device_attr.max_sge);
-#ifdef RSS_SUPPORT
-	if (priv->rss && !inactive)
-		tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
-					   tmpl.rd);
-	else
-#endif /* RSS_SUPPORT */
-		tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
-	if (tmpl.qp == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: QP creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	mod = (struct ibv_exp_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod,
-				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
-				 IBV_EXP_QP_PORT));
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	if ((parent) || (!priv->rss))  {
-		/* Configure MAC and broadcast addresses. */
-		ret = rxq_mac_addrs_add(&tmpl);
-		if (ret) {
-			ERROR("%p: QP flow attachment failed: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
 	/* Allocate descriptors for RX queues, except for the RSS parent. */
 	if (parent)
 		goto skip_alloc;
@@ -3792,29 +3946,14 @@ struct txq_mp2mr_mbuf_check_data {
 	if (ret) {
 		ERROR("%p: RXQ allocation failed: %s",
 		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	ret = ibv_post_recv(tmpl.qp,
-			    (tmpl.sp ?
-			     &(*tmpl.elts.sp)[0].wr :
-			     &(*tmpl.elts.no_sp)[0].wr),
-			    &bad_wr);
-	if (ret) {
-		ERROR("%p: ibv_post_recv() failed for WR %p: %s",
-		      (void *)dev,
-		      (void *)bad_wr,
-		      strerror(ret));
-		goto error;
+		return ret;
 	}
 skip_alloc:
-	mod = (struct ibv_exp_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
+	if (parent || rxq_parent || !priv->rss) {
+		ret = rxq_create_qp(&tmpl, desc, inactive,
+				    children_n, rxq_parent);
+		if (ret)
+			goto error;
 	}
 	/* Save port ID. */
 	tmpl.port_id = dev->data->port_id;
@@ -3826,21 +3965,11 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
 	if (tmpl.if_cq == NULL) {
+		ret = EINVAL;
 		ERROR("%p: CQ interface family query failed with status %d",
 		      (void *)dev, status);
 		goto error;
 	}
-	attr.params = (struct ibv_exp_query_intf_params){
-		.intf_scope = IBV_EXP_INTF_GLOBAL,
-		.intf = IBV_EXP_INTF_QP_BURST,
-		.obj = tmpl.qp,
-	};
-	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
-	if (tmpl.if_qp == NULL) {
-		ERROR("%p: QP interface family query failed with status %d",
-		      (void *)dev, status);
-		goto error;
-	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
 	rxq_cleanup(rxq);
@@ -3878,6 +4007,7 @@ struct txq_mp2mr_mbuf_check_data {
 		    unsigned int socket, const struct rte_eth_rxconf *conf,
 		    struct rte_mempool *mp)
 {
+	struct rxq *parent;
 	struct priv *priv = dev->data->dev_private;
 	struct rxq *rxq = (*priv->rxqs)[idx];
 	int inactive = 0;
@@ -3912,9 +4042,16 @@ struct txq_mp2mr_mbuf_check_data {
 			return -ENOMEM;
 		}
 	}
-	if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
-		inactive = 1;
-	ret = rxq_setup(dev, rxq, desc, socket, inactive, conf, mp);
+	if (priv->rss) {
+		/* The list consists of the single default one. */
+		parent = LIST_FIRST(&priv->parents);
+		if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
+			inactive = 1;
+	} else {
+		parent = NULL;
+	}
+	ret = rxq_setup(dev, rxq, desc, socket,
+			inactive, conf, mp, 0, parent);
 	if (ret)
 		rte_free(rxq);
 	else {
@@ -3951,7 +4088,6 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	priv = rxq->priv;
 	priv_lock(priv);
-	assert(rxq != &priv->rxq_parent);
 	for (i = 0; (i != priv->rxqs_n); ++i)
 		if ((*priv->rxqs)[i] == rxq) {
 			DEBUG("%p: removing RX queue %p from list",
@@ -4003,7 +4139,7 @@ struct txq_mp2mr_mbuf_check_data {
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
 	if (priv->rss) {
-		rxq = &priv->rxq_parent;
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4091,7 +4227,7 @@ struct txq_mp2mr_mbuf_check_data {
 	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
 	priv->started = 0;
 	if (priv->rss) {
-		rxq = &priv->rxq_parent;
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4225,7 +4361,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->txqs = NULL;
 	}
 	if (priv->rss)
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parent_list_cleanup(priv);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
 		claim_zero(ibv_dealloc_pd(priv->pd));
@@ -4610,7 +4746,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_promiscuous_enable(&priv->rxq_parent);
+		ret = rxq_promiscuous_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4655,7 +4791,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	}
 	if (priv->rss) {
-		rxq_promiscuous_disable(&priv->rxq_parent);
+		rxq_promiscuous_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -4690,7 +4826,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_allmulticast_enable(&priv->rxq_parent);
+		ret = rxq_allmulticast_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4735,7 +4871,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	}
 	if (priv->rss) {
-		rxq_allmulticast_disable(&priv->rxq_parent);
+		rxq_allmulticast_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -5044,7 +5180,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5052,7 +5188,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 1;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
@@ -5066,7 +5202,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5074,7 +5210,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 0;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index c46fc23..40e1bad 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -219,6 +219,7 @@ struct rxq_elt {
 
 /* RX queue descriptor. */
 struct rxq {
+	LIST_ENTRY(rxq) next; /* Used by parent queue only */
 	struct priv *priv; /* Back pointer to private data. */
 	struct rte_mempool *mp; /* Memory Pool for allocations. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
@@ -247,6 +248,10 @@ struct rxq {
 	struct mlx4_rxq_stats stats; /* RX queue counters. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	struct ibv_exp_res_domain *rd; /* Resource Domain. */
+	struct {
+		uint16_t queues_n;
+		uint16_t queues[RTE_MAX_QUEUES_PER_PORT];
+	} rss;
 };
 
 /* TX element. */
@@ -340,7 +345,6 @@ struct priv {
 #endif
 	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
 	/* RX/TX queues. */
-	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
 	struct rxq *(*rxqs)[]; /* RX queues. */
@@ -349,10 +353,21 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	LIST_HEAD(mlx4_flows, rte_flow) flows;
 	struct rte_intr_conf intr_conf; /* Active interrupt configuration. */
+	LIST_HEAD(mlx4_parents, rxq) parents;
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
 void priv_lock(struct priv *priv);
 void priv_unlock(struct priv *priv);
 
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent);
+
+void
+rxq_parent_cleanup(struct rxq *parent);
+
 #endif /* RTE_PMD_MLX4_H_ */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v6 3/4] net/mlx4: implement isolated mode from flow API
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (17 preceding siblings ...)
  2017-07-02 12:32 ` [PATCH v6 2/4] net/mlx4: refactor RSS parent queue allocation Vasily Philipov
@ 2017-07-02 12:32 ` Vasily Philipov
  2017-07-02 12:32 ` [PATCH v6 4/4] net/mlx4: support flow API RSS action Vasily Philipov
                   ` (8 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-07-02 12:32 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

The user must request isolated mode before device configuration;
the default RSS ring isn't created in this case.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 58 +++++++++++++++++++++++++++++++++++---------
 drivers/net/mlx4/mlx4.h      |  1 +
 drivers/net/mlx4/mlx4_flow.c | 37 ++++++++++++++++++++++++++++
 drivers/net/mlx4/mlx4_flow.h |  5 ++++
 4 files changed, 90 insertions(+), 11 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 96f88c6..afad2be 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -666,7 +666,7 @@ void priv_unlock(struct priv *priv)
 	}
 	if (rxqs_n == priv->rxqs_n)
 		return 0;
-	if (!rte_is_power_of_2(rxqs_n)) {
+	if (!rte_is_power_of_2(rxqs_n) && !priv->isolated) {
 		unsigned n_active;
 
 		n_active = rte_align32pow2(rxqs_n + 1) >> 1;
@@ -710,6 +710,8 @@ void priv_unlock(struct priv *priv)
 	priv->rss = 1;
 	tmp = priv->rxqs_n;
 	priv->rxqs_n = rxqs_n;
+	if (priv->isolated)
+		return 0;
 	ret = priv_parent_create(priv, NULL, priv->rxqs_n);
 	if (!ret)
 		return 0;
@@ -2596,6 +2598,7 @@ struct txq_mp2mr_mbuf_check_data {
 {
 	unsigned int i;
 
+	assert(!priv->isolated);
 	assert(mac_index < elemof(priv->mac));
 	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
 		return;
@@ -2845,7 +2848,7 @@ struct txq_mp2mr_mbuf_check_data {
 						rxq->if_cq,
 						&params));
 	}
-	if (rxq->qp != NULL) {
+	if (rxq->qp != NULL && !rxq->priv->isolated) {
 		rxq_promiscuous_disable(rxq);
 		rxq_allmulticast_disable(rxq);
 		rxq_mac_addrs_del(rxq);
@@ -3548,7 +3551,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return 0;
 	}
 	/* Remove attached flows if RSS is disabled (no parent queue). */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_allmulticast_disable(&tmpl);
 		rxq_promiscuous_disable(&tmpl);
 		rxq_mac_addrs_del(&tmpl);
@@ -3593,7 +3596,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return err;
 	}
 	/* Reconfigure flows. Do not care for errors. */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_mac_addrs_add(&tmpl);
 		if (priv->promisc)
 			rxq_promiscuous_enable(&tmpl);
@@ -3752,7 +3755,7 @@ struct txq_mp2mr_mbuf_check_data {
 		      strerror(ret));
 		return ret;
 	}
-	if (parent || !priv->rss) {
+	if (!priv->isolated && (parent || !priv->rss)) {
 		/* Configure MAC and broadcast addresses. */
 		ret = rxq_mac_addrs_add(rxq);
 		if (ret) {
@@ -4042,7 +4045,7 @@ struct txq_mp2mr_mbuf_check_data {
 			return -ENOMEM;
 		}
 	}
-	if (priv->rss) {
+	if (priv->rss && !priv->isolated) {
 		/* The list consists of the single default one. */
 		parent = LIST_FIRST(&priv->parents);
 		if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
@@ -4138,7 +4141,10 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	if (priv->rss) {
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
 		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
@@ -4226,7 +4232,10 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
 	priv->started = 0;
-	if (priv->rss) {
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
 		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
@@ -4658,6 +4667,8 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated)
+		goto end;
 	DEBUG("%p: removing MAC address from index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4691,6 +4702,12 @@ struct txq_mp2mr_mbuf_check_data {
 		return -ENOTSUP;
 	(void)vmdq;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot add MAC address, "
+		      "device is in isolated mode", (void *)dev);
+		re = EPERM;
+		goto end;
+	}
 	DEBUG("%p: adding MAC address at index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4738,6 +4755,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable promiscuous, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->promisc) {
 		priv_unlock(priv);
 		return;
@@ -4786,7 +4809,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
-	if (!priv->promisc) {
+	if (!priv->promisc || priv->isolated) {
 		priv_unlock(priv);
 		return;
 	}
@@ -4818,6 +4841,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable allmulticast, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->allmulti) {
 		priv_unlock(priv);
 		return;
@@ -4870,7 +4899,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv_unlock(priv);
 		return;
 	}
-	if (priv->rss) {
+	if (priv->rss && !priv->isolated) {
 		rxq_allmulticast_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
@@ -5009,7 +5038,7 @@ struct txq_mp2mr_mbuf_check_data {
 		}
 		/* Reenable non-RSS queue attributes. No need to check
 		 * for errors at this stage. */
-		if (!priv->rss) {
+		if (!priv->rss && !priv->isolated) {
 			rxq_mac_addrs_add(rxq);
 			if (priv->promisc)
 				rxq_promiscuous_enable(rxq);
@@ -5244,6 +5273,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return -E_RTE_SECONDARY;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot set vlan filter, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return -EINVAL;
+	}
 	ret = vlan_filter_set(dev, vlan_id, on);
 	priv_unlock(priv);
 	assert(ret >= 0);
@@ -5256,6 +5291,7 @@ struct txq_mp2mr_mbuf_check_data {
 	.destroy = mlx4_flow_destroy,
 	.flush = mlx4_flow_flush,
 	.query = NULL,
+	.isolate = mlx4_flow_isolate,
 };
 
 /**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 40e1bad..6de3484 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -340,6 +340,7 @@ struct priv {
 	unsigned int rss:1; /* RSS is enabled. */
 	unsigned int vf:1; /* This is a VF device. */
 	unsigned int pending_alarm:1; /* An alarm is pending. */
+	unsigned int isolated:1; /* Toggle isolated mode. */
 #ifdef INLINE_RECV
 	unsigned int inl_recv_size; /* Inline recv size */
 #endif
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index edfac03..5ad50bd 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -957,6 +957,43 @@ struct rte_flow *
 }
 
 /**
+ * @see rte_flow_isolate()
+ *
+ * Must be done before calling dev_configure().
+ *
+ * @param dev
+ *   Pointer to the ethernet device structure.
+ * @param enable
+ *   Nonzero to enter isolated mode, attempt to leave it otherwise.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative value on error.
+ */
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	if (priv->rxqs) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "isolated mode must be set"
+				   " before configuring the device");
+		priv_unlock(priv);
+		return -rte_errno;
+	}
+	priv->isolated = !!enable;
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
  * Destroy a flow.
  *
  * @param priv
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 12a293e..4d007da 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -90,6 +90,11 @@ struct mlx4_flow {
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 };
 
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error);
+
 struct mlx4_flow_action {
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t queue:1; /**< Target is a receive queue. */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v6 4/4] net/mlx4: support flow API RSS action
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (18 preceding siblings ...)
  2017-07-02 12:32 ` [PATCH v6 3/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
@ 2017-07-02 12:32 ` Vasily Philipov
  2017-07-04 11:14 ` [PATCH v7 1/4] app/testpmd: add isolated mode parameter Vasily Philipov
                   ` (7 subsequent siblings)
  27 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-07-02 12:32 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

This commit adds support for the flow API RSS action with the following
limitations:

 - Only supported when isolated mode is enabled.
 - The number of queues specified by the action (rte_flow_action_rss.num)
   must be a power of two.
 - Each queue index can be specified at most once in the configuration
   (rte_flow_action_rss.queue[]).
 - Because a queue can be associated with a single RSS context, it cannot
   be targeted by multiple RSS actions simultaneously.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      |  21 +++--
 drivers/net/mlx4/mlx4.h      |   5 ++
 drivers/net/mlx4/mlx4_flow.c | 197 ++++++++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx4/mlx4_flow.h |   3 +-
 4 files changed, 211 insertions(+), 15 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index afad2be..bbc1ba4 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -573,9 +573,9 @@ void priv_unlock(struct priv *priv)
  *   The number of entries in queues[].
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   Pointer to a parent rxq structure, NULL on failure.
  */
-static int
+struct rxq *
 priv_parent_create(struct priv *priv,
 		   uint16_t queues[],
 		   uint16_t children_n)
@@ -587,13 +587,15 @@ void priv_unlock(struct priv *priv)
 	parent = rte_zmalloc("parent queue",
 			     sizeof(*parent),
 			     RTE_CACHE_LINE_SIZE);
-	if (!parent)
-		return -ENOMEM;
+	if (!parent) {
+		ERROR("cannot allocate memory for RSS parent queue");
+		return NULL;
+	}
 	ret = rxq_setup(priv->dev, parent, 0, 0, 0,
 			NULL, NULL, children_n, NULL);
 	if (ret) {
 		rte_free(parent);
-		return -ret;
+		return NULL;
 	}
 	parent->rss.queues_n = children_n;
 	if (queues) {
@@ -606,7 +608,7 @@ void priv_unlock(struct priv *priv)
 			parent->rss.queues[i] = i;
 	}
 	LIST_INSERT_HEAD(&priv->parents, parent, next);
-	return 0;
+	return parent;
 }
 
 /**
@@ -655,7 +657,6 @@ void priv_unlock(struct priv *priv)
 	unsigned int rxqs_n = dev->data->nb_rx_queues;
 	unsigned int txqs_n = dev->data->nb_tx_queues;
 	unsigned int tmp;
-	int ret;
 
 	priv->rxqs = (void *)dev->data->rx_queues;
 	priv->txqs = (void *)dev->data->tx_queues;
@@ -712,14 +713,12 @@ void priv_unlock(struct priv *priv)
 	priv->rxqs_n = rxqs_n;
 	if (priv->isolated)
 		return 0;
-	ret = priv_parent_create(priv, NULL, priv->rxqs_n);
-	if (!ret)
+	if (priv_parent_create(priv, NULL, priv->rxqs_n))
 		return 0;
 	/* Failure, rollback. */
 	priv->rss = 0;
 	priv->rxqs_n = tmp;
-	assert(ret > 0);
-	return ret;
+	return ENOMEM;
 }
 
 /**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 6de3484..716fd45 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -371,4 +371,9 @@ struct priv {
 void
 rxq_parent_cleanup(struct rxq *parent);
 
+struct rxq *
+priv_parent_create(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n);
+
 #endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 5ad50bd..8ade106 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -112,6 +112,7 @@ struct rte_flow_drop {
 static const enum rte_flow_action_type valid_actions[] = {
 	RTE_FLOW_ACTION_TYPE_DROP,
 	RTE_FLOW_ACTION_TYPE_QUEUE,
+	RTE_FLOW_ACTION_TYPE_RSS,
 	RTE_FLOW_ACTION_TYPE_END,
 };
 
@@ -672,6 +673,76 @@ struct rte_flow_drop {
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
 			action.queue = 1;
+			action.queues_n = 1;
+			action.queues[0] = queue->index;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			int i;
+			int ierr;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				actions->conf;
+
+			if (!priv->hw_rss) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used with "
+					   "the current configuration");
+				return -rte_errno;
+			}
+			if (!priv->isolated) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used without "
+					   "isolated mode");
+				return -rte_errno;
+			}
+			if (!rte_is_power_of_2(rss->num)) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "should be power of two");
+				return -rte_errno;
+			}
+			if (priv->max_rss_tbl_sz < rss->num) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "is too large");
+				return -rte_errno;
+			}
+			/* checking indexes array */
+			ierr = 0;
+			for (i = 0; i < rss->num; ++i) {
+				int j;
+				if (rss->queue[i] >= priv->rxqs_n)
+					ierr = 1;
+				/*
+				 * Prevent the user from specifying
+				 * the same queue twice in the RSS array.
+				 */
+				for (j = i + 1; j < rss->num && !ierr; ++j)
+					if (rss->queue[j] == rss->queue[i])
+						ierr = 1;
+				if (ierr) {
+					rte_flow_error_set(
+						error,
+						ENOTSUP,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"RSS action only supports "
+						"unique queue indices "
+						"in a list");
+					return -rte_errno;
+				}
+			}
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			goto exit_action_not_supported;
 		}
@@ -797,6 +868,82 @@ struct rte_flow_drop {
 }
 
 /**
+ * Get RSS parent rxq structure for given queues.
+ *
+ * Creates a new one or returns an existing one.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   queues indices array, NULL in default RSS case.
+ * @param children_n
+ *   the size of queues array.
+ *
+ * @return
+ *   Pointer to a parent rxq structure, NULL on failure.
+ */
+static struct rxq *
+priv_parent_get(struct priv *priv,
+		uint16_t queues[],
+		uint16_t children_n,
+		struct rte_flow_error *error)
+{
+	unsigned int i;
+	struct rxq *parent;
+
+	for (parent = LIST_FIRST(&priv->parents);
+	     parent;
+	     parent = LIST_NEXT(parent, next)) {
+		unsigned int same = 0;
+		unsigned int overlap = 0;
+
+		/*
+		 * Find out whether an appropriate parent queue already exists
+		 * and can be reused, otherwise make sure there are no overlaps.
+		 */
+		for (i = 0; i < children_n; ++i) {
+			unsigned int j;
+
+			for (j = 0; j < parent->rss.queues_n; ++j) {
+				if (parent->rss.queues[j] != queues[i])
+					continue;
+				++overlap;
+				if (i == j)
+					++same;
+			}
+		}
+		if (same == children_n &&
+			children_n == parent->rss.queues_n)
+			return parent;
+		else if (overlap)
+			goto error;
+	}
+	/* Exclude the cases when some QPs were created without RSS */
+	for (i = 0; i < children_n; ++i) {
+		struct rxq *rxq = (*priv->rxqs)[queues[i]];
+		if (rxq->qp)
+			goto error;
+	}
+	parent = priv_parent_create(priv, queues, children_n);
+	if (!parent) {
+		rte_flow_error_set(error,
+				   ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "flow rule creation failure");
+		return NULL;
+	}
+	return parent;
+
+error:
+	rte_flow_error_set(error,
+			   EEXIST,
+			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL,
+			   "sharing a queue between several"
+			   " RSS groups is not supported");
+	return NULL;
+}
+
+/**
  * Complete flow rule creation.
  *
  * @param priv
@@ -819,6 +966,7 @@ struct rte_flow_drop {
 {
 	struct ibv_qp *qp;
 	struct rte_flow *rte_flow;
+	struct rxq *rxq_parent = NULL;
 
 	assert(priv->pd);
 	assert(priv->ctx);
@@ -831,9 +979,39 @@ struct rte_flow_drop {
 	if (action->drop) {
 		qp = priv->flow_drop_queue->qp;
 	} else {
-		struct rxq *rxq = (*priv->rxqs)[action->queue_id];
+		int ret;
+		unsigned int i;
+		struct rxq *rxq = NULL;
 
-		qp = rxq->qp;
+		if (action->queues_n > 1) {
+			rxq_parent = priv_parent_get(priv, action->queues,
+						     action->queues_n, error);
+			if (!rxq_parent)
+				goto error;
+		}
+		for (i = 0; i < action->queues_n; ++i) {
+			rxq = (*priv->rxqs)[action->queues[i]];
+			/*
+			 * In case of isolated mode we postpone
+			 * ibv receive queue creation until the first
+			 * rte_flow rule is applied to that queue.
+			 */
+			if (!rxq->qp) {
+				assert(priv->isolated);
+				ret = rxq_create_qp(rxq, rxq->elts_n,
+						    0, 0, rxq_parent);
+				if (ret) {
+					rte_flow_error_set(
+						error,
+						ENOMEM,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"flow rule creation failure");
+					goto error;
+				}
+			}
+		}
+		qp = action->queues_n > 1 ? rxq_parent->qp : rxq->qp;
 		rte_flow->qp = qp;
 	}
 	rte_flow->ibv_attr = ibv_attr;
@@ -846,6 +1024,8 @@ struct rte_flow_drop {
 	return rte_flow;
 
 error:
+	if (rxq_parent)
+		rxq_parent_cleanup(rxq_parent);
 	rte_free(rte_flow);
 	return NULL;
 }
@@ -909,11 +1089,22 @@ struct rte_flow_drop {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			action.queue = 1;
-			action.queue_id =
+			action.queues_n = 1;
+			action.queues[0] =
 				((const struct rte_flow_action_queue *)
 				 actions->conf)->index;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
 			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			unsigned int i;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				 actions->conf;
+
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 4d007da..beabcf2 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -98,7 +98,8 @@ struct mlx4_flow {
 struct mlx4_flow_action {
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t queue:1; /**< Target is a receive queue. */
-	uint32_t queue_id; /**< Identifier of the queue. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indices to use. */
+	uint16_t queues_n; /**< Number of entries in queues[] */
 };
 
 int mlx4_priv_flow_start(struct priv *priv);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v7 1/4] app/testpmd: add isolated mode parameter
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (19 preceding siblings ...)
  2017-07-02 12:32 ` [PATCH v6 4/4] net/mlx4: support flow API RSS action Vasily Philipov
@ 2017-07-04 11:14 ` Vasily Philipov
  2017-07-04 15:20   ` Adrien Mazarguil
  2017-07-04 11:14 ` [PATCH v7 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (6 subsequent siblings)
  27 siblings, 1 reply; 51+ messages in thread
From: Vasily Philipov @ 2017-07-04 11:14 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Providing this parameter requests flow API isolated mode on all ports at
initialization time. It ensures all traffic is received through the
configured flow rules only (see flow command).

Ports that do not support this mode are automatically discarded.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 app/test-pmd/parameters.c |  3 +++
 app/test-pmd/testpmd.c    | 14 ++++++++++++++
 app/test-pmd/testpmd.h    |  1 +
 3 files changed, 18 insertions(+)

diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index fbe6284..e313871 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -623,6 +623,7 @@
 		{ "tx-queue-stats-mapping",	1, 0, 0 },
 		{ "rx-queue-stats-mapping",	1, 0, 0 },
 		{ "no-flush-rx",	0, 0, 0 },
+		{ "isolated-mode",	        0, 0, 0 },
 		{ "txpkts",			1, 0, 0 },
 		{ "disable-link-check",		0, 0, 0 },
 		{ "no-lsc-interrupt",		0, 0, 0 },
@@ -1081,6 +1082,8 @@
 				lsc_interrupt = 0;
 			if (!strcmp(lgopts[opt_idx].name, "no-rmv-interrupt"))
 				rmv_interrupt = 0;
+			if (!strcmp(lgopts[opt_idx].name, "isolated-mode"))
+				isolated_mode = 1;
 			if (!strcmp(lgopts[opt_idx].name, "print-event"))
 				if (parse_event_printing_config(optarg, 1)) {
 					rte_exit(EXIT_FAILURE,
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index b3ad83b..864a2a8 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -270,6 +270,11 @@ struct fwd_engine * fwd_engines[] = {
 uint8_t no_flush_rx = 0; /* flush by default */
 
 /*
+ * Flow API isolated mode.
+ */
+uint8_t isolated_mode;
+
+/*
  * Avoids to check link status when starting/stopping a port.
  */
 uint8_t no_link_check = 0; /* check by default */
@@ -1425,6 +1430,15 @@ static int eth_event_callback(uint8_t port_id,
 		if (port->need_reconfig > 0) {
 			port->need_reconfig = 0;
 
+			if (isolated_mode) {
+				int ret = port_flow_isolate(pi, 1);
+				if (ret) {
+					printf("Failed to apply isolated"
+					       " mode on port %d\n", pi);
+					return -1;
+				}
+			}
+
 			printf("Configuring Port %d (socket %u)\n", pi,
 					port->socket_id);
 			/* configure port */
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 364502d..d5fc9ad 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -303,6 +303,7 @@ struct queue_stats_mappings {
 extern uint8_t  numa_support; /**< set by "--numa" parameter */
 extern uint16_t port_topology; /**< set by "--port-topology" parameter */
 extern uint8_t no_flush_rx; /**<set by "--no-flush-rx" parameter */
+extern uint8_t isolated_mode; /**<set by "--isolated-mode" parameter */
 extern uint8_t  mp_anon; /**< set by "--mp-anon" parameter */
 extern uint8_t no_link_check; /**<set by "--disable-link-check" parameter */
 extern volatile int test_done; /* stop packet forwarding when set to 1. */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v7 2/4] net/mlx4: implement isolated mode from flow API
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (20 preceding siblings ...)
  2017-07-04 11:14 ` [PATCH v7 1/4] app/testpmd: add isolated mode parameter Vasily Philipov
@ 2017-07-04 11:14 ` Vasily Philipov
  2017-07-04 15:20   ` Adrien Mazarguil
  2017-07-04 11:14 ` [PATCH v7 3/4] net/mlx4: refactor RSS parent queue allocation Vasily Philipov
                   ` (5 subsequent siblings)
  27 siblings, 1 reply; 51+ messages in thread
From: Vasily Philipov @ 2017-07-04 11:14 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

The user must request isolated mode before device configuration.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 57 +++++++++++++++++++++++++++++++++++---------
 drivers/net/mlx4/mlx4.h      |  1 +
 drivers/net/mlx4/mlx4_flow.c | 37 ++++++++++++++++++++++++++++
 drivers/net/mlx4/mlx4_flow.h |  5 ++++
 4 files changed, 89 insertions(+), 11 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 16cafae..fdd9cce 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -588,7 +588,7 @@ void priv_unlock(struct priv *priv)
 	}
 	if (rxqs_n == priv->rxqs_n)
 		return 0;
-	if (!rte_is_power_of_2(rxqs_n)) {
+	if (!rte_is_power_of_2(rxqs_n) && !priv->isolated) {
 		unsigned n_active;
 
 		n_active = rte_align32pow2(rxqs_n + 1) >> 1;
@@ -2518,6 +2518,7 @@ struct txq_mp2mr_mbuf_check_data {
 {
 	unsigned int i;
 
+	assert(!priv->isolated);
 	assert(mac_index < elemof(priv->mac));
 	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
 		return;
@@ -2767,12 +2768,13 @@ struct txq_mp2mr_mbuf_check_data {
 						rxq->if_cq,
 						&params));
 	}
-	if (rxq->qp != NULL) {
+	if (rxq->qp != NULL && !rxq->priv->isolated) {
 		rxq_promiscuous_disable(rxq);
 		rxq_allmulticast_disable(rxq);
 		rxq_mac_addrs_del(rxq);
-		claim_zero(ibv_destroy_qp(rxq->qp));
 	}
+	if (rxq->qp != NULL)
+		claim_zero(ibv_destroy_qp(rxq->qp));
 	if (rxq->cq != NULL)
 		claim_zero(ibv_destroy_cq(rxq->cq));
 	if (rxq->channel != NULL)
@@ -3472,7 +3474,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return 0;
 	}
 	/* Remove attached flows if RSS is disabled (no parent queue). */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_allmulticast_disable(&tmpl);
 		rxq_promiscuous_disable(&tmpl);
 		rxq_mac_addrs_del(&tmpl);
@@ -3517,7 +3519,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return err;
 	};
 	/* Reconfigure flows. Do not care for errors. */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_mac_addrs_add(&tmpl);
 		if (priv->promisc)
 			rxq_promiscuous_enable(&tmpl);
@@ -3773,7 +3775,7 @@ struct txq_mp2mr_mbuf_check_data {
 		      (void *)dev, strerror(ret));
 		goto error;
 	}
-	if ((parent) || (!priv->rss))  {
+	if (!priv->isolated && (parent || !priv->rss)) {
 		/* Configure MAC and broadcast addresses. */
 		ret = rxq_mac_addrs_add(&tmpl);
 		if (ret) {
@@ -4002,7 +4004,10 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	if (priv->rss) {
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
 		rxq = &priv->rxq_parent;
 		r = 1;
 	} else {
@@ -4090,7 +4095,10 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
 	priv->started = 0;
-	if (priv->rss) {
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
 		rxq = &priv->rxq_parent;
 		r = 1;
 	} else {
@@ -4522,6 +4530,8 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated)
+		goto end;
 	DEBUG("%p: removing MAC address from index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4555,6 +4565,12 @@ struct txq_mp2mr_mbuf_check_data {
 		return -ENOTSUP;
 	(void)vmdq;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot add MAC address, "
+		      "device is in isolated mode", (void *)dev);
+		re = EPERM;
+		goto end;
+	}
 	DEBUG("%p: adding MAC address at index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4602,6 +4618,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable promiscuous, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->promisc) {
 		priv_unlock(priv);
 		return;
@@ -4650,7 +4672,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
-	if (!priv->promisc) {
+	if (!priv->promisc || priv->isolated) {
 		priv_unlock(priv);
 		return;
 	}
@@ -4682,6 +4704,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable allmulticast, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->allmulti) {
 		priv_unlock(priv);
 		return;
@@ -4730,7 +4758,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
-	if (!priv->allmulti) {
+	if (!priv->allmulti || priv->isolated) {
 		priv_unlock(priv);
 		return;
 	}
@@ -4873,7 +4901,7 @@ struct txq_mp2mr_mbuf_check_data {
 		}
 		/* Reenable non-RSS queue attributes. No need to check
 		 * for errors at this stage. */
-		if (!priv->rss) {
+		if (!priv->rss && !priv->isolated) {
 			rxq_mac_addrs_add(rxq);
 			if (priv->promisc)
 				rxq_promiscuous_enable(rxq);
@@ -5108,6 +5136,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return -E_RTE_SECONDARY;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot set vlan filter, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return -EINVAL;
+	}
 	ret = vlan_filter_set(dev, vlan_id, on);
 	priv_unlock(priv);
 	assert(ret >= 0);
@@ -5120,6 +5154,7 @@ struct txq_mp2mr_mbuf_check_data {
 	.destroy = mlx4_flow_destroy,
 	.flush = mlx4_flow_flush,
 	.query = NULL,
+	.isolate = mlx4_flow_isolate,
 };
 
 /**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index c46fc23..1119525 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -335,6 +335,7 @@ struct priv {
 	unsigned int rss:1; /* RSS is enabled. */
 	unsigned int vf:1; /* This is a VF device. */
 	unsigned int pending_alarm:1; /* An alarm is pending. */
+	unsigned int isolated:1; /* Toggle isolated mode. */
 #ifdef INLINE_RECV
 	unsigned int inl_recv_size; /* Inline recv size */
 #endif
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index edfac03..5ad50bd 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -957,6 +957,43 @@ struct rte_flow *
 }
 
 /**
+ * @see rte_flow_isolate()
+ *
+ * Must be done before calling dev_configure().
+ *
+ * @param dev
+ *   Pointer to the ethernet device structure.
+ * @param enable
+ *   Nonzero to enter isolated mode, attempt to leave it otherwise.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative value on error.
+ */
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	if (priv->rxqs) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "isolated mode must be set"
+				   " before configuring the device");
+		priv_unlock(priv);
+		return -rte_errno;
+	}
+	priv->isolated = !!enable;
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
  * Destroy a flow.
  *
  * @param priv
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 12a293e..4d007da 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -90,6 +90,11 @@ struct mlx4_flow {
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 };
 
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error);
+
 struct mlx4_flow_action {
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t queue:1; /**< Target is a receive queue. */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v7 3/4] net/mlx4: refactor RSS parent queue allocation
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (21 preceding siblings ...)
  2017-07-04 11:14 ` [PATCH v7 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
@ 2017-07-04 11:14 ` Vasily Philipov
  2017-07-04 15:20   ` Adrien Mazarguil
  2017-07-04 11:14 ` [PATCH v7 4/4] net/mlx4: support flow API RSS action Vasily Philipov
                   ` (4 subsequent siblings)
  27 siblings, 1 reply; 51+ messages in thread
From: Vasily Philipov @ 2017-07-04 11:14 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

A special "parent" queue must be allocated in addition to a group of
standard Rx queues for RSS to work. This is done automatically outside of
isolated mode by the PMD when applications request several Rx queues.

Since each configured flow rule with the RSS action may target a different
set of queues, the PMD must have the ability to dynamically allocate
several parent queues, one per RSS group.

If isolated mode was requested, the default RSS parent queue is not
created.

Refactor RSS parent queue allocations (currently limited to a single
parent) in preparation for flow API RSS action support.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c | 380 ++++++++++++++++++++++++++++++++----------------
 drivers/net/mlx4/mlx4.h |  17 ++-
 2 files changed, 274 insertions(+), 123 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index fdd9cce..6459c86 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -552,13 +552,93 @@ void priv_unlock(struct priv *priv)
 
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp);
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent);
 
 static void
 rxq_cleanup(struct rxq *rxq);
 
 /**
+ * Create RSS parent queue.
+ *
+ * The new parent is inserted in front of the list in the private structure.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   Queues indices array, if NULL use all Rx queues.
+ * @param children_n
+ *   The number of entries in queues[].
+ *
+ * @return
+ *   Pointer to a parent rxq structure, NULL on failure.
+ */
+static struct rxq *
+priv_parent_create(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n)
+{
+	int ret;
+	uint16_t i;
+	struct rxq *parent;
+
+	parent = rte_zmalloc("parent queue",
+			     sizeof(*parent),
+			     RTE_CACHE_LINE_SIZE);
+	if (!parent) {
+		ERROR("cannot allocate memory for RSS parent queue");
+		return NULL;
+	}
+	ret = rxq_setup(priv->dev, parent, 0, 0, 0,
+			NULL, NULL, children_n, NULL);
+	if (ret) {
+		rte_free(parent);
+		return NULL;
+	}
+	parent->rss.queues_n = children_n;
+	if (queues) {
+		for (i = 0; i < children_n; ++i)
+			parent->rss.queues[i] = queues[i];
+	} else {
+		/* the default RSS ring case */
+		assert(priv->rxqs_n == children_n);
+		for (i = 0; i < priv->rxqs_n; ++i)
+			parent->rss.queues[i] = i;
+	}
+	LIST_INSERT_HEAD(&priv->parents, parent, next);
+	return parent;
+}
+
+/**
+ * Clean up RX queue parent structure.
+ *
+ * @param parent
+ *   RX queue parent structure.
+ */
+void
+rxq_parent_cleanup(struct rxq *parent)
+{
+	LIST_REMOVE(parent, next);
+	rxq_cleanup(parent);
+	rte_free(parent);
+}
+
+/**
+ * Clean up parent structures from the parent list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_parent_list_cleanup(struct priv *priv)
+{
+	while (!LIST_EMPTY(&priv->parents))
+		rxq_parent_cleanup(LIST_FIRST(&priv->parents));
+}
+
+/**
  * Ethernet device configuration.
  *
  * Prepare the driver for a given number of TX and RX queues.
@@ -577,7 +657,6 @@ void priv_unlock(struct priv *priv)
 	unsigned int rxqs_n = dev->data->nb_rx_queues;
 	unsigned int txqs_n = dev->data->nb_tx_queues;
 	unsigned int tmp;
-	int ret;
 
 	priv->rxqs = (void *)dev->data->rx_queues;
 	priv->txqs = (void *)dev->data->tx_queues;
@@ -607,7 +686,7 @@ void priv_unlock(struct priv *priv)
 		for (i = 0; (i != priv->rxqs_n); ++i)
 			if ((*priv->rxqs)[i] != NULL)
 				return EINVAL;
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parent_list_cleanup(priv);
 		priv->rss = 0;
 		priv->rxqs_n = 0;
 	}
@@ -632,14 +711,14 @@ void priv_unlock(struct priv *priv)
 	priv->rss = 1;
 	tmp = priv->rxqs_n;
 	priv->rxqs_n = rxqs_n;
-	ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, 0, NULL, NULL);
-	if (!ret)
+	if (priv->isolated)
+		return 0;
+	if (priv_parent_create(priv, NULL, priv->rxqs_n))
 		return 0;
 	/* Failure, rollback. */
 	priv->rss = 0;
 	priv->rxqs_n = tmp;
-	assert(ret > 0);
-	return ret;
+	return ENOMEM;
 }
 
 /**
@@ -2523,7 +2602,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
 		return;
 	if (priv->rss) {
-		rxq_mac_addr_del(&priv->rxq_parent, mac_index);
+		rxq_mac_addr_del(LIST_FIRST(&priv->parents), mac_index);
 		goto end;
 	}
 	for (i = 0; (i != priv->dev->data->nb_rx_queues); ++i)
@@ -2590,7 +2669,7 @@ struct txq_mp2mr_mbuf_check_data {
 		goto end;
 	}
 	if (priv->rss) {
-		ret = rxq_mac_addr_add(&priv->rxq_parent, mac_index);
+		ret = rxq_mac_addr_add(LIST_FIRST(&priv->parents), mac_index);
 		if (ret)
 			return ret;
 		goto end;
@@ -3353,15 +3432,18 @@ struct txq_mp2mr_mbuf_check_data {
  *   Completion queue to associate with QP.
  * @param desc
  *   Number of descriptors in QP (hint only).
- * @param parent
- *   If nonzero, create a parent QP, otherwise a child.
+ * @param children_n
+ *   If nonzero, a number of children for parent QP and zero for a child.
+ * @param rxq_parent
+ *   Pointer for a parent in a child case, NULL otherwise.
  *
  * @return
  *   QP pointer or NULL in case of error.
  */
 static struct ibv_qp *
 rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
-		 int parent, struct ibv_exp_res_domain *rd)
+		 int children_n, struct ibv_exp_res_domain *rd,
+		 struct rxq *rxq_parent)
 {
 	struct ibv_exp_qp_init_attr attr = {
 		/* CQ to be associated with the send queue. */
@@ -3391,16 +3473,16 @@ struct txq_mp2mr_mbuf_check_data {
 	attr.max_inl_recv = priv->inl_recv_size,
 	attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV;
 #endif
-	if (parent) {
+	if (children_n > 0) {
 		attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
 		/* TSS isn't necessary. */
 		attr.qpg.parent_attrib.tss_child_count = 0;
 		attr.qpg.parent_attrib.rss_child_count =
-			rte_align32pow2(priv->rxqs_n + 1) >> 1;
+			rte_align32pow2(children_n + 1) >> 1;
 		DEBUG("initializing parent RSS queue");
 	} else {
 		attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
-		attr.qpg.qpg_parent = priv->rxq_parent.qp;
+		attr.qpg.qpg_parent = rxq_parent->qp;
 		DEBUG("initializing child RSS queue");
 	}
 	return ibv_exp_create_qp(priv->ctx, &attr);
@@ -3436,13 +3518,7 @@ struct txq_mp2mr_mbuf_check_data {
 	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int err;
-	int parent = (rxq == &priv->rxq_parent);
 
-	if (parent) {
-		ERROR("%p: cannot rehash parent queue %p",
-		      (void *)dev, (void *)rxq);
-		return EINVAL;
-	}
 	mb_len = rte_pktmbuf_data_room_size(rxq->mp);
 	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
 	/* Number of descriptors and mbufs currently allocated. */
@@ -3487,6 +3563,8 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	/* From now on, any failure will render the queue unusable.
 	 * Reinitialize QP. */
+	if (!tmpl.qp)
+		goto skip_init;
 	mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
 	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
 	if (err) {
@@ -3494,12 +3572,6 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	}
-	err = ibv_resize_cq(tmpl.cq, desc_n);
-	if (err) {
-		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
-		assert(err > 0);
-		return err;
-	}
 	mod = (struct ibv_exp_qp_attr){
 		/* Move the QP to this state. */
 		.qp_state = IBV_QPS_INIT,
@@ -3508,9 +3580,6 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	err = ibv_exp_modify_qp(tmpl.qp, &mod,
 				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
 				 IBV_EXP_QP_PORT));
 	if (err) {
 		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
@@ -3518,6 +3587,13 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	};
+skip_init:
+	err = ibv_resize_cq(tmpl.cq, desc_n);
+	if (err) {
+		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
+		assert(err > 0);
+		return err;
+	}
 	/* Reconfigure flows. Do not care for errors. */
 	if (!priv->rss && !priv->isolated) {
 		rxq_mac_addrs_add(&tmpl);
@@ -3585,6 +3661,8 @@ struct txq_mp2mr_mbuf_check_data {
 	rxq->elts_n = 0;
 	rte_free(rxq->elts.sp);
 	rxq->elts.sp = NULL;
+	if (!tmpl.qp)
+		goto skip_rtr;
 	/* Post WRs. */
 	err = ibv_post_recv(tmpl.qp,
 			    (tmpl.sp ?
@@ -3612,6 +3690,116 @@ struct txq_mp2mr_mbuf_check_data {
 }
 
 /**
+ * Create verbs QP resources associated with a rxq.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param inactive
+ *   If true, the queue is disabled because its index is higher or
+ *   equal to the real number of queues, which must be a power of 2.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure for a child in RSS case,
+ *   NULL for parent.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent)
+{
+	int ret;
+	struct ibv_exp_qp_attr mod;
+	struct ibv_exp_query_intf_params params;
+	enum ibv_exp_query_intf_status status;
+	struct ibv_recv_wr *bad_wr;
+	int parent = (children_n > 0);
+	struct priv *priv = rxq->priv;
+
+#ifdef RSS_SUPPORT
+	if (priv->rss && !inactive && (rxq_parent || parent))
+		rxq->qp = rxq_setup_qp_rss(priv, rxq->cq, desc,
+					   children_n, rxq->rd,
+					   rxq_parent);
+	else
+#endif /* RSS_SUPPORT */
+		rxq->qp = rxq_setup_qp(priv, rxq->cq, desc, rxq->rd);
+	if (rxq->qp == NULL) {
+		ret = (errno ? errno : EINVAL);
+		ERROR("QP creation failure: %s",
+		      strerror(ret));
+		return ret;
+	}
+	mod = (struct ibv_exp_qp_attr){
+		/* Move the QP to this state. */
+		.qp_state = IBV_QPS_INIT,
+		/* Primary port number. */
+		.port_num = priv->port
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod,
+				(IBV_EXP_QP_STATE |
+#ifdef RSS_SUPPORT
+				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
+#endif /* RSS_SUPPORT */
+				 IBV_EXP_QP_PORT));
+	if (ret) {
+		ERROR("QP state to IBV_QPS_INIT failed: %s",
+		      strerror(ret));
+		return ret;
+	}
+	if (!priv->isolated && (parent || !priv->rss)) {
+		/* Configure MAC and broadcast addresses. */
+		ret = rxq_mac_addrs_add(rxq);
+		if (ret) {
+			ERROR("QP flow attachment failed: %s",
+			      strerror(ret));
+			return ret;
+		}
+	}
+	if (!parent) {
+		ret = ibv_post_recv(rxq->qp,
+				    (rxq->sp ?
+				     &(*rxq->elts.sp)[0].wr :
+				     &(*rxq->elts.no_sp)[0].wr),
+				    &bad_wr);
+		if (ret) {
+			ERROR("ibv_post_recv() failed for WR %p: %s",
+			      (void *)bad_wr,
+			      strerror(ret));
+			return ret;
+		}
+	}
+	mod = (struct ibv_exp_qp_attr){
+		.qp_state = IBV_QPS_RTR
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod, IBV_EXP_QP_STATE);
+	if (ret) {
+		ERROR("QP state to IBV_QPS_RTR failed: %s",
+		      strerror(ret));
+		return ret;
+	}
+	params = (struct ibv_exp_query_intf_params){
+		.intf_scope = IBV_EXP_INTF_GLOBAL,
+		.intf = IBV_EXP_INTF_QP_BURST,
+		.obj = rxq->qp,
+	};
+	rxq->if_qp = ibv_exp_query_intf(priv->ctx, &params, &status);
+	if (rxq->if_qp == NULL) {
+		ERROR("QP interface family query failed with status %d",
+		      status);
+		return errno;
+	}
+	return 0;
+}
+
+/**
  * Configure a RX queue.
  *
  * @param dev
@@ -3629,14 +3817,21 @@ struct txq_mp2mr_mbuf_check_data {
  *   Thresholds parameters.
  * @param mp
  *   Memory pool for buffer allocations.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure (or NULL) in a child case,
+ *   NULL for parent.
  *
  * @return
  *   0 on success, errno value on failure.
  */
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp)
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct rxq tmpl = {
@@ -3644,17 +3839,15 @@ struct txq_mp2mr_mbuf_check_data {
 		.mp = mp,
 		.socket = socket
 	};
-	struct ibv_exp_qp_attr mod;
 	union {
 		struct ibv_exp_query_intf_params params;
 		struct ibv_exp_cq_init_attr cq;
 		struct ibv_exp_res_domain_init_attr rd;
 	} attr;
 	enum ibv_exp_query_intf_status status;
-	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int ret = 0;
-	int parent = (rxq == &priv->rxq_parent);
+	int parent = (children_n > 0);
 
 	(void)conf; /* Thresholds configuration (ignored). */
 	/*
@@ -3745,45 +3938,6 @@ struct txq_mp2mr_mbuf_check_data {
 	      priv->device_attr.max_qp_wr);
 	DEBUG("priv->device_attr.max_sge is %d",
 	      priv->device_attr.max_sge);
-#ifdef RSS_SUPPORT
-	if (priv->rss && !inactive)
-		tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
-					   tmpl.rd);
-	else
-#endif /* RSS_SUPPORT */
-		tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
-	if (tmpl.qp == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: QP creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	mod = (struct ibv_exp_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod,
-				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
-				 IBV_EXP_QP_PORT));
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	if (!priv->isolated && (parent || !priv->rss)) {
-		/* Configure MAC and broadcast addresses. */
-		ret = rxq_mac_addrs_add(&tmpl);
-		if (ret) {
-			ERROR("%p: QP flow attachment failed: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
 	/* Allocate descriptors for RX queues, except for the RSS parent. */
 	if (parent)
 		goto skip_alloc;
@@ -3794,29 +3948,14 @@ struct txq_mp2mr_mbuf_check_data {
 	if (ret) {
 		ERROR("%p: RXQ allocation failed: %s",
 		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	ret = ibv_post_recv(tmpl.qp,
-			    (tmpl.sp ?
-			     &(*tmpl.elts.sp)[0].wr :
-			     &(*tmpl.elts.no_sp)[0].wr),
-			    &bad_wr);
-	if (ret) {
-		ERROR("%p: ibv_post_recv() failed for WR %p: %s",
-		      (void *)dev,
-		      (void *)bad_wr,
-		      strerror(ret));
-		goto error;
+		return ret;
 	}
 skip_alloc:
-	mod = (struct ibv_exp_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
+	if (parent || rxq_parent || !priv->rss) {
+		ret = rxq_create_qp(&tmpl, desc, inactive,
+				    children_n, rxq_parent);
+		if (ret)
+			goto error;
 	}
 	/* Save port ID. */
 	tmpl.port_id = dev->data->port_id;
@@ -3828,21 +3967,11 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
 	if (tmpl.if_cq == NULL) {
+		ret = EINVAL;
 		ERROR("%p: CQ interface family query failed with status %d",
 		      (void *)dev, status);
 		goto error;
 	}
-	attr.params = (struct ibv_exp_query_intf_params){
-		.intf_scope = IBV_EXP_INTF_GLOBAL,
-		.intf = IBV_EXP_INTF_QP_BURST,
-		.obj = tmpl.qp,
-	};
-	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
-	if (tmpl.if_qp == NULL) {
-		ERROR("%p: QP interface family query failed with status %d",
-		      (void *)dev, status);
-		goto error;
-	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
 	rxq_cleanup(rxq);
@@ -3880,6 +4009,7 @@ struct txq_mp2mr_mbuf_check_data {
 		    unsigned int socket, const struct rte_eth_rxconf *conf,
 		    struct rte_mempool *mp)
 {
+	struct rxq *parent;
 	struct priv *priv = dev->data->dev_private;
 	struct rxq *rxq = (*priv->rxqs)[idx];
 	int inactive = 0;
@@ -3914,9 +4044,16 @@ struct txq_mp2mr_mbuf_check_data {
 			return -ENOMEM;
 		}
 	}
-	if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
-		inactive = 1;
-	ret = rxq_setup(dev, rxq, desc, socket, inactive, conf, mp);
+	if (priv->rss && !priv->isolated) {
+		/* The list consists of the single default one. */
+		parent = LIST_FIRST(&priv->parents);
+		if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
+			inactive = 1;
+	} else {
+		parent = NULL;
+	}
+	ret = rxq_setup(dev, rxq, desc, socket,
+			inactive, conf, mp, 0, parent);
 	if (ret)
 		rte_free(rxq);
 	else {
@@ -3953,7 +4090,6 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	priv = rxq->priv;
 	priv_lock(priv);
-	assert(rxq != &priv->rxq_parent);
 	for (i = 0; (i != priv->rxqs_n); ++i)
 		if ((*priv->rxqs)[i] == rxq) {
 			DEBUG("%p: removing RX queue %p from list",
@@ -4008,7 +4144,7 @@ struct txq_mp2mr_mbuf_check_data {
 		rxq = NULL;
 		r = 1;
 	} else if (priv->rss) {
-		rxq = &priv->rxq_parent;
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4099,7 +4235,7 @@ struct txq_mp2mr_mbuf_check_data {
 		rxq = NULL;
 		r = 1;
 	} else if (priv->rss) {
-		rxq = &priv->rxq_parent;
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4233,7 +4369,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->txqs = NULL;
 	}
 	if (priv->rss)
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parent_list_cleanup(priv);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
 		claim_zero(ibv_dealloc_pd(priv->pd));
@@ -4632,7 +4768,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_promiscuous_enable(&priv->rxq_parent);
+		ret = rxq_promiscuous_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4677,7 +4813,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	}
 	if (priv->rss) {
-		rxq_promiscuous_disable(&priv->rxq_parent);
+		rxq_promiscuous_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -4718,7 +4854,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_allmulticast_enable(&priv->rxq_parent);
+		ret = rxq_allmulticast_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4763,7 +4899,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	}
 	if (priv->rss) {
-		rxq_allmulticast_disable(&priv->rxq_parent);
+		rxq_allmulticast_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -5072,7 +5208,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5080,7 +5216,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 1;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
@@ -5094,7 +5230,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5102,7 +5238,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 0;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 1119525..6de3484 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -219,6 +219,7 @@ struct rxq_elt {
 
 /* RX queue descriptor. */
 struct rxq {
+	LIST_ENTRY(rxq) next; /* Used by parent queue only */
 	struct priv *priv; /* Back pointer to private data. */
 	struct rte_mempool *mp; /* Memory Pool for allocations. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
@@ -247,6 +248,10 @@ struct rxq {
 	struct mlx4_rxq_stats stats; /* RX queue counters. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	struct ibv_exp_res_domain *rd; /* Resource Domain. */
+	struct {
+		uint16_t queues_n;
+		uint16_t queues[RTE_MAX_QUEUES_PER_PORT];
+	} rss;
 };
 
 /* TX element. */
@@ -341,7 +346,6 @@ struct priv {
 #endif
 	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
 	/* RX/TX queues. */
-	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
 	struct rxq *(*rxqs)[]; /* RX queues. */
@@ -350,10 +354,21 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	LIST_HEAD(mlx4_flows, rte_flow) flows;
 	struct rte_intr_conf intr_conf; /* Active interrupt configuration. */
+	LIST_HEAD(mlx4_parents, rxq) parents;
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
 void priv_lock(struct priv *priv);
 void priv_unlock(struct priv *priv);
 
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent);
+
+void
+rxq_parent_cleanup(struct rxq *parent);
+
 #endif /* RTE_PMD_MLX4_H_ */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v7 4/4] net/mlx4: support flow API RSS action
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (22 preceding siblings ...)
  2017-07-04 11:14 ` [PATCH v7 3/4] net/mlx4: refactor RSS parent queue allocation Vasily Philipov
@ 2017-07-04 11:14 ` Vasily Philipov
  2017-07-04 15:21   ` Adrien Mazarguil
  2017-07-05  8:14 ` [PATCH v8 1/4] app/testpmd: add isolated mode parameter Vasily Philipov
                   ` (3 subsequent siblings)
  27 siblings, 1 reply; 51+ messages in thread
From: Vasily Philipov @ 2017-07-04 11:14 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

This commit adds support for the flow API RSS action with the following
limitations:

 - Only supported when isolated mode is enabled.
 - The number of queues specified by the action (rte_flow_action_rss.num)
   must be a power of two.
 - Each queue index can be specified at most once in the configuration
   (rte_flow_action_rss.queue[]).
 - Because a queue can be associated with only one RSS context, it cannot
   be targeted by multiple RSS actions simultaneously.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      |   2 +-
 drivers/net/mlx4/mlx4.h      |   5 ++
 drivers/net/mlx4/mlx4_flow.c | 197 ++++++++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx4/mlx4_flow.h |   3 +-
 4 files changed, 202 insertions(+), 5 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 6459c86..f09d77c 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -575,7 +575,7 @@ void priv_unlock(struct priv *priv)
  * @return
  *   Pointer to a parent rxq structure, NULL on failure.
  */
-static struct rxq *
+struct rxq *
 priv_parent_create(struct priv *priv,
 		   uint16_t queues[],
 		   uint16_t children_n)
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 6de3484..716fd45 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -371,4 +371,9 @@ struct priv {
 void
 rxq_parent_cleanup(struct rxq *parent);
 
+struct rxq *
+priv_parent_create(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n);
+
 #endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 5ad50bd..8ade106 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -112,6 +112,7 @@ struct rte_flow_drop {
 static const enum rte_flow_action_type valid_actions[] = {
 	RTE_FLOW_ACTION_TYPE_DROP,
 	RTE_FLOW_ACTION_TYPE_QUEUE,
+	RTE_FLOW_ACTION_TYPE_RSS,
 	RTE_FLOW_ACTION_TYPE_END,
 };
 
@@ -672,6 +673,76 @@ struct rte_flow_drop {
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
 			action.queue = 1;
+			action.queues_n = 1;
+			action.queues[0] = queue->index;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			int i;
+			int ierr;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				actions->conf;
+
+			if (!priv->hw_rss) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used with "
+					   "the current configuration");
+				return -rte_errno;
+			}
+			if (!priv->isolated) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used without "
+					   "isolated mode");
+				return -rte_errno;
+			}
+			if (!rte_is_power_of_2(rss->num)) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "should be power of two");
+				return -rte_errno;
+			}
+			if (priv->max_rss_tbl_sz < rss->num) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "is too large");
+				return -rte_errno;
+			}
+			/* checking indexes array */
+			ierr = 0;
+			for (i = 0; i < rss->num; ++i) {
+				int j;
+				if (rss->queue[i] >= priv->rxqs_n)
+					ierr = 1;
+				/*
+				 * Prevent the user from specifying
+				 * the same queue twice in the RSS array.
+				 */
+				for (j = i + 1; j < rss->num && !ierr; ++j)
+					if (rss->queue[j] == rss->queue[i])
+						ierr = 1;
+				if (ierr) {
+					rte_flow_error_set(
+						error,
+						ENOTSUP,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"RSS action only supports "
+						"unique queue indices "
+						"in a list");
+					return -rte_errno;
+				}
+			}
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			goto exit_action_not_supported;
 		}
@@ -797,6 +868,82 @@ struct rte_flow_drop {
 }
 
 /**
+ * Get RSS parent rxq structure for given queues.
+ *
+ * Creates a new one or returns an existing one.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   queues indices array, NULL in default RSS case.
+ * @param children_n
+ *   the size of queues array.
+ *
+ * @return
+ *   Pointer to a parent rxq structure, NULL on failure.
+ */
+static struct rxq *
+priv_parent_get(struct priv *priv,
+		uint16_t queues[],
+		uint16_t children_n,
+		struct rte_flow_error *error)
+{
+	unsigned int i;
+	struct rxq *parent;
+
+	for (parent = LIST_FIRST(&priv->parents);
+	     parent;
+	     parent = LIST_NEXT(parent, next)) {
+		unsigned int same = 0;
+		unsigned int overlap = 0;
+
+		/*
+		 * Find out whether an appropriate parent queue already exists
+		 * and can be reused, otherwise make sure there are no overlaps.
+		 */
+		for (i = 0; i < children_n; ++i) {
+			unsigned int j;
+
+			for (j = 0; j < parent->rss.queues_n; ++j) {
+				if (parent->rss.queues[j] != queues[i])
+					continue;
+				++overlap;
+				if (i == j)
+					++same;
+			}
+		}
+		if (same == children_n &&
+			children_n == parent->rss.queues_n)
+			return parent;
+		else if (overlap)
+			goto error;
+	}
+	/* Exclude the cases when some QPs were created without RSS */
+	for (i = 0; i < children_n; ++i) {
+		struct rxq *rxq = (*priv->rxqs)[queues[i]];
+		if (rxq->qp)
+			goto error;
+	}
+	parent = priv_parent_create(priv, queues, children_n);
+	if (!parent) {
+		rte_flow_error_set(error,
+				   ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "flow rule creation failure");
+		return NULL;
+	}
+	return parent;
+
+error:
+	rte_flow_error_set(error,
+			   EEXIST,
+			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL,
+			   "sharing a queue between several"
+			   " RSS groups is not supported");
+	return NULL;
+}
+
+/**
  * Complete flow rule creation.
  *
  * @param priv
@@ -819,6 +966,7 @@ struct rte_flow_drop {
 {
 	struct ibv_qp *qp;
 	struct rte_flow *rte_flow;
+	struct rxq *rxq_parent = NULL;
 
 	assert(priv->pd);
 	assert(priv->ctx);
@@ -831,9 +979,39 @@ struct rte_flow_drop {
 	if (action->drop) {
 		qp = priv->flow_drop_queue->qp;
 	} else {
-		struct rxq *rxq = (*priv->rxqs)[action->queue_id];
+		int ret;
+		unsigned int i;
+		struct rxq *rxq = NULL;
 
-		qp = rxq->qp;
+		if (action->queues_n > 1) {
+			rxq_parent = priv_parent_get(priv, action->queues,
+						     action->queues_n, error);
+			if (!rxq_parent)
+				goto error;
+		}
+		for (i = 0; i < action->queues_n; ++i) {
+			rxq = (*priv->rxqs)[action->queues[i]];
+			/*
+			 * In case of isolated mode we postpone
+			 * ibv receive queue creation till the first
+			 * rte_flow rule will be applied on that queue.
+			 */
+			if (!rxq->qp) {
+				assert(priv->isolated);
+				ret = rxq_create_qp(rxq, rxq->elts_n,
+						    0, 0, rxq_parent);
+				if (ret) {
+					rte_flow_error_set(
+						error,
+						ENOMEM,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"flow rule creation failure");
+					goto error;
+				}
+			}
+		}
+		qp = action->queues_n > 1 ? rxq_parent->qp : rxq->qp;
 		rte_flow->qp = qp;
 	}
 	rte_flow->ibv_attr = ibv_attr;
@@ -846,6 +1024,8 @@ struct rte_flow_drop {
 	return rte_flow;
 
 error:
+	if (rxq_parent)
+		rxq_parent_cleanup(rxq_parent);
 	rte_free(rte_flow);
 	return NULL;
 }
@@ -909,11 +1089,22 @@ struct rte_flow_drop {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			action.queue = 1;
-			action.queue_id =
+			action.queues_n = 1;
+			action.queues[0] =
 				((const struct rte_flow_action_queue *)
 				 actions->conf)->index;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
 			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			unsigned int i;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				 actions->conf;
+
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 4d007da..beabcf2 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -98,7 +98,8 @@ struct mlx4_flow {
 struct mlx4_flow_action {
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t queue:1; /**< Target is a receive queue. */
-	uint32_t queue_id; /**< Identifier of the queue. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indices to use. */
+	uint16_t queues_n; /**< Number of entries in queues[]. */
 };
 
 int mlx4_priv_flow_start(struct priv *priv);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: [PATCH v7 1/4] app/testpmd: add isolated mode parameter
  2017-07-04 11:14 ` [PATCH v7 1/4] app/testpmd: add isolated mode parameter Vasily Philipov
@ 2017-07-04 15:20   ` Adrien Mazarguil
  0 siblings, 0 replies; 51+ messages in thread
From: Adrien Mazarguil @ 2017-07-04 15:20 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Nelio Laranjeiro

On Tue, Jul 04, 2017 at 11:22:48AM +0000, Vasily Philipov wrote:
> Providing this parameter requests flow API isolated mode on all ports at
> initialization time. It ensures all traffic is received through the
> configured flow rules only (see flow command).
> 
> Ports that do not support this mode are automatically discarded.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>

Thanks for reordering the series. For this commit:

Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v7 2/4] net/mlx4: implement isolated mode from flow API
  2017-07-04 11:14 ` [PATCH v7 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
@ 2017-07-04 15:20   ` Adrien Mazarguil
  0 siblings, 0 replies; 51+ messages in thread
From: Adrien Mazarguil @ 2017-07-04 15:20 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Nelio Laranjeiro

On Tue, Jul 04, 2017 at 11:22:49AM +0000, Vasily Philipov wrote:
> The user must request isolated mode before device configuration.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>

Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v7 3/4] net/mlx4: refactor RSS parent queue allocation
  2017-07-04 11:14 ` [PATCH v7 3/4] net/mlx4: refactor RSS parent queue allocation Vasily Philipov
@ 2017-07-04 15:20   ` Adrien Mazarguil
  0 siblings, 0 replies; 51+ messages in thread
From: Adrien Mazarguil @ 2017-07-04 15:20 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Nelio Laranjeiro

On Tue, Jul 04, 2017 at 11:22:50AM +0000, Vasily Philipov wrote:
> A special "parent" queue must be allocated in addition to a group of
> standard Rx queues for RSS to work. This is done automatically outside of
> isolated mode by the PMD when applications request several Rx queues.
> 
> Since each configured flow rule with the RSS action may target a different
> set of queues, the PMD must have the ability to dynamically allocate
> several parent queues, one per RSS group.
> 
> If isolated mode was requested the default RSS parent queue isn't created
> in this case.
> 
> Refactor RSS parent queue allocations (currently limited to a single
> parent) in preparation for flow API RSS action support.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>

Thanks for making the requested changes.

There is a remaining issue with this patch: creating a flow in isolated mode
causes a crash due to RX QPs not being allocated. You should temporarily
make priv_flow_create_action_queue() call rxq_create_qp() when the target QP
does not exist.

Patch looks otherwise fine.

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v7 4/4] net/mlx4: support flow API RSS action
  2017-07-04 11:14 ` [PATCH v7 4/4] net/mlx4: support flow API RSS action Vasily Philipov
@ 2017-07-04 15:21   ` Adrien Mazarguil
  0 siblings, 0 replies; 51+ messages in thread
From: Adrien Mazarguil @ 2017-07-04 15:21 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Nelio Laranjeiro

On Tue, Jul 04, 2017 at 11:22:51AM +0000, Vasily Philipov wrote:
> This commit adds support for the flow API RSS action with the following
> limitations:
> 
>  - Only supported when isolated mode is enabled.
>  - The number of queues specified by the action (rte_flow_action_rss.num)
>    must be a power of two.
>  - Each queue index can be specified at most once in the configuration
>    (rte_flow_action_rss.queue[]).
>  - Because a queue can be associated with a single RSS context, it cannot
>    be targeted by multiple RSS actions simultaneously.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>

Looks good, assuming crash from patch 3/4 is addressed:

Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [PATCH v8 1/4] app/testpmd: add isolated mode parameter
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (23 preceding siblings ...)
  2017-07-04 11:14 ` [PATCH v7 4/4] net/mlx4: support flow API RSS action Vasily Philipov
@ 2017-07-05  8:14 ` Vasily Philipov
  2017-07-05 14:49   ` Adrien Mazarguil
                     ` (2 more replies)
  2017-07-05  8:14 ` [PATCH v8 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (2 subsequent siblings)
  27 siblings, 3 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-07-05  8:14 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

Providing this parameter requests flow API isolated mode on all ports at
initialization time. It ensures all traffic is received through the
configured flow rules only (see flow command).

Ports that do not support this mode are automatically discarded.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 app/test-pmd/parameters.c |  3 +++
 app/test-pmd/testpmd.c    | 14 ++++++++++++++
 app/test-pmd/testpmd.h    |  1 +
 3 files changed, 18 insertions(+)

diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index fbe6284..e313871 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -623,6 +623,7 @@
 		{ "tx-queue-stats-mapping",	1, 0, 0 },
 		{ "rx-queue-stats-mapping",	1, 0, 0 },
 		{ "no-flush-rx",	0, 0, 0 },
+		{ "isolated-mode",	        0, 0, 0 },
 		{ "txpkts",			1, 0, 0 },
 		{ "disable-link-check",		0, 0, 0 },
 		{ "no-lsc-interrupt",		0, 0, 0 },
@@ -1081,6 +1082,8 @@
 				lsc_interrupt = 0;
 			if (!strcmp(lgopts[opt_idx].name, "no-rmv-interrupt"))
 				rmv_interrupt = 0;
+			if (!strcmp(lgopts[opt_idx].name, "isolated-mode"))
+				isolated_mode = 1;
 			if (!strcmp(lgopts[opt_idx].name, "print-event"))
 				if (parse_event_printing_config(optarg, 1)) {
 					rte_exit(EXIT_FAILURE,
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index b3ad83b..864a2a8 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -270,6 +270,11 @@ struct fwd_engine * fwd_engines[] = {
 uint8_t no_flush_rx = 0; /* flush by default */
 
 /*
+ * Flow API isolated mode.
+ */
+uint8_t isolated_mode;
+
+/*
  * Avoids to check link status when starting/stopping a port.
  */
 uint8_t no_link_check = 0; /* check by default */
@@ -1425,6 +1430,15 @@ static int eth_event_callback(uint8_t port_id,
 		if (port->need_reconfig > 0) {
 			port->need_reconfig = 0;
 
+			if (isolated_mode) {
+				int ret = port_flow_isolate(pi, 1);
+				if (ret) {
+					printf("Failed to apply isolated"
+					       " mode on port %d\n", pi);
+					return -1;
+				}
+			}
+
 			printf("Configuring Port %d (socket %u)\n", pi,
 					port->socket_id);
 			/* configure port */
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 364502d..d5fc9ad 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -303,6 +303,7 @@ struct queue_stats_mappings {
 extern uint8_t  numa_support; /**< set by "--numa" parameter */
 extern uint16_t port_topology; /**< set by "--port-topology" parameter */
 extern uint8_t no_flush_rx; /**<set by "--no-flush-rx" parameter */
+extern uint8_t isolated_mode; /**<set by "--isolated-mode" parameter */
 extern uint8_t  mp_anon; /**< set by "--mp-anon" parameter */
 extern uint8_t no_link_check; /**<set by "--disable-link-check" parameter */
 extern volatile int test_done; /* stop packet forwarding when set to 1. */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v8 2/4] net/mlx4: implement isolated mode from flow API
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (24 preceding siblings ...)
  2017-07-05  8:14 ` [PATCH v8 1/4] app/testpmd: add isolated mode parameter Vasily Philipov
@ 2017-07-05  8:14 ` Vasily Philipov
  2017-07-05 14:49   ` Adrien Mazarguil
  2017-07-05  8:14 ` [PATCH v8 3/4] net/mlx4: refactor RSS parent queue allocation Vasily Philipov
  2017-07-05  8:14 ` [PATCH v8 4/4] net/mlx4: support flow API RSS action Vasily Philipov
  27 siblings, 1 reply; 51+ messages in thread
From: Vasily Philipov @ 2017-07-05  8:14 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

The user must request isolated mode before device configuration.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 57 +++++++++++++++++++++++++++++++++++---------
 drivers/net/mlx4/mlx4.h      |  1 +
 drivers/net/mlx4/mlx4_flow.c | 37 ++++++++++++++++++++++++++++
 drivers/net/mlx4/mlx4_flow.h |  5 ++++
 4 files changed, 89 insertions(+), 11 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 16cafae..fdd9cce 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -588,7 +588,7 @@ void priv_unlock(struct priv *priv)
 	}
 	if (rxqs_n == priv->rxqs_n)
 		return 0;
-	if (!rte_is_power_of_2(rxqs_n)) {
+	if (!rte_is_power_of_2(rxqs_n) && !priv->isolated) {
 		unsigned n_active;
 
 		n_active = rte_align32pow2(rxqs_n + 1) >> 1;
@@ -2518,6 +2518,7 @@ struct txq_mp2mr_mbuf_check_data {
 {
 	unsigned int i;
 
+	assert(!priv->isolated);
 	assert(mac_index < elemof(priv->mac));
 	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
 		return;
@@ -2767,12 +2768,13 @@ struct txq_mp2mr_mbuf_check_data {
 						rxq->if_cq,
 						&params));
 	}
-	if (rxq->qp != NULL) {
+	if (rxq->qp != NULL && !rxq->priv->isolated) {
 		rxq_promiscuous_disable(rxq);
 		rxq_allmulticast_disable(rxq);
 		rxq_mac_addrs_del(rxq);
-		claim_zero(ibv_destroy_qp(rxq->qp));
 	}
+	if (rxq->qp != NULL)
+		claim_zero(ibv_destroy_qp(rxq->qp));
 	if (rxq->cq != NULL)
 		claim_zero(ibv_destroy_cq(rxq->cq));
 	if (rxq->channel != NULL)
@@ -3472,7 +3474,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return 0;
 	}
 	/* Remove attached flows if RSS is disabled (no parent queue). */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_allmulticast_disable(&tmpl);
 		rxq_promiscuous_disable(&tmpl);
 		rxq_mac_addrs_del(&tmpl);
@@ -3517,7 +3519,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return err;
 	};
 	/* Reconfigure flows. Do not care for errors. */
-	if (!priv->rss) {
+	if (!priv->rss && !priv->isolated) {
 		rxq_mac_addrs_add(&tmpl);
 		if (priv->promisc)
 			rxq_promiscuous_enable(&tmpl);
@@ -3773,7 +3775,7 @@ struct txq_mp2mr_mbuf_check_data {
 		      (void *)dev, strerror(ret));
 		goto error;
 	}
-	if ((parent) || (!priv->rss))  {
+	if (!priv->isolated && (parent || !priv->rss)) {
 		/* Configure MAC and broadcast addresses. */
 		ret = rxq_mac_addrs_add(&tmpl);
 		if (ret) {
@@ -4002,7 +4004,10 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	if (priv->rss) {
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
 		rxq = &priv->rxq_parent;
 		r = 1;
 	} else {
@@ -4090,7 +4095,10 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
 	priv->started = 0;
-	if (priv->rss) {
+	if (priv->isolated) {
+		rxq = NULL;
+		r = 1;
+	} else if (priv->rss) {
 		rxq = &priv->rxq_parent;
 		r = 1;
 	} else {
@@ -4522,6 +4530,8 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated)
+		goto end;
 	DEBUG("%p: removing MAC address from index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4555,6 +4565,12 @@ struct txq_mp2mr_mbuf_check_data {
 		return -ENOTSUP;
 	(void)vmdq;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot add MAC address, "
+		      "device is in isolated mode", (void *)dev);
+		re = EPERM;
+		goto end;
+	}
 	DEBUG("%p: adding MAC address at index %" PRIu32,
 	      (void *)dev, index);
 	/* Last array entry is reserved for broadcast. */
@@ -4602,6 +4618,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable promiscuous, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->promisc) {
 		priv_unlock(priv);
 		return;
@@ -4650,7 +4672,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
-	if (!priv->promisc) {
+	if (!priv->promisc || priv->isolated) {
 		priv_unlock(priv);
 		return;
 	}
@@ -4682,6 +4704,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot enable allmulticast, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return;
+	}
 	if (priv->allmulti) {
 		priv_unlock(priv);
 		return;
@@ -4730,7 +4758,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return;
 	priv_lock(priv);
-	if (!priv->allmulti) {
+	if (!priv->allmulti || priv->isolated) {
 		priv_unlock(priv);
 		return;
 	}
@@ -4873,7 +4901,7 @@ struct txq_mp2mr_mbuf_check_data {
 		}
 		/* Reenable non-RSS queue attributes. No need to check
 		 * for errors at this stage. */
-		if (!priv->rss) {
+		if (!priv->rss && !priv->isolated) {
 			rxq_mac_addrs_add(rxq);
 			if (priv->promisc)
 				rxq_promiscuous_enable(rxq);
@@ -5108,6 +5136,12 @@ struct txq_mp2mr_mbuf_check_data {
 	if (mlx4_is_secondary())
 		return -E_RTE_SECONDARY;
 	priv_lock(priv);
+	if (priv->isolated) {
+		DEBUG("%p: cannot set vlan filter, "
+		      "device is in isolated mode", (void *)dev);
+		priv_unlock(priv);
+		return -EINVAL;
+	}
 	ret = vlan_filter_set(dev, vlan_id, on);
 	priv_unlock(priv);
 	assert(ret >= 0);
@@ -5120,6 +5154,7 @@ struct txq_mp2mr_mbuf_check_data {
 	.destroy = mlx4_flow_destroy,
 	.flush = mlx4_flow_flush,
 	.query = NULL,
+	.isolate = mlx4_flow_isolate,
 };
 
 /**
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index c46fc23..1119525 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -335,6 +335,7 @@ struct priv {
 	unsigned int rss:1; /* RSS is enabled. */
 	unsigned int vf:1; /* This is a VF device. */
 	unsigned int pending_alarm:1; /* An alarm is pending. */
+	unsigned int isolated:1; /* Toggle isolated mode. */
 #ifdef INLINE_RECV
 	unsigned int inl_recv_size; /* Inline recv size */
 #endif
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index edfac03..5ad50bd 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -957,6 +957,43 @@ struct rte_flow *
 }
 
 /**
+ * @see rte_flow_isolate()
+ *
+ * Must be done before calling dev_configure().
+ *
+ * @param dev
+ *   Pointer to the ethernet device structure.
+ * @param enable
+ *   Nonzero to enter isolated mode, attempt to leave it otherwise.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative value on error.
+ */
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	if (priv->rxqs) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "isolated mode must be set"
+				   " before configuring the device");
+		priv_unlock(priv);
+		return -rte_errno;
+	}
+	priv->isolated = !!enable;
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
  * Destroy a flow.
  *
  * @param priv
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 12a293e..4d007da 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -90,6 +90,11 @@ struct mlx4_flow {
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
 };
 
+int
+mlx4_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error);
+
 struct mlx4_flow_action {
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t queue:1; /**< Target is a receive queue. */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v8 3/4] net/mlx4: refactor RSS parent queue allocation
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (25 preceding siblings ...)
  2017-07-05  8:14 ` [PATCH v8 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
@ 2017-07-05  8:14 ` Vasily Philipov
  2017-07-05 14:49   ` Adrien Mazarguil
  2017-07-05  8:14 ` [PATCH v8 4/4] net/mlx4: support flow API RSS action Vasily Philipov
  27 siblings, 1 reply; 51+ messages in thread
From: Vasily Philipov @ 2017-07-05  8:14 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

A special "parent" queue must be allocated in addition to a group of
standard Rx queues for RSS to work. This is done automatically outside of
isolated mode by the PMD when applications request several Rx queues.

Since each configured flow rule with the RSS action may target a different
set of queues, the PMD must have the ability to dynamically allocate
several parent queues, one per RSS group.

If isolated mode was requested, the default RSS parent queue is not
created.

Refactor RSS parent queue allocations (currently limited to a single
parent) in preparation for flow API RSS action support.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 382 +++++++++++++++++++++++++++++--------------
 drivers/net/mlx4/mlx4.h      |  17 +-
 drivers/net/mlx4/mlx4_flow.c |  15 ++
 3 files changed, 291 insertions(+), 123 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index fdd9cce..0557c7c 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -552,13 +552,93 @@ void priv_unlock(struct priv *priv)
 
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp);
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent);
 
 static void
 rxq_cleanup(struct rxq *rxq);
 
 /**
+ * Create RSS parent queue.
+ *
+ * The new parent is inserted in front of the list in the private structure.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   Queues indices array, if NULL use all Rx queues.
+ * @param children_n
+ *   The number of entries in queues[].
+ *
+ * @return
+ *   Pointer to a parent rxq structure, NULL on failure.
+ */
+static struct rxq *
+priv_parent_create(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n)
+{
+	int ret;
+	uint16_t i;
+	struct rxq *parent;
+
+	parent = rte_zmalloc("parent queue",
+			     sizeof(*parent),
+			     RTE_CACHE_LINE_SIZE);
+	if (!parent) {
+		ERROR("cannot allocate memory for RSS parent queue");
+		return NULL;
+	}
+	ret = rxq_setup(priv->dev, parent, 0, 0, 0,
+			NULL, NULL, children_n, NULL);
+	if (ret) {
+		rte_free(parent);
+		return NULL;
+	}
+	parent->rss.queues_n = children_n;
+	if (queues) {
+		for (i = 0; i < children_n; ++i)
+			parent->rss.queues[i] = queues[i];
+	} else {
+		/* the default RSS ring case */
+		assert(priv->rxqs_n == children_n);
+		for (i = 0; i < priv->rxqs_n; ++i)
+			parent->rss.queues[i] = i;
+	}
+	LIST_INSERT_HEAD(&priv->parents, parent, next);
+	return parent;
+}
+
+/**
+ * Clean up RX queue parent structure.
+ *
+ * @param parent
+ *   RX queue parent structure.
+ */
+void
+rxq_parent_cleanup(struct rxq *parent)
+{
+	LIST_REMOVE(parent, next);
+	rxq_cleanup(parent);
+	rte_free(parent);
+}
+
+/**
+ * Clean up parent structures from the parent list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_parent_list_cleanup(struct priv *priv)
+{
+	while (!LIST_EMPTY(&priv->parents))
+		rxq_parent_cleanup(LIST_FIRST(&priv->parents));
+}
+
+/**
  * Ethernet device configuration.
  *
  * Prepare the driver for a given number of TX and RX queues.
@@ -577,7 +657,6 @@ void priv_unlock(struct priv *priv)
 	unsigned int rxqs_n = dev->data->nb_rx_queues;
 	unsigned int txqs_n = dev->data->nb_tx_queues;
 	unsigned int tmp;
-	int ret;
 
 	priv->rxqs = (void *)dev->data->rx_queues;
 	priv->txqs = (void *)dev->data->tx_queues;
@@ -607,7 +686,7 @@ void priv_unlock(struct priv *priv)
 		for (i = 0; (i != priv->rxqs_n); ++i)
 			if ((*priv->rxqs)[i] != NULL)
 				return EINVAL;
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parent_list_cleanup(priv);
 		priv->rss = 0;
 		priv->rxqs_n = 0;
 	}
@@ -632,14 +711,16 @@ void priv_unlock(struct priv *priv)
 	priv->rss = 1;
 	tmp = priv->rxqs_n;
 	priv->rxqs_n = rxqs_n;
-	ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, 0, NULL, NULL);
-	if (!ret)
+	if (priv->isolated) {
+		priv->rss = 0;
+		return 0;
+	}
+	if (priv_parent_create(priv, NULL, priv->rxqs_n))
 		return 0;
 	/* Failure, rollback. */
 	priv->rss = 0;
 	priv->rxqs_n = tmp;
-	assert(ret > 0);
-	return ret;
+	return ENOMEM;
 }
 
 /**
@@ -2523,7 +2604,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
 		return;
 	if (priv->rss) {
-		rxq_mac_addr_del(&priv->rxq_parent, mac_index);
+		rxq_mac_addr_del(LIST_FIRST(&priv->parents), mac_index);
 		goto end;
 	}
 	for (i = 0; (i != priv->dev->data->nb_rx_queues); ++i)
@@ -2590,7 +2671,7 @@ struct txq_mp2mr_mbuf_check_data {
 		goto end;
 	}
 	if (priv->rss) {
-		ret = rxq_mac_addr_add(&priv->rxq_parent, mac_index);
+		ret = rxq_mac_addr_add(LIST_FIRST(&priv->parents), mac_index);
 		if (ret)
 			return ret;
 		goto end;
@@ -3353,15 +3434,18 @@ struct txq_mp2mr_mbuf_check_data {
  *   Completion queue to associate with QP.
  * @param desc
  *   Number of descriptors in QP (hint only).
- * @param parent
- *   If nonzero, create a parent QP, otherwise a child.
+ * @param children_n
+ *   Number of children when creating a parent QP, zero for a child QP.
+ * @param rxq_parent
+ *   Pointer to the parent queue when creating a child, NULL otherwise.
  *
  * @return
  *   QP pointer or NULL in case of error.
  */
 static struct ibv_qp *
 rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
-		 int parent, struct ibv_exp_res_domain *rd)
+		 int children_n, struct ibv_exp_res_domain *rd,
+		 struct rxq *rxq_parent)
 {
 	struct ibv_exp_qp_init_attr attr = {
 		/* CQ to be associated with the send queue. */
@@ -3391,16 +3475,16 @@ struct txq_mp2mr_mbuf_check_data {
 	attr.max_inl_recv = priv->inl_recv_size,
 	attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV;
 #endif
-	if (parent) {
+	if (children_n > 0) {
 		attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
 		/* TSS isn't necessary. */
 		attr.qpg.parent_attrib.tss_child_count = 0;
 		attr.qpg.parent_attrib.rss_child_count =
-			rte_align32pow2(priv->rxqs_n + 1) >> 1;
+			rte_align32pow2(children_n + 1) >> 1;
 		DEBUG("initializing parent RSS queue");
 	} else {
 		attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
-		attr.qpg.qpg_parent = priv->rxq_parent.qp;
+		attr.qpg.qpg_parent = rxq_parent->qp;
 		DEBUG("initializing child RSS queue");
 	}
 	return ibv_exp_create_qp(priv->ctx, &attr);
@@ -3436,13 +3520,7 @@ struct txq_mp2mr_mbuf_check_data {
 	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int err;
-	int parent = (rxq == &priv->rxq_parent);
 
-	if (parent) {
-		ERROR("%p: cannot rehash parent queue %p",
-		      (void *)dev, (void *)rxq);
-		return EINVAL;
-	}
 	mb_len = rte_pktmbuf_data_room_size(rxq->mp);
 	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
 	/* Number of descriptors and mbufs currently allocated. */
@@ -3487,6 +3565,8 @@ struct txq_mp2mr_mbuf_check_data {
 	}
 	/* From now on, any failure will render the queue unusable.
 	 * Reinitialize QP. */
+	if (!tmpl.qp)
+		goto skip_init;
 	mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
 	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
 	if (err) {
@@ -3494,12 +3574,6 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	}
-	err = ibv_resize_cq(tmpl.cq, desc_n);
-	if (err) {
-		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
-		assert(err > 0);
-		return err;
-	}
 	mod = (struct ibv_exp_qp_attr){
 		/* Move the QP to this state. */
 		.qp_state = IBV_QPS_INIT,
@@ -3508,9 +3582,6 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	err = ibv_exp_modify_qp(tmpl.qp, &mod,
 				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
 				 IBV_EXP_QP_PORT));
 	if (err) {
 		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
@@ -3518,6 +3589,13 @@ struct txq_mp2mr_mbuf_check_data {
 		assert(err > 0);
 		return err;
 	};
+skip_init:
+	err = ibv_resize_cq(tmpl.cq, desc_n);
+	if (err) {
+		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
+		assert(err > 0);
+		return err;
+	}
 	/* Reconfigure flows. Do not care for errors. */
 	if (!priv->rss && !priv->isolated) {
 		rxq_mac_addrs_add(&tmpl);
@@ -3585,6 +3663,8 @@ struct txq_mp2mr_mbuf_check_data {
 	rxq->elts_n = 0;
 	rte_free(rxq->elts.sp);
 	rxq->elts.sp = NULL;
+	if (!tmpl.qp)
+		goto skip_rtr;
 	/* Post WRs. */
 	err = ibv_post_recv(tmpl.qp,
 			    (tmpl.sp ?
@@ -3612,6 +3692,116 @@ struct txq_mp2mr_mbuf_check_data {
 }
 
 /**
+ * Create verbs QP resources associated with a rxq.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param inactive
+ *   If true, the queue is disabled because its index is higher or
+ *   equal to the real number of queues, which must be a power of 2.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure for a child in RSS case,
+ *   NULL for parent.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent)
+{
+	int ret;
+	struct ibv_exp_qp_attr mod;
+	struct ibv_exp_query_intf_params params;
+	enum ibv_exp_query_intf_status status;
+	struct ibv_recv_wr *bad_wr;
+	int parent = (children_n > 0);
+	struct priv *priv = rxq->priv;
+
+#ifdef RSS_SUPPORT
+	if (priv->rss && !inactive && (rxq_parent || parent))
+		rxq->qp = rxq_setup_qp_rss(priv, rxq->cq, desc,
+					   children_n, rxq->rd,
+					   rxq_parent);
+	else
+#endif /* RSS_SUPPORT */
+		rxq->qp = rxq_setup_qp(priv, rxq->cq, desc, rxq->rd);
+	if (rxq->qp == NULL) {
+		ret = (errno ? errno : EINVAL);
+		ERROR("QP creation failure: %s",
+		      strerror(ret));
+		return ret;
+	}
+	mod = (struct ibv_exp_qp_attr){
+		/* Move the QP to this state. */
+		.qp_state = IBV_QPS_INIT,
+		/* Primary port number. */
+		.port_num = priv->port
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod,
+				(IBV_EXP_QP_STATE |
+#ifdef RSS_SUPPORT
+				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
+#endif /* RSS_SUPPORT */
+				 IBV_EXP_QP_PORT));
+	if (ret) {
+		ERROR("QP state to IBV_QPS_INIT failed: %s",
+		      strerror(ret));
+		return ret;
+	}
+	if (!priv->isolated && (parent || !priv->rss)) {
+		/* Configure MAC and broadcast addresses. */
+		ret = rxq_mac_addrs_add(rxq);
+		if (ret) {
+			ERROR("QP flow attachment failed: %s",
+			      strerror(ret));
+			return ret;
+		}
+	}
+	if (!parent) {
+		ret = ibv_post_recv(rxq->qp,
+				    (rxq->sp ?
+				     &(*rxq->elts.sp)[0].wr :
+				     &(*rxq->elts.no_sp)[0].wr),
+				    &bad_wr);
+		if (ret) {
+			ERROR("ibv_post_recv() failed for WR %p: %s",
+			      (void *)bad_wr,
+			      strerror(ret));
+			return ret;
+		}
+	}
+	mod = (struct ibv_exp_qp_attr){
+		.qp_state = IBV_QPS_RTR
+	};
+	ret = ibv_exp_modify_qp(rxq->qp, &mod, IBV_EXP_QP_STATE);
+	if (ret) {
+		ERROR("QP state to IBV_QPS_RTR failed: %s",
+		      strerror(ret));
+		return ret;
+	}
+	params = (struct ibv_exp_query_intf_params){
+		.intf_scope = IBV_EXP_INTF_GLOBAL,
+		.intf = IBV_EXP_INTF_QP_BURST,
+		.obj = rxq->qp,
+	};
+	rxq->if_qp = ibv_exp_query_intf(priv->ctx, &params, &status);
+	if (rxq->if_qp == NULL) {
+		ERROR("QP interface family query failed with status %d",
+		      status);
+		return errno;
+	}
+	return 0;
+}
+
+/**
  * Configure a RX queue.
  *
  * @param dev
@@ -3629,14 +3819,21 @@ struct txq_mp2mr_mbuf_check_data {
  *   Thresholds parameters.
  * @param mp
  *   Memory pool for buffer allocations.
+ * @param children_n
+ *   The number of children in a parent case, zero for a child.
+ * @param rxq_parent
+ *   The pointer to a parent RX structure (or NULL) in a child case,
+ *   NULL for parent.
  *
  * @return
  *   0 on success, errno value on failure.
  */
 static int
 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
-	  unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
-	  struct rte_mempool *mp)
+	  unsigned int socket, int inactive,
+	  const struct rte_eth_rxconf *conf,
+	  struct rte_mempool *mp, int children_n,
+	  struct rxq *rxq_parent)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct rxq tmpl = {
@@ -3644,17 +3841,15 @@ struct txq_mp2mr_mbuf_check_data {
 		.mp = mp,
 		.socket = socket
 	};
-	struct ibv_exp_qp_attr mod;
 	union {
 		struct ibv_exp_query_intf_params params;
 		struct ibv_exp_cq_init_attr cq;
 		struct ibv_exp_res_domain_init_attr rd;
 	} attr;
 	enum ibv_exp_query_intf_status status;
-	struct ibv_recv_wr *bad_wr;
 	unsigned int mb_len;
 	int ret = 0;
-	int parent = (rxq == &priv->rxq_parent);
+	int parent = (children_n > 0);
 
 	(void)conf; /* Thresholds configuration (ignored). */
 	/*
@@ -3745,45 +3940,6 @@ struct txq_mp2mr_mbuf_check_data {
 	      priv->device_attr.max_qp_wr);
 	DEBUG("priv->device_attr.max_sge is %d",
 	      priv->device_attr.max_sge);
-#ifdef RSS_SUPPORT
-	if (priv->rss && !inactive)
-		tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
-					   tmpl.rd);
-	else
-#endif /* RSS_SUPPORT */
-		tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
-	if (tmpl.qp == NULL) {
-		ret = (errno ? errno : EINVAL);
-		ERROR("%p: QP creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	mod = (struct ibv_exp_qp_attr){
-		/* Move the QP to this state. */
-		.qp_state = IBV_QPS_INIT,
-		/* Primary port number. */
-		.port_num = priv->port
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod,
-				(IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
-				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
-				 IBV_EXP_QP_PORT));
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	if (!priv->isolated && (parent || !priv->rss)) {
-		/* Configure MAC and broadcast addresses. */
-		ret = rxq_mac_addrs_add(&tmpl);
-		if (ret) {
-			ERROR("%p: QP flow attachment failed: %s",
-			      (void *)dev, strerror(ret));
-			goto error;
-		}
-	}
 	/* Allocate descriptors for RX queues, except for the RSS parent. */
 	if (parent)
 		goto skip_alloc;
@@ -3794,29 +3950,14 @@ struct txq_mp2mr_mbuf_check_data {
 	if (ret) {
 		ERROR("%p: RXQ allocation failed: %s",
 		      (void *)dev, strerror(ret));
-		goto error;
-	}
-	ret = ibv_post_recv(tmpl.qp,
-			    (tmpl.sp ?
-			     &(*tmpl.elts.sp)[0].wr :
-			     &(*tmpl.elts.no_sp)[0].wr),
-			    &bad_wr);
-	if (ret) {
-		ERROR("%p: ibv_post_recv() failed for WR %p: %s",
-		      (void *)dev,
-		      (void *)bad_wr,
-		      strerror(ret));
-		goto error;
+		return ret;
 	}
 skip_alloc:
-	mod = (struct ibv_exp_qp_attr){
-		.qp_state = IBV_QPS_RTR
-	};
-	ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-	if (ret) {
-		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
+	if (parent || rxq_parent || !priv->rss) {
+		ret = rxq_create_qp(&tmpl, desc, inactive,
+				    children_n, rxq_parent);
+		if (ret)
+			goto error;
 	}
 	/* Save port ID. */
 	tmpl.port_id = dev->data->port_id;
@@ -3828,21 +3969,11 @@ struct txq_mp2mr_mbuf_check_data {
 	};
 	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
 	if (tmpl.if_cq == NULL) {
+		ret = EINVAL;
 		ERROR("%p: CQ interface family query failed with status %d",
 		      (void *)dev, status);
 		goto error;
 	}
-	attr.params = (struct ibv_exp_query_intf_params){
-		.intf_scope = IBV_EXP_INTF_GLOBAL,
-		.intf = IBV_EXP_INTF_QP_BURST,
-		.obj = tmpl.qp,
-	};
-	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
-	if (tmpl.if_qp == NULL) {
-		ERROR("%p: QP interface family query failed with status %d",
-		      (void *)dev, status);
-		goto error;
-	}
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
 	rxq_cleanup(rxq);
@@ -3880,6 +4011,7 @@ struct txq_mp2mr_mbuf_check_data {
 		    unsigned int socket, const struct rte_eth_rxconf *conf,
 		    struct rte_mempool *mp)
 {
+	struct rxq *parent;
 	struct priv *priv = dev->data->dev_private;
 	struct rxq *rxq = (*priv->rxqs)[idx];
 	int inactive = 0;
@@ -3914,9 +4046,16 @@ struct txq_mp2mr_mbuf_check_data {
 			return -ENOMEM;
 		}
 	}
-	if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
-		inactive = 1;
-	ret = rxq_setup(dev, rxq, desc, socket, inactive, conf, mp);
+	if (priv->rss && !priv->isolated) {
+		/* The list consists of the single default one. */
+		parent = LIST_FIRST(&priv->parents);
+		if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
+			inactive = 1;
+	} else {
+		parent = NULL;
+	}
+	ret = rxq_setup(dev, rxq, desc, socket,
+			inactive, conf, mp, 0, parent);
 	if (ret)
 		rte_free(rxq);
 	else {
@@ -3953,7 +4092,6 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	priv = rxq->priv;
 	priv_lock(priv);
-	assert(rxq != &priv->rxq_parent);
 	for (i = 0; (i != priv->rxqs_n); ++i)
 		if ((*priv->rxqs)[i] == rxq) {
 			DEBUG("%p: removing RX queue %p from list",
@@ -4008,7 +4146,7 @@ struct txq_mp2mr_mbuf_check_data {
 		rxq = NULL;
 		r = 1;
 	} else if (priv->rss) {
-		rxq = &priv->rxq_parent;
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4099,7 +4237,7 @@ struct txq_mp2mr_mbuf_check_data {
 		rxq = NULL;
 		r = 1;
 	} else if (priv->rss) {
-		rxq = &priv->rxq_parent;
+		rxq = LIST_FIRST(&priv->parents);
 		r = 1;
 	} else {
 		rxq = (*priv->rxqs)[0];
@@ -4233,7 +4371,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->txqs = NULL;
 	}
 	if (priv->rss)
-		rxq_cleanup(&priv->rxq_parent);
+		priv_parent_list_cleanup(priv);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
 		claim_zero(ibv_dealloc_pd(priv->pd));
@@ -4632,7 +4770,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_promiscuous_enable(&priv->rxq_parent);
+		ret = rxq_promiscuous_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4677,7 +4815,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	}
 	if (priv->rss) {
-		rxq_promiscuous_disable(&priv->rxq_parent);
+		rxq_promiscuous_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -4718,7 +4856,7 @@ struct txq_mp2mr_mbuf_check_data {
 	if (!priv->started)
 		goto end;
 	if (priv->rss) {
-		ret = rxq_allmulticast_enable(&priv->rxq_parent);
+		ret = rxq_allmulticast_enable(LIST_FIRST(&priv->parents));
 		if (ret) {
 			priv_unlock(priv);
 			return;
@@ -4763,7 +4901,7 @@ struct txq_mp2mr_mbuf_check_data {
 		return;
 	}
 	if (priv->rss) {
-		rxq_allmulticast_disable(&priv->rxq_parent);
+		rxq_allmulticast_disable(LIST_FIRST(&priv->parents));
 		goto end;
 	}
 	for (i = 0; (i != priv->rxqs_n); ++i)
@@ -5072,7 +5210,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5080,7 +5218,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 1;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
@@ -5094,7 +5232,7 @@ struct txq_mp2mr_mbuf_check_data {
 		 * Rehashing flows in all RX queues is necessary.
 		 */
 		if (priv->rss)
-			rxq_mac_addrs_del(&priv->rxq_parent);
+			rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
 		else
 			for (i = 0; (i != priv->rxqs_n); ++i)
 				if ((*priv->rxqs)[i] != NULL)
@@ -5102,7 +5240,7 @@ struct txq_mp2mr_mbuf_check_data {
 		priv->vlan_filter[j].enabled = 0;
 		if (priv->started) {
 			if (priv->rss)
-				rxq_mac_addrs_add(&priv->rxq_parent);
+				rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
 			else
 				for (i = 0; (i != priv->rxqs_n); ++i) {
 					if ((*priv->rxqs)[i] == NULL)
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 1119525..6de3484 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -219,6 +219,7 @@ struct rxq_elt {
 
 /* RX queue descriptor. */
 struct rxq {
+	LIST_ENTRY(rxq) next; /* Used by parent queue only */
 	struct priv *priv; /* Back pointer to private data. */
 	struct rte_mempool *mp; /* Memory Pool for allocations. */
 	struct ibv_mr *mr; /* Memory Region (for mp). */
@@ -247,6 +248,10 @@ struct rxq {
 	struct mlx4_rxq_stats stats; /* RX queue counters. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	struct ibv_exp_res_domain *rd; /* Resource Domain. */
+	struct {
+		uint16_t queues_n;
+		uint16_t queues[RTE_MAX_QUEUES_PER_PORT];
+	} rss;
 };
 
 /* TX element. */
@@ -341,7 +346,6 @@ struct priv {
 #endif
 	unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
 	/* RX/TX queues. */
-	struct rxq rxq_parent; /* Parent queue when RSS is enabled. */
 	unsigned int rxqs_n; /* RX queues array size. */
 	unsigned int txqs_n; /* TX queues array size. */
 	struct rxq *(*rxqs)[]; /* RX queues. */
@@ -350,10 +354,21 @@ struct priv {
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	LIST_HEAD(mlx4_flows, rte_flow) flows;
 	struct rte_intr_conf intr_conf; /* Active interrupt configuration. */
+	LIST_HEAD(mlx4_parents, rxq) parents;
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
 void priv_lock(struct priv *priv);
 void priv_unlock(struct priv *priv);
 
+int
+rxq_create_qp(struct rxq *rxq,
+	      uint16_t desc,
+	      int inactive,
+	      int children_n,
+	      struct rxq *rxq_parent);
+
+void
+rxq_parent_cleanup(struct rxq *parent);
+
 #endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 5ad50bd..1344101 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -831,8 +831,23 @@ struct rte_flow_drop {
 	if (action->drop) {
 		qp = priv->flow_drop_queue->qp;
 	} else {
+		int ret;
 		struct rxq *rxq = (*priv->rxqs)[action->queue_id];
 
+		if (!rxq->qp) {
+			assert(priv->isolated);
+			ret = rxq_create_qp(rxq, rxq->elts_n,
+					    0, 0, NULL);
+			if (ret) {
+				rte_flow_error_set(
+					error,
+					ENOMEM,
+					RTE_FLOW_ERROR_TYPE_HANDLE,
+					NULL,
+					"flow rule creation failure");
+				goto error;
+			}
+		}
 		qp = rxq->qp;
 		rte_flow->qp = qp;
 	}
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH v8 4/4] net/mlx4: support flow API RSS action
  2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
                   ` (26 preceding siblings ...)
  2017-07-05  8:14 ` [PATCH v8 3/4] net/mlx4: refactor RSS parent queue allocation Vasily Philipov
@ 2017-07-05  8:14 ` Vasily Philipov
  2017-07-05 14:49   ` Adrien Mazarguil
  27 siblings, 1 reply; 51+ messages in thread
From: Vasily Philipov @ 2017-07-05  8:14 UTC (permalink / raw)
  To: dev; +Cc: Vasily Philipov, Adrien Mazarguil, Nelio Laranjeiro

This commit adds support for the flow API RSS action with the following
limitations:

 - Only supported when isolated mode is enabled.
 - The number of queues specified by the action (rte_flow_action_rss.num)
   must be a power of two.
 - Each queue index can be specified at most once in the configuration
   (rte_flow_action_rss.queue[]).
 - Because a queue can be associated with a single RSS context, it cannot
   be targeted by multiple RSS actions simultaneously.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      |   6 +-
 drivers/net/mlx4/mlx4.h      |   5 ++
 drivers/net/mlx4/mlx4_flow.c | 206 +++++++++++++++++++++++++++++++++++++++----
 drivers/net/mlx4/mlx4_flow.h |   3 +-
 4 files changed, 200 insertions(+), 20 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 0557c7c..f09d77c 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -575,7 +575,7 @@ void priv_unlock(struct priv *priv)
  * @return
  *   Pointer to a parent rxq structure, NULL on failure.
  */
-static struct rxq *
+struct rxq *
 priv_parent_create(struct priv *priv,
 		   uint16_t queues[],
 		   uint16_t children_n)
@@ -711,10 +711,8 @@ void priv_unlock(struct priv *priv)
 	priv->rss = 1;
 	tmp = priv->rxqs_n;
 	priv->rxqs_n = rxqs_n;
-	if (priv->isolated) {
-		priv->rss = 0;
+	if (priv->isolated)
 		return 0;
-	}
 	if (priv_parent_create(priv, NULL, priv->rxqs_n))
 		return 0;
 	/* Failure, rollback. */
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 6de3484..716fd45 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -371,4 +371,9 @@ struct priv {
 void
 rxq_parent_cleanup(struct rxq *parent);
 
+struct rxq *
+priv_parent_create(struct priv *priv,
+		   uint16_t queues[],
+		   uint16_t children_n);
+
 #endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 1344101..8ade106 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -112,6 +112,7 @@ struct rte_flow_drop {
 static const enum rte_flow_action_type valid_actions[] = {
 	RTE_FLOW_ACTION_TYPE_DROP,
 	RTE_FLOW_ACTION_TYPE_QUEUE,
+	RTE_FLOW_ACTION_TYPE_RSS,
 	RTE_FLOW_ACTION_TYPE_END,
 };
 
@@ -672,6 +673,76 @@ struct rte_flow_drop {
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
 			action.queue = 1;
+			action.queues_n = 1;
+			action.queues[0] = queue->index;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			int i;
+			int ierr;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				actions->conf;
+
+			if (!priv->hw_rss) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used with "
+					   "the current configuration");
+				return -rte_errno;
+			}
+			if (!priv->isolated) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "RSS cannot be used without "
+					   "isolated mode");
+				return -rte_errno;
+			}
+			if (!rte_is_power_of_2(rss->num)) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "should be power of two");
+				return -rte_errno;
+			}
+			if (priv->max_rss_tbl_sz < rss->num) {
+				rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions,
+					   "the number of queues "
+					   "is too large");
+				return -rte_errno;
+			}
+			/* checking indexes array */
+			ierr = 0;
+			for (i = 0; i < rss->num; ++i) {
+				int j;
+				if (rss->queue[i] >= priv->rxqs_n)
+					ierr = 1;
+				/*
+				 * Prevent the user from specifying
+				 * the same queue twice in the RSS array.
+				 */
+				for (j = i + 1; j < rss->num && !ierr; ++j)
+					if (rss->queue[j] == rss->queue[i])
+						ierr = 1;
+				if (ierr) {
+					rte_flow_error_set(
+						error,
+						ENOTSUP,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"RSS action only supports "
+						"unique queue indices "
+						"in a list");
+					return -rte_errno;
+				}
+			}
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			goto exit_action_not_supported;
 		}
@@ -797,6 +868,82 @@ struct rte_flow_drop {
 }
 
 /**
+ * Get RSS parent rxq structure for given queues.
+ *
+ * Creates a new one or returns an existing one.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   queues indices array, NULL in default RSS case.
+ * @param children_n
+ *   the size of queues array.
+ *
+ * @return
+ *   Pointer to a parent rxq structure, NULL on failure.
+ */
+static struct rxq *
+priv_parent_get(struct priv *priv,
+		uint16_t queues[],
+		uint16_t children_n,
+		struct rte_flow_error *error)
+{
+	unsigned int i;
+	struct rxq *parent;
+
+	for (parent = LIST_FIRST(&priv->parents);
+	     parent;
+	     parent = LIST_NEXT(parent, next)) {
+		unsigned int same = 0;
+		unsigned int overlap = 0;
+
+		/*
+		 * Find out whether an appropriate parent queue already exists
+		 * and can be reused, otherwise make sure there are no overlaps.
+		 */
+		for (i = 0; i < children_n; ++i) {
+			unsigned int j;
+
+			for (j = 0; j < parent->rss.queues_n; ++j) {
+				if (parent->rss.queues[j] != queues[i])
+					continue;
+				++overlap;
+				if (i == j)
+					++same;
+			}
+		}
+		if (same == children_n &&
+			children_n == parent->rss.queues_n)
+			return parent;
+		else if (overlap)
+			goto error;
+	}
+	/* Exclude the cases when some QPs were created without RSS */
+	for (i = 0; i < children_n; ++i) {
+		struct rxq *rxq = (*priv->rxqs)[queues[i]];
+		if (rxq->qp)
+			goto error;
+	}
+	parent = priv_parent_create(priv, queues, children_n);
+	if (!parent) {
+		rte_flow_error_set(error,
+				   ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL, "flow rule creation failure");
+		return NULL;
+	}
+	return parent;
+
+error:
+	rte_flow_error_set(error,
+			   EEXIST,
+			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL,
+			   "sharing a queue between several"
+			   " RSS groups is not supported");
+	return NULL;
+}
+
+/**
  * Complete flow rule creation.
  *
  * @param priv
@@ -819,6 +966,7 @@ struct rte_flow_drop {
 {
 	struct ibv_qp *qp;
 	struct rte_flow *rte_flow;
+	struct rxq *rxq_parent = NULL;
 
 	assert(priv->pd);
 	assert(priv->ctx);
@@ -832,23 +980,38 @@ struct rte_flow_drop {
 		qp = priv->flow_drop_queue->qp;
 	} else {
 		int ret;
-		struct rxq *rxq = (*priv->rxqs)[action->queue_id];
-
-		if (!rxq->qp) {
-			assert(priv->isolated);
-			ret = rxq_create_qp(rxq, rxq->elts_n,
-					    0, 0, NULL);
-			if (ret) {
-				rte_flow_error_set(
-					error,
-					ENOMEM,
-					RTE_FLOW_ERROR_TYPE_HANDLE,
-					NULL,
-					"flow rule creation failure");
+		unsigned int i;
+		struct rxq *rxq = NULL;
+
+		if (action->queues_n > 1) {
+			rxq_parent = priv_parent_get(priv, action->queues,
+						     action->queues_n, error);
+			if (!rxq_parent)
 				goto error;
+		}
+		for (i = 0; i < action->queues_n; ++i) {
+			rxq = (*priv->rxqs)[action->queues[i]];
+			/*
+			 * In isolated mode, ibv receive queue creation
+			 * is postponed until the first rte_flow rule
+			 * is applied to that queue.
+			 */
+			if (!rxq->qp) {
+				assert(priv->isolated);
+				ret = rxq_create_qp(rxq, rxq->elts_n,
+						    0, 0, rxq_parent);
+				if (ret) {
+					rte_flow_error_set(
+						error,
+						ENOMEM,
+						RTE_FLOW_ERROR_TYPE_HANDLE,
+						NULL,
+						"flow rule creation failure");
+					goto error;
+				}
 			}
 		}
-		qp = rxq->qp;
+		qp = action->queues_n > 1 ? rxq_parent->qp : rxq->qp;
 		rte_flow->qp = qp;
 	}
 	rte_flow->ibv_attr = ibv_attr;
@@ -861,6 +1024,8 @@ struct rte_flow_drop {
 	return rte_flow;
 
 error:
+	if (rxq_parent)
+		rxq_parent_cleanup(rxq_parent);
 	rte_free(rte_flow);
 	return NULL;
 }
@@ -924,11 +1089,22 @@ struct rte_flow_drop {
 			continue;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
 			action.queue = 1;
-			action.queue_id =
+			action.queues_n = 1;
+			action.queues[0] =
 				((const struct rte_flow_action_queue *)
 				 actions->conf)->index;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
 			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+			unsigned int i;
+			const struct rte_flow_action_rss *rss =
+				(const struct rte_flow_action_rss *)
+				 actions->conf;
+
+			action.queue = 1;
+			action.queues_n = rss->num;
+			for (i = 0; i < rss->num; ++i)
+				action.queues[i] = rss->queue[i];
 		} else {
 			rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 4d007da..beabcf2 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -98,7 +98,8 @@ struct mlx4_flow {
 struct mlx4_flow_action {
 	uint32_t drop:1; /**< Target is a drop queue. */
 	uint32_t queue:1; /**< Target is a receive queue. */
-	uint32_t queue_id; /**< Identifier of the queue. */
+	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indices to use. */
+	uint16_t queues_n; /**< Number of entries in queues[]. */
 };
 
 int mlx4_priv_flow_start(struct priv *priv);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: [PATCH v8 1/4] app/testpmd: add isolated mode parameter
  2017-07-05  8:14 ` [PATCH v8 1/4] app/testpmd: add isolated mode parameter Vasily Philipov
@ 2017-07-05 14:49   ` Adrien Mazarguil
  2017-07-05 15:18   ` Ferruh Yigit
  2017-07-05 15:46   ` Ferruh Yigit
  2 siblings, 0 replies; 51+ messages in thread
From: Adrien Mazarguil @ 2017-07-05 14:49 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Nelio Laranjeiro

On Wed, Jul 05, 2017 at 11:14:08AM +0300, Vasily Philipov wrote:
> Providing this parameter requests flow API isolated mode on all ports at
> initialization time. It ensures all traffic is received through the
> configured flow rules only (see flow command).
> 
> Ports that do not support this mode are automatically discarded.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>

Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v8 2/4] net/mlx4: implement isolated mode from flow API
  2017-07-05  8:14 ` [PATCH v8 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
@ 2017-07-05 14:49   ` Adrien Mazarguil
  0 siblings, 0 replies; 51+ messages in thread
From: Adrien Mazarguil @ 2017-07-05 14:49 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Nelio Laranjeiro

On Wed, Jul 05, 2017 at 11:14:09AM +0300, Vasily Philipov wrote:
> The user must request isolated mode before device configuration.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>

Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v8 3/4] net/mlx4: refactor RSS parent queue allocation
  2017-07-05  8:14 ` [PATCH v8 3/4] net/mlx4: refactor RSS parent queue allocation Vasily Philipov
@ 2017-07-05 14:49   ` Adrien Mazarguil
  0 siblings, 0 replies; 51+ messages in thread
From: Adrien Mazarguil @ 2017-07-05 14:49 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Nelio Laranjeiro

On Wed, Jul 05, 2017 at 11:14:10AM +0300, Vasily Philipov wrote:
> A special "parent" queue must be allocated in addition to a group of
> standard Rx queues for RSS to work. This is done automatically outside of
> isolated mode by the PMD when applications request several Rx queues.
> 
> Since each configured flow rule with the RSS action may target a different
> set of queues, the PMD must have the ability to dynamically allocate
> several parent queues, one per RSS group.
> 
> If isolated mode was requested the default RSS parent queue isn't created
> in this case.
> 
> Refactor RSS parent queue allocations (currently limited to a single
> parent) in preparation for flow API RSS action support.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>

Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v8 4/4] net/mlx4: support flow API RSS action
  2017-07-05  8:14 ` [PATCH v8 4/4] net/mlx4: support flow API RSS action Vasily Philipov
@ 2017-07-05 14:49   ` Adrien Mazarguil
  0 siblings, 0 replies; 51+ messages in thread
From: Adrien Mazarguil @ 2017-07-05 14:49 UTC (permalink / raw)
  To: Vasily Philipov; +Cc: dev, Nelio Laranjeiro

On Wed, Jul 05, 2017 at 11:14:11AM +0300, Vasily Philipov wrote:
> This commit adds support for the flow API RSS action with the following
> limitations:
> 
>  - Only supported when isolated mode is enabled.
>  - The number of queues specified by the action (rte_flow_action_rss.num)
>    must be a power of two.
>  - Each queue index can be specified at most once in the configuration
>    (rte_flow_action_rss.queue[]).
>  - Because a queue can be associated with a single RSS context, it cannot
>    be targeted by multiple RSS actions simultaneously.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>

Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v8 1/4] app/testpmd: add isolated mode parameter
  2017-07-05  8:14 ` [PATCH v8 1/4] app/testpmd: add isolated mode parameter Vasily Philipov
  2017-07-05 14:49   ` Adrien Mazarguil
@ 2017-07-05 15:18   ` Ferruh Yigit
  2017-07-05 15:31     ` Ferruh Yigit
  2017-07-05 15:46   ` Ferruh Yigit
  2 siblings, 1 reply; 51+ messages in thread
From: Ferruh Yigit @ 2017-07-05 15:18 UTC (permalink / raw)
  To: Vasily Philipov, dev; +Cc: Adrien Mazarguil, Nelio Laranjeiro

On 7/5/2017 9:14 AM, Vasily Philipov wrote:
> Providing this parameter requests flow API isolated mode on all ports at
> initialization time. It ensures all traffic is received through the
> configured flow rules only (see flow command).
> 
> Ports that do not support this mode are automatically discarded.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> ---
>  app/test-pmd/parameters.c |  3 +++
>  app/test-pmd/testpmd.c    | 14 ++++++++++++++
>  app/test-pmd/testpmd.h    |  1 +
>  3 files changed, 18 insertions(+)
> 
> diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
> index fbe6284..e313871 100644
> --- a/app/test-pmd/parameters.c
> +++ b/app/test-pmd/parameters.c
> @@ -623,6 +623,7 @@
>  		{ "tx-queue-stats-mapping",	1, 0, 0 },
>  		{ "rx-queue-stats-mapping",	1, 0, 0 },
>  		{ "no-flush-rx",	0, 0, 0 },
> +		{ "isolated-mode",	        0, 0, 0 },

Can you please document the new option [1] ?

And when you don't know anything about isolated mode, this option name is
not specific enough. What do you think about adding a "flow" or similar
keyword to the option?

[1]
doc/guides/testpmd_app_ug/run_app.rst

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v8 1/4] app/testpmd: add isolated mode parameter
  2017-07-05 15:18   ` Ferruh Yigit
@ 2017-07-05 15:31     ` Ferruh Yigit
  2017-07-06  6:03       ` Vasily Philipov
  0 siblings, 1 reply; 51+ messages in thread
From: Ferruh Yigit @ 2017-07-05 15:31 UTC (permalink / raw)
  To: Vasily Philipov, dev; +Cc: Adrien Mazarguil, Nelio Laranjeiro

On 7/5/2017 4:18 PM, Ferruh Yigit wrote:
> On 7/5/2017 9:14 AM, Vasily Philipov wrote:
>> Providing this parameter requests flow API isolated mode on all ports at
>> initialization time. It ensures all traffic is received through the
>> configured flow rules only (see flow command).
>>
>> Ports that do not support this mode are automatically discarded.
>>
>> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
>> ---
>>  app/test-pmd/parameters.c |  3 +++
>>  app/test-pmd/testpmd.c    | 14 ++++++++++++++
>>  app/test-pmd/testpmd.h    |  1 +
>>  3 files changed, 18 insertions(+)
>>
>> diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
>> index fbe6284..e313871 100644
>> --- a/app/test-pmd/parameters.c
>> +++ b/app/test-pmd/parameters.c
>> @@ -623,6 +623,7 @@
>>  		{ "tx-queue-stats-mapping",	1, 0, 0 },
>>  		{ "rx-queue-stats-mapping",	1, 0, 0 },
>>  		{ "no-flush-rx",	0, 0, 0 },
>> +		{ "isolated-mode",	        0, 0, 0 },
> 
> Can you please document the new option [1] ?
> 
> And when you don't know anything about isolated mode, this option name is
> not specific enough. What do you think about adding a "flow" or similar
> keyword to the option?
> 
> [1]
> doc/guides/testpmd_app_ug/run_app.rst

Overall, it seems this testpmd patch can be separated from the patchset.

I will check the other driver patches in the patchset; can you please send
a new version of this patch as a standalone patch?

Thanks,
ferruh

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v8 1/4] app/testpmd: add isolated mode parameter
  2017-07-05  8:14 ` [PATCH v8 1/4] app/testpmd: add isolated mode parameter Vasily Philipov
  2017-07-05 14:49   ` Adrien Mazarguil
  2017-07-05 15:18   ` Ferruh Yigit
@ 2017-07-05 15:46   ` Ferruh Yigit
  2 siblings, 0 replies; 51+ messages in thread
From: Ferruh Yigit @ 2017-07-05 15:46 UTC (permalink / raw)
  To: Vasily Philipov, dev; +Cc: Adrien Mazarguil, Nelio Laranjeiro

On 7/5/2017 9:14 AM, Vasily Philipov wrote:
> Providing this parameter requests flow API isolated mode on all ports at
> initialization time. It ensures all traffic is received through the
> configured flow rules only (see flow command).
> 
> Ports that do not support this mode are automatically discarded.
> 
> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>


Patch 2-4/4 applied to dpdk-next-net/master, thanks.

(Patch 1/4, testpmd patch excluded!)

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v8 1/4] app/testpmd: add isolated mode parameter
  2017-07-05 15:31     ` Ferruh Yigit
@ 2017-07-06  6:03       ` Vasily Philipov
  0 siblings, 0 replies; 51+ messages in thread
From: Vasily Philipov @ 2017-07-06  6:03 UTC (permalink / raw)
  To: Ferruh Yigit, dev; +Cc: Adrien Mazarguil, Nélio Laranjeiro

Yes sure, I will send it

> -----Original Message-----
> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
> Sent: Wednesday, July 05, 2017 18:32
> To: Vasily Philipov <vasilyf@mellanox.com>; dev@dpdk.org
> Cc: Adrien Mazarguil <adrien.mazarguil@6wind.com>; Nélio Laranjeiro
> <nelio.laranjeiro@6wind.com>
> Subject: Re: [dpdk-dev] [PATCH v8 1/4] app/testpmd: add isolated mode
> parameter
> 
> On 7/5/2017 4:18 PM, Ferruh Yigit wrote:
> > On 7/5/2017 9:14 AM, Vasily Philipov wrote:
> >> Providing this parameter requests flow API isolated mode on all ports
> >> at initialization time. It ensures all traffic is received through
> >> the configured flow rules only (see flow command).
> >>
> >> Ports that do not support this mode are automatically discarded.
> >>
> >> Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
> >> ---
> >>  app/test-pmd/parameters.c |  3 +++
> >>  app/test-pmd/testpmd.c    | 14 ++++++++++++++
> >>  app/test-pmd/testpmd.h    |  1 +
> >>  3 files changed, 18 insertions(+)
> >>
> >> diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
> >> index fbe6284..e313871 100644
> >> --- a/app/test-pmd/parameters.c
> >> +++ b/app/test-pmd/parameters.c
> >> @@ -623,6 +623,7 @@
> >>  		{ "tx-queue-stats-mapping",	1, 0, 0 },
> >>  		{ "rx-queue-stats-mapping",	1, 0, 0 },
> >>  		{ "no-flush-rx",	0, 0, 0 },
> >> +		{ "isolated-mode",	        0, 0, 0 },
> >
> > Can you please document the new option [1] ?
> >
> > And when you don't know anything about isolated-mode, this option is
> > not specific enough. What do you think about adding a "flow" or similar
> > keyword to the option?
> >
> > [1]
> > doc/guides/testpmd_app_ug/run_app.rst
> 
> Overall, it seems this testpmd patch can be separated from the patchset.
> 
> I will check the other driver patches in the patchset; can you please send a
> new version of this patch as a standalone patch?
> 
> Thanks,
> ferruh

^ permalink raw reply	[flat|nested] 51+ messages in thread

end of thread, other threads:[~2017-07-06  6:03 UTC | newest]

Thread overview: 51+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-25 13:02 [PATCH 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
2017-05-25 13:02 ` [PATCH 2/3] net/mlx4: support for the RSS flow action Vasily Philipov
2017-05-25 13:02 ` [PATCH 3/3] app/testpmd: add isolated mode parameter Vasily Philipov
2017-05-25 14:05 ` [PATCH v2 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
2017-05-25 14:05 ` [PATCH v2 2/3] net/mlx4: support for the RSS flow action Vasily Philipov
2017-05-25 14:05 ` [PATCH v2 3/3] app/testpmd: add isolated mode parameter Vasily Philipov
2017-05-25 14:10 ` [PATCH v3 1/3] net/mlx4: implement isolated mode from flow API Vasily Philipov
2017-05-25 14:10 ` [PATCH v3 2/3] net/mlx4: support for the RSS flow action Vasily Philipov
2017-05-25 14:10 ` [PATCH v3 3/3] app/testpmd: add isolated mode parameter Vasily Philipov
2017-06-04 13:34 ` [PATCH v4 1/4] net/mlx4: RSS parent queues new method maintenance Vasily Philipov
2017-06-04 13:35 ` [PATCH v4 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
2017-06-04 13:35 ` [PATCH v4 3/4] net/mlx4: support for the RSS flow action Vasily Philipov
2017-06-04 13:35 ` [PATCH v4 4/4] app/testpmd: add isolated mode parameter Vasily Philipov
2017-06-20  1:26   ` Wu, Jingjing
2017-06-21  9:43     ` Vasily Philipov
2017-06-22  1:13       ` Wu, Jingjing
2017-06-26  5:53         ` Vasily Philipov
2017-06-27  8:28         ` Thomas Monjalon
2017-06-29  5:52           ` Wu, Jingjing
2017-06-28 14:03 ` [PATCH v5 1/4] net/mlx4: RSS parent queues new method maintenance Vasily Philipov
2017-06-29 16:51   ` Adrien Mazarguil
2017-06-28 14:03 ` [PATCH v5 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
2017-06-29 16:52   ` Adrien Mazarguil
2017-06-28 14:03 ` [PATCH v5 3/4] net/mlx4: support for the RSS flow action Vasily Philipov
2017-06-29 16:53   ` Adrien Mazarguil
2017-06-28 14:03 ` [PATCH v5 4/4] app/testpmd: add isolated mode parameter Vasily Philipov
2017-06-29 16:53   ` Adrien Mazarguil
2017-07-02 12:32 ` [PATCH v6 1/4] " Vasily Philipov
2017-07-02 12:32 ` [PATCH v6 2/4] net/mlx4: refactor RSS parent queue allocation Vasily Philipov
2017-07-02 12:32 ` [PATCH v6 3/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
2017-07-02 12:32 ` [PATCH v6 4/4] net/mlx4: support flow API RSS action Vasily Philipov
2017-07-04 11:14 ` [PATCH v7 1/4] app/testpmd: add isolated mode parameter Vasily Philipov
2017-07-04 15:20   ` Adrien Mazarguil
2017-07-04 11:14 ` [PATCH v7 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
2017-07-04 15:20   ` Adrien Mazarguil
2017-07-04 11:14 ` [PATCH v7 3/4] net/mlx4: refactor RSS parent queue allocation Vasily Philipov
2017-07-04 15:20   ` Adrien Mazarguil
2017-07-04 11:14 ` [PATCH v7 4/4] net/mlx4: support flow API RSS action Vasily Philipov
2017-07-04 15:21   ` Adrien Mazarguil
2017-07-05  8:14 ` [PATCH v8 1/4] app/testpmd: add isolated mode parameter Vasily Philipov
2017-07-05 14:49   ` Adrien Mazarguil
2017-07-05 15:18   ` Ferruh Yigit
2017-07-05 15:31     ` Ferruh Yigit
2017-07-06  6:03       ` Vasily Philipov
2017-07-05 15:46   ` Ferruh Yigit
2017-07-05  8:14 ` [PATCH v8 2/4] net/mlx4: implement isolated mode from flow API Vasily Philipov
2017-07-05 14:49   ` Adrien Mazarguil
2017-07-05  8:14 ` [PATCH v8 3/4] net/mlx4: refactor RSS parent queue allocation Vasily Philipov
2017-07-05 14:49   ` Adrien Mazarguil
2017-07-05  8:14 ` [PATCH v8 4/4] net/mlx4: support flow API RSS action Vasily Philipov
2017-07-05 14:49   ` Adrien Mazarguil

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.