From: Declan Doherty <declan.doherty@intel.com>
To: dev@dpdk.org
Cc: Declan Doherty <declan.doherty@intel.com>,
	Keith Wiles <keith.wiles@intel.com>
Subject: [PATCH] net/bond: burst mode hash calculation
Date: Fri,  1 Dec 2017 00:04:05 +0000
Message-ID: <20171201000405.27870-1-declan.doherty@intel.com>

Change the xmit_hash functions to handle bursts of packets instead of
one packet at a time, and update the affected tx_burst functions to use
the new burst hashing API.

Signed-off-by: Declan Doherty <declan.doherty@intel.com>
Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 drivers/net/bonding/rte_eth_bond_api.c     |   3 -
 drivers/net/bonding/rte_eth_bond_pmd.c     | 582 ++++++++++++++++++-----------
 drivers/net/bonding/rte_eth_bond_private.h |  22 +-
 3 files changed, 376 insertions(+), 231 deletions(-)
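
For illustration only (not part of the patch): a minimal, self-contained
sketch of the flow the reworked tx paths follow -- hash the whole burst
once, bucket mbufs per slave, transmit per slave, then move anything
untransmitted to the tail of the caller's array. The pkt struct,
demo_burst_hash() and demo_tx_burst() are hypothetical stand-ins for
struct rte_mbuf and the real DPDK calls, so the sketch builds without
DPDK headers.

#include <stdint.h>
#include <stdio.h>

#define NB_SLAVES 2
#define NB_BUFS   8

struct pkt { uint32_t flow_id; };       /* stand-in for struct rte_mbuf */

/* Same shape as the new burst_xmit_hash_t: one slave index per packet. */
static void
demo_burst_hash(struct pkt **bufs, uint16_t nb_bufs,
		uint8_t slave_count, uint16_t *slaves)
{
	uint16_t i;

	for (i = 0; i < nb_bufs; i++)
		slaves[i] = bufs[i]->flow_id % slave_count;
}

/* Pretend each slave can only transmit 3 packets per call. */
static uint16_t
demo_tx_burst(uint16_t slave_id, struct pkt **bufs, uint16_t nb_bufs)
{
	(void)slave_id;
	(void)bufs;
	return nb_bufs < 3 ? nb_bufs : 3;
}

int main(void)
{
	struct pkt p[NB_BUFS], *bufs[NB_BUFS];
	uint16_t slave_idx[NB_BUFS];
	struct pkt *slave_bufs[NB_SLAVES][NB_BUFS];
	uint16_t slave_nb[NB_SLAVES] = { 0 };
	uint16_t sent = 0, failed = 0, i, j;

	for (i = 0; i < NB_BUFS; i++) {
		p[i].flow_id = i;
		bufs[i] = &p[i];
	}

	/* 1) One hash call for the whole burst (was one call per packet). */
	demo_burst_hash(bufs, NB_BUFS, NB_SLAVES, slave_idx);

	/* 2) Bucket packets into per-slave arrays. */
	for (i = 0; i < NB_BUFS; i++)
		slave_bufs[slave_idx[i]][slave_nb[slave_idx[i]]++] = bufs[i];

	/* 3) Transmit per slave; unsent packets go to the tail of bufs[]. */
	for (i = 0; i < NB_SLAVES; i++) {
		uint16_t tx = demo_tx_burst(i, slave_bufs[i], slave_nb[i]);

		sent += tx;
		for (j = tx; j < slave_nb[i]; j++) {
			failed++;
			bufs[NB_BUFS - failed] = slave_bufs[i][j];
		}
	}

	printf("sent %u, %u unsent at the tail of bufs[]\n",
			(unsigned)sent, (unsigned)failed);
	return 0;
}

The reworked tx_burst functions in the diff below follow the same three
steps, using internals->burst_xmit_hash() and rte_eth_tx_burst().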

diff --git a/drivers/net/bonding/rte_eth_bond_api.c b/drivers/net/bonding/rte_eth_bond_api.c
index 980e636..60f5c9c 100644
--- a/drivers/net/bonding/rte_eth_bond_api.c
+++ b/drivers/net/bonding/rte_eth_bond_api.c
@@ -667,15 +667,12 @@ rte_eth_bond_xmit_policy_set(uint16_t bonded_port_id, uint8_t policy)
 	switch (policy) {
 	case BALANCE_XMIT_POLICY_LAYER2:
 		internals->balance_xmit_policy = policy;
-		internals->xmit_hash = xmit_l2_hash;
 		break;
 	case BALANCE_XMIT_POLICY_LAYER23:
 		internals->balance_xmit_policy = policy;
-		internals->xmit_hash = xmit_l23_hash;
 		break;
 	case BALANCE_XMIT_POLICY_LAYER34:
 		internals->balance_xmit_policy = policy;
-		internals->xmit_hash = xmit_l34_hash;
 		break;
 
 	default:
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index fe23289..67dff1e 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -309,87 +309,114 @@ bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
 
 static uint16_t
 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
-		uint16_t nb_pkts)
+		uint16_t nb_bufs)
 {
-	struct bond_dev_private *internals;
-	struct bond_tx_queue *bd_tx_q;
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *internals = bd_tx_q->dev_private;
 
-	uint16_t num_of_slaves;
-	uint16_t slaves[RTE_MAX_ETHPORTS];
-	 /* positions in slaves, not ID */
-	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
-	uint8_t distributing_count;
+	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
+	uint16_t slave_count;
 
-	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
-	uint16_t i, op_slave_idx;
+	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
+	uint16_t dist_slave_count;
 
-	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
+	/* 2-D array to sort mbufs for transmission on each slave into */
+	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
+	/* Number of mbufs for transmission on each slave */
+	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
+	/* Mapping array generated by hash function to map mbufs to slaves */
+	uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
 
-	/* Total amount of packets in slave_bufs */
-	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
-	/* Slow packets placed in each slave */
+	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-	if (unlikely(nb_pkts == 0))
-		return 0;
+	uint16_t i, j;
 
-	bd_tx_q = (struct bond_tx_queue *)queue;
-	internals = bd_tx_q->dev_private;
+	if (unlikely(nb_bufs == 0))
+		return 0;
 
 	/* Copy slave list to protect against slave up/down changes during tx
 	 * bursting */
-	num_of_slaves = internals->active_slave_count;
-	if (num_of_slaves < 1)
-		return num_tx_total;
+	slave_count = internals->active_slave_count;
+	if (unlikely(slave_count < 1))
+		return 0;
 
-	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
-			num_of_slaves);
+	memcpy(slave_port_ids, internals->active_slaves,
+			sizeof(slave_port_ids[0]) * slave_count);
+
+
+	dist_slave_count = 0;
+	for (i = 0; i < slave_count; i++) {
+		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
 
-	distributing_count = 0;
-	for (i = 0; i < num_of_slaves; i++) {
-		struct port *port = &mode_8023ad_ports[slaves[i]];
 		if (ACTOR_STATE(port, DISTRIBUTING))
-			distributing_offsets[distributing_count++] = i;
+			dist_slave_port_ids[dist_slave_count++] =
+					slave_port_ids[i];
 	}
 
-	if (likely(distributing_count > 0)) {
-		/* Populate slaves mbuf with the packets which are to be sent */
-		for (i = 0; i < nb_pkts; i++) {
-			/* Select output slave using hash based on xmit policy */
-			op_slave_idx = internals->xmit_hash(bufs[i],
-					distributing_count);
+	if (unlikely(dist_slave_count < 1))
+		return 0;
 
-			/* Populate slave mbuf arrays with mbufs for that slave.
-			 * Use only slaves that are currently distributing.
-			 */
-			uint8_t slave_offset =
-					distributing_offsets[op_slave_idx];
-			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
-					bufs[i];
-			slave_nb_pkts[slave_offset]++;
-		}
+	/*
+	 * Populate slaves mbuf with the packets which are to be sent on it
+	 * selecting output slave using hash based on xmit policy
+	 */
+	internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
+			bufs_slave_port_idxs);
+
+	for (i = 0; i < nb_bufs; i++) {
+		/* Populate slave mbuf arrays with mbufs for that slave. */
+		uint8_t slave_idx = bufs_slave_port_idxs[i];
+
+		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
 	}
 
+
 	/* Send packet burst on each slave device */
-	for (i = 0; i < num_of_slaves; i++) {
-		if (slave_nb_pkts[i] == 0)
+	for (i = 0; i < dist_slave_count; i++) {
+		if (slave_nb_bufs[i] == 0)
 			continue;
 
-		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
-				slave_bufs[i], slave_nb_pkts[i]);
+		slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
+				bd_tx_q->queue_id, slave_bufs[i],
+				slave_nb_bufs[i]);
 
-		num_tx_total += num_tx_slave;
-		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
+		total_tx_count += slave_tx_count;
 
 		/* If tx burst fails move packets to end of bufs */
-		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
-			uint16_t j = nb_pkts - num_tx_fail_total;
-			for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
-					num_tx_slave++)
-				bufs[j] = slave_bufs[i][num_tx_slave];
+		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
+			slave_tx_fail_count[i] = slave_nb_bufs[i] -
+					slave_tx_count;
+			total_tx_fail_count += slave_tx_fail_count[i];
+
+			/*
+			 * Shift bufs to beginning of array to allow reordering
+			 * later
+			 */
+			for (j = 0; j < slave_tx_fail_count[i]; j++) {
+				slave_bufs[i][j] =
+					slave_bufs[i][slave_tx_count + j];
+			}
 		}
 	}
 
-	return num_tx_total;
+	/*
+	 * If there are tx burst failures we move packets to end of bufs to
+	 * preserve expected PMD behaviour of all failed transmitted being
+	 * at the end of the input mbuf array
+	 */
+	if (unlikely(total_tx_fail_count > 0)) {
+		int bufs_idx = nb_bufs - total_tx_fail_count;
+
+		for (i = 0; i < slave_count; i++) {
+			if (slave_tx_fail_count[i] > 0) {
+				for (j = 0; j < slave_tx_fail_count[i]; j++)
+					bufs[bufs_idx++] = slave_bufs[i][j];
+			}
+		}
+	}
+
+	return total_tx_count;
 }
 
 
@@ -788,96 +815,129 @@ ipv6_hash(struct ipv6_hdr *ipv6_hdr)
 			(word_src_addr[3] ^ word_dst_addr[3]);
 }
 
-uint16_t
-xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
+
+void
+burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves)
 {
-	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
+	struct ether_hdr *eth_hdr;
+	uint32_t hash;
+	int i;
+
+	for (i = 0; i < nb_pkts; i++) {
+		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
 
-	uint32_t hash = ether_hash(eth_hdr);
+		hash = ether_hash(eth_hdr);
 
-	return (hash ^= hash >> 8) % slave_count;
+		slaves[i] = (hash ^= hash >> 8) % slave_count;
+	}
 }
 
-uint16_t
-xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
+void
+burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves)
 {
-	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
-	uint16_t proto = eth_hdr->ether_type;
-	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
-	uint32_t hash, l3hash = 0;
+	uint16_t i;
+	struct ether_hdr *eth_hdr;
+	uint16_t proto;
+	size_t vlan_offset;
+	uint32_t hash, l3hash;
 
-	hash = ether_hash(eth_hdr);
+	for (i = 0; i < nb_pkts; i++) {
+		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
+		l3hash = 0;
 
-	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
-		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
-				((char *)(eth_hdr + 1) + vlan_offset);
-		l3hash = ipv4_hash(ipv4_hdr);
+		proto = eth_hdr->ether_type;
+		hash = ether_hash(eth_hdr);
 
-	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
-		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
-				((char *)(eth_hdr + 1) + vlan_offset);
-		l3hash = ipv6_hash(ipv6_hdr);
-	}
+		vlan_offset = get_vlan_offset(eth_hdr, &proto);
 
-	hash = hash ^ l3hash;
-	hash ^= hash >> 16;
-	hash ^= hash >> 8;
+		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
+			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
+					((char *)(eth_hdr + 1) + vlan_offset);
+			l3hash = ipv4_hash(ipv4_hdr);
 
-	return hash % slave_count;
-}
+		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
+			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
+					((char *)(eth_hdr + 1) + vlan_offset);
+			l3hash = ipv6_hash(ipv6_hdr);
+		}
 
-uint16_t
-xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
-{
-	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
-	uint16_t proto = eth_hdr->ether_type;
-	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
+		hash = hash ^ l3hash;
+		hash ^= hash >> 16;
+		hash ^= hash >> 8;
 
-	struct udp_hdr *udp_hdr = NULL;
-	struct tcp_hdr *tcp_hdr = NULL;
-	uint32_t hash, l3hash = 0, l4hash = 0;
+		slaves[i] = hash % slave_count;
+	}
+}
 
-	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
-		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
-				((char *)(eth_hdr + 1) + vlan_offset);
-		size_t ip_hdr_offset;
+void
+burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves)
+{
+	struct ether_hdr *eth_hdr;
+	uint16_t proto;
+	size_t vlan_offset;
+	int i;
 
-		l3hash = ipv4_hash(ipv4_hdr);
+	struct udp_hdr *udp_hdr;
+	struct tcp_hdr *tcp_hdr;
+	uint32_t hash, l3hash, l4hash;
 
-		/* there is no L4 header in fragmented packet */
-		if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
-			ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
+	for (i = 0; i < nb_pkts; i++) {
+		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
+		proto = eth_hdr->ether_type;
+		vlan_offset = get_vlan_offset(eth_hdr, &proto);
+		l3hash = 0;
+		l4hash = 0;
+
+		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
+			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
+					((char *)(eth_hdr + 1) + vlan_offset);
+			size_t ip_hdr_offset;
+
+			l3hash = ipv4_hash(ipv4_hdr);
+
+			/* there is no L4 header in fragmented packet */
+			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
+								== 0)) {
+				ip_hdr_offset = (ipv4_hdr->version_ihl
+					& IPV4_HDR_IHL_MASK) *
 					IPV4_IHL_MULTIPLIER;
 
-			if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
-				tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
-						ip_hdr_offset);
+				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
+					tcp_hdr = (struct tcp_hdr *)
+						((char *)ipv4_hdr +
+							ip_hdr_offset);
+					l4hash = HASH_L4_PORTS(tcp_hdr);
+				} else if (ipv4_hdr->next_proto_id ==
+								IPPROTO_UDP) {
+					udp_hdr = (struct udp_hdr *)
+						((char *)ipv4_hdr +
+							ip_hdr_offset);
+					l4hash = HASH_L4_PORTS(udp_hdr);
+				}
+			}
+		} else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
+			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
+					((char *)(eth_hdr + 1) + vlan_offset);
+			l3hash = ipv6_hash(ipv6_hdr);
+
+			if (ipv6_hdr->proto == IPPROTO_TCP) {
+				tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
 				l4hash = HASH_L4_PORTS(tcp_hdr);
-			} else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
-				udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
-						ip_hdr_offset);
+			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
+				udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
 				l4hash = HASH_L4_PORTS(udp_hdr);
 			}
 		}
-	} else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
-		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
-				((char *)(eth_hdr + 1) + vlan_offset);
-		l3hash = ipv6_hash(ipv6_hdr);
 
-		if (ipv6_hdr->proto == IPPROTO_TCP) {
-			tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
-			l4hash = HASH_L4_PORTS(tcp_hdr);
-		} else if (ipv6_hdr->proto == IPPROTO_UDP) {
-			udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
-			l4hash = HASH_L4_PORTS(udp_hdr);
-		}
-	}
-
-	hash = l3hash ^ l4hash;
-	hash ^= hash >> 16;
-	hash ^= hash >> 8;
+		hash = l3hash ^ l4hash;
+		hash ^= hash >> 16;
+		hash ^= hash >> 8;
 
-	return hash % slave_count;
+		slaves[i] = hash % slave_count;
+	}
 }
 
 struct bwg_slave {
@@ -1185,156 +1245,240 @@ bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 static uint16_t
 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
-		uint16_t nb_pkts)
+		uint16_t nb_bufs)
 {
-	struct bond_dev_private *internals;
-	struct bond_tx_queue *bd_tx_q;
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *internals = bd_tx_q->dev_private;
 
-	uint16_t num_of_slaves;
-	uint16_t slaves[RTE_MAX_ETHPORTS];
+	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
+	uint16_t slave_count;
 
-	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
+	/* 2-D array to sort mbufs for transmission on each slave into */
+	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
+	/* Number of mbufs for transmission on each slave */
+	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
+	/* Mapping array generated by hash function to map mbufs to slaves */
+	uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
 
-	int i, op_slave_id;
+	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
-	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+	uint16_t i, j;
 
-	bd_tx_q = (struct bond_tx_queue *)queue;
-	internals = bd_tx_q->dev_private;
+	if (unlikely(nb_bufs == 0))
+		return 0;
 
 	/* Copy slave list to protect against slave up/down changes during tx
 	 * bursting */
-	num_of_slaves = internals->active_slave_count;
-	memcpy(slaves, internals->active_slaves,
-			sizeof(internals->active_slaves[0]) * num_of_slaves);
+	slave_count = internals->active_slave_count;
+	if (unlikely(slave_count < 1))
+		return 0;
 
-	if (num_of_slaves < 1)
-		return num_tx_total;
+	memcpy(slave_port_ids, internals->active_slaves,
+			sizeof(slave_port_ids[0]) * slave_count);
 
-	/* Populate slaves mbuf with the packets which are to be sent on it  */
-	for (i = 0; i < nb_pkts; i++) {
-		/* Select output slave using hash based on xmit policy */
-		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
+	/*
+	 * Populate slaves mbuf with the packets which are to be sent on it
+	 * selecting output slave using hash based on xmit policy
+	 */
+	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
+			bufs_slave_port_idxs);
 
-		/* Populate slave mbuf arrays with mbufs for that slave */
-		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
+	for (i = 0; i < nb_bufs; i++) {
+		/* Populate slave mbuf arrays with mbufs for that slave. */
+		uint8_t slave_idx = bufs_slave_port_idxs[i];
+
+		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
 	}
 
 	/* Send packet burst on each slave device */
-	for (i = 0; i < num_of_slaves; i++) {
-		if (slave_nb_pkts[i] > 0) {
-			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
-					slave_bufs[i], slave_nb_pkts[i]);
+	for (i = 0; i < slave_count; i++) {
+		if (slave_nb_bufs[i] == 0)
+			continue;
 
-			/* if tx burst fails move packets to end of bufs */
-			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
-				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
+		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
+				bd_tx_q->queue_id, slave_bufs[i],
+				slave_nb_bufs[i]);
 
-				tx_fail_total += slave_tx_fail_count;
-				memcpy(&bufs[nb_pkts - tx_fail_total],
-						&slave_bufs[i][num_tx_slave],
-						slave_tx_fail_count * sizeof(bufs[0]));
+		total_tx_count += slave_tx_count;
+
+		/* If tx burst fails move packets to end of bufs */
+		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
+			slave_tx_fail_count[i] = slave_nb_bufs[i] -
+					slave_tx_count;
+			total_tx_fail_count += slave_tx_fail_count[i];
+
+			/*
+			 * Shift bufs to beginning of array to allow reordering
+			 * later
+			 */
+			for (j = 0; j < slave_tx_fail_count[i]; j++) {
+				slave_bufs[i][j] =
+					slave_bufs[i][slave_tx_count + j];
 			}
+		}
+	}
 
-			num_tx_total += num_tx_slave;
+	/*
+	 * If there are tx burst failures we move packets to end of bufs to
+	 * preserve expected PMD behaviour of all failed transmitted being
+	 * at the end of the input mbuf array
+	 */
+	if (unlikely(total_tx_fail_count > 0)) {
+		int bufs_idx = nb_bufs - total_tx_fail_count;
+
+		for (i = 0; i < slave_count; i++) {
+			if (slave_tx_fail_count[i] > 0) {
+				for (j = 0; j < slave_tx_fail_count[i]; j++)
+					bufs[bufs_idx++] = slave_bufs[i][j];
+			}
 		}
 	}
 
-	return num_tx_total;
+	return total_tx_count;
 }
 
 static uint16_t
 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
-		uint16_t nb_pkts)
+		uint16_t nb_bufs)
 {
-	struct bond_dev_private *internals;
-	struct bond_tx_queue *bd_tx_q;
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *internals = bd_tx_q->dev_private;
 
-	uint16_t num_of_slaves;
-	uint16_t slaves[RTE_MAX_ETHPORTS];
-	 /* positions in slaves, not ID */
-	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
-	uint8_t distributing_count;
+	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
+	uint16_t slave_count;
 
-	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
-	uint16_t i, j, op_slave_idx;
-	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
+	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
+	uint16_t dist_slave_count;
 
-	/* Allocate additional packets in case 8023AD mode. */
-	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
-	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
+	/* 2-D array to sort mbufs for transmission on each slave into */
+	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
+	/* Number of mbufs for transmission on each slave */
+	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
+	/* Mapping array generated by hash function to map mbufs to slaves */
+	uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
 
-	/* Total amount of packets in slave_bufs */
-	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
-	/* Slow packets placed in each slave */
-	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-	bd_tx_q = (struct bond_tx_queue *)queue;
-	internals = bd_tx_q->dev_private;
+	uint16_t i, j;
+
+	if (unlikely(nb_bufs == 0))
+		return 0;
 
 	/* Copy slave list to protect against slave up/down changes during tx
 	 * bursting */
-	num_of_slaves = internals->active_slave_count;
-	if (num_of_slaves < 1)
-		return num_tx_total;
-
-	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
+	slave_count = internals->active_slave_count;
+	if (unlikely(slave_count < 1))
+		return 0;
 
-	distributing_count = 0;
-	for (i = 0; i < num_of_slaves; i++) {
-		struct port *port = &mode_8023ad_ports[slaves[i]];
+	memcpy(slave_port_ids, internals->active_slaves,
+			sizeof(slave_port_ids[0]) * slave_count);
 
-		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
-				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
-				NULL);
-		slave_nb_pkts[i] = slave_slow_nb_pkts[i];
 
-		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
-			slave_bufs[i][j] = slow_pkts[j];
+	dist_slave_count = 0;
+	for (i = 0; i < slave_count; i++) {
+		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
 
 		if (ACTOR_STATE(port, DISTRIBUTING))
-			distributing_offsets[distributing_count++] = i;
+			dist_slave_port_ids[dist_slave_count++] =
+					slave_port_ids[i];
 	}
 
-	if (likely(distributing_count > 0)) {
-		/* Populate slaves mbuf with the packets which are to be sent on it */
-		for (i = 0; i < nb_pkts; i++) {
-			/* Select output slave using hash based on xmit policy */
-			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
+	if (likely(dist_slave_count > 1)) {
+
+		/*
+		 * Populate slaves mbuf with the packets which are to be sent
+		 * on it, selecting output slave using hash based on xmit policy
+		 */
+		internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
+				bufs_slave_port_idxs);
+
+		for (i = 0; i < nb_bufs; i++) {
+			/*
+			 * Populate slave mbuf arrays with mbufs for that
+			 * slave
+			 */
+			uint8_t slave_idx = bufs_slave_port_idxs[i];
+
+			slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
+					bufs[i];
+		}
+
+
+		/* Send packet burst on each slave device */
+		for (i = 0; i < dist_slave_count; i++) {
+			if (slave_nb_bufs[i] == 0)
+				continue;
 
-			/* Populate slave mbuf arrays with mbufs for that slave. Use only
-			 * slaves that are currently distributing. */
-			uint8_t slave_offset = distributing_offsets[op_slave_idx];
-			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
-			slave_nb_pkts[slave_offset]++;
+			slave_tx_count = rte_eth_tx_burst(
+					dist_slave_port_ids[i],
+					bd_tx_q->queue_id, slave_bufs[i],
+					slave_nb_bufs[i]);
+
+			total_tx_count += slave_tx_count;
+
+			/* If tx burst fails move packets to end of bufs */
+			if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
+				slave_tx_fail_count[i] = slave_nb_bufs[i] -
+						slave_tx_count;
+				total_tx_fail_count += slave_tx_fail_count[i];
+
+				/*
+				 * Shift bufs to beginning of array to allow
+				 * reordering later
+				 */
+				for (j = 0; j < slave_tx_fail_count[i]; j++)
+					slave_bufs[i][j] =
+						slave_bufs[i]
+							[slave_tx_count
+							+ j];
+			}
+		}
+
+		/*
+		 * If there are tx burst failures we move packets to end of
+		 * bufs to preserve expected PMD behaviour of all failed
+		 * transmitted being at the end of the input mbuf array
+		 */
+		if (unlikely(total_tx_fail_count > 0)) {
+			int bufs_idx = nb_bufs - total_tx_fail_count;
+
+			for (i = 0; i < slave_count; i++) {
+				if (slave_tx_fail_count[i] > 0) {
+					for (j = 0;
+						j < slave_tx_fail_count[i];
+						j++) {
+						bufs[bufs_idx++] =
+							slave_bufs[i][j];
+					}
+				}
+			}
 		}
 	}
 
-	/* Send packet burst on each slave device */
-	for (i = 0; i < num_of_slaves; i++) {
-		if (slave_nb_pkts[i] == 0)
-			continue;
+	/* Check for LACP control packets and send if available */
+	for (i = 0; i < slave_count; i++) {
+		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
+		struct rte_mbuf *ctrl_pkt = NULL;
 
-		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
-				slave_bufs[i], slave_nb_pkts[i]);
+		if (likely(rte_ring_empty(port->tx_ring)))
+			continue;
 
-		/* If tx burst fails drop slow packets */
-		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
-			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
+		rte_ring_dequeue(port->tx_ring,	(void **)&ctrl_pkt);
 
-		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
-		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
+		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
+					bd_tx_q->queue_id, &ctrl_pkt, 1);
 
-		/* If tx burst fails move packets to end of bufs */
-		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
-			uint16_t j = nb_pkts - num_tx_fail_total;
-			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
-				bufs[j] = slave_bufs[i][num_tx_slave];
-		}
+		/*
+		 * re-enqueue LAG control plane packets to buffering
+		 * ring if transmission fails so the packet isn't lost.
+		 */
+		if (slave_tx_count != 1)
+			rte_ring_enqueue(port->tx_ring,	ctrl_pkt);
 	}
 
-	return num_tx_total;
+	return total_tx_count;
 }
 
 static uint16_t
@@ -2769,7 +2913,7 @@ bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
 	internals->mode = BONDING_MODE_INVALID;
 	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
 	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
-	internals->xmit_hash = xmit_l2_hash;
+	internals->burst_xmit_hash = burst_xmit_l2_hash;
 	internals->user_defined_mac = 0;
 
 	internals->link_status_polling_enabled = 0;
diff --git a/drivers/net/bonding/rte_eth_bond_private.h b/drivers/net/bonding/rte_eth_bond_private.h
index 1392da9..3dc2e5a 100644
--- a/drivers/net/bonding/rte_eth_bond_private.h
+++ b/drivers/net/bonding/rte_eth_bond_private.h
@@ -109,8 +109,8 @@ struct bond_slave_details {
 	uint16_t reta_size;
 };
 
-
-typedef uint16_t (*xmit_hash_t)(const struct rte_mbuf *buf, uint8_t slave_count);
+typedef void (*burst_xmit_hash_t)(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves);
 
 /** Link Bonding PMD device private configuration Structure */
 struct bond_dev_private {
@@ -126,7 +126,7 @@ struct bond_dev_private {
 
 	uint8_t balance_xmit_policy;
 	/**< Transmit policy - l2 / l23 / l34 for operation in balance mode */
-	xmit_hash_t xmit_hash;
+	burst_xmit_hash_t burst_xmit_hash;
 	/**< Transmit policy hash function */
 
 	uint8_t user_defined_mac;
@@ -245,14 +245,18 @@ void
 slave_add(struct bond_dev_private *internals,
 		struct rte_eth_dev *slave_eth_dev);
 
-uint16_t
-xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count);
+void
+burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves);
 
-uint16_t
-xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count);
+void
+burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves);
+
+void
+burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves);
 
-uint16_t
-xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count);
 
 void
 bond_ethdev_primary_set(struct bond_dev_private *internals,
-- 
2.9.4

Thread overview: 4+ messages
2017-12-01  0:04 Declan Doherty [this message]
2017-12-07  1:42 ` [PATCH] net/bond: burst mode hash calculation Ferruh Yigit
2018-01-09 11:34 ` [PATCH v2] " Declan Doherty
2018-01-10 20:39   ` Ferruh Yigit
