From: Vivian Kong <vivkong@gmail.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [RFC 05/12] examples/l3fwd: add support for s390x architecture
Date: Tue,  9 Apr 2019 15:06:23 -0400
Message-ID: <20190409190630.31975-6-vivkong@ca.ibm.com>
In-Reply-To: <20190409190630.31975-1-vivkong@ca.ibm.com>

Add an s390x-specific implementation of the l3fwd vector code paths:
exact-match key masking in l3fwd_em.c, plus new s390x headers for the
LPM x4 lookup path and the common packet rewrite/send helpers.

Signed-off-by: Vivian Kong <vivkong@ca.ibm.com>
---
 examples/l3fwd/l3fwd_em.c        |   8 +
 examples/l3fwd/l3fwd_lpm_s390x.h | 137 ++++++++++++++++
 examples/l3fwd/l3fwd_s390x.h     | 259 +++++++++++++++++++++++++++++++
 3 files changed, 404 insertions(+)
 create mode 100644 examples/l3fwd/l3fwd_lpm_s390x.h
 create mode 100644 examples/l3fwd/l3fwd_s390x.h

diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index fa8f82be6..a486a9c57 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -240,6 +240,14 @@ em_mask_key(void *key, xmm_t mask)
 
 	return vec_and(data, mask);
 }
+#elif defined(RTE_MACHINE_CPUFLAG_ZARCH)
+static inline xmm_t
+em_mask_key(void *key, xmm_t mask)
+{
+	xmm_t data = (xmm_t) vec_xld2(0, (unsigned int *)(key));
+
+	return vec_and(data, mask);
+}
 #else
 #error No vector engine (SSE, NEON, ALTIVEC) available, check your toolchain
 #endif
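
For context, em_mask_key() only needs to AND the 16 bytes of the packet's
5-tuple key with a constant mask so that fields outside the exact-match key
are zeroed. A scalar sketch of the intended behaviour (the helper name is
made up for illustration and is not part of the patch):

static inline void
em_mask_key_scalar(const uint8_t *key, const uint8_t *mask, uint8_t *out)
{
	int i;

	/* Clear every byte that is not covered by the key mask. */
	for (i = 0; i < 16; i++)
		out[i] = key[i] & mask[i];
}
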
diff --git a/examples/l3fwd/l3fwd_lpm_s390x.h b/examples/l3fwd/l3fwd_lpm_s390x.h
new file mode 100644
index 000000000..858f696ba
--- /dev/null
+++ b/examples/l3fwd/l3fwd_lpm_s390x.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2016 Intel Corporation.
+ * (c) Copyright IBM Corp. 2017, 2019
+ */
+#ifndef __L3FWD_LPM_S390X_H__
+#define __L3FWD_LPM_S390X_H__
+
+#include "l3fwd_s390x.h"
+
+typedef unsigned char vector_unsigned_char
+	__attribute__((vector_size(16*sizeof(unsigned char))));
+
+/*
+ * Read packet_type and destination IPV4 addresses from 4 mbufs.
+ */
+static inline void
+processx4_step1(struct rte_mbuf *pkt[FWDSTEP],
+		vector_unsigned_int *dip,
+		uint32_t *ipv4_flag)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct ether_hdr *eth_hdr;
+	uint32_t x0, x1, x2, x3;
+
+	eth_hdr = rte_pktmbuf_mtod(pkt[0], struct ether_hdr *);
+	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+	x0 = ipv4_hdr->dst_addr;
+	ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4;
+
+	rte_compiler_barrier();
+	eth_hdr = rte_pktmbuf_mtod(pkt[1], struct ether_hdr *);
+	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+	x1 = ipv4_hdr->dst_addr;
+	ipv4_flag[0] &= pkt[1]->packet_type;
+
+	rte_compiler_barrier();
+	eth_hdr = rte_pktmbuf_mtod(pkt[2], struct ether_hdr *);
+	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+	x2 = ipv4_hdr->dst_addr;
+	ipv4_flag[0] &= pkt[2]->packet_type;
+
+	rte_compiler_barrier();
+	eth_hdr = rte_pktmbuf_mtod(pkt[3], struct ether_hdr *);
+	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+	x3 = ipv4_hdr->dst_addr;
+	ipv4_flag[0] &= pkt[3]->packet_type;
+
+	rte_compiler_barrier();
+	dip[0] = (vector_unsigned_int){x0, x1, x2, x3};
+}
+
+/*
+ * Lookup into LPM for destination port.
+ * If lookup fails, use incoming port (portid) as destination port.
+ */
+static inline void
+processx4_step2(const struct lcore_conf *qconf,
+		vector_unsigned_int dip,
+		uint32_t ipv4_flag,
+		uint8_t portid,
+		struct rte_mbuf *pkt[FWDSTEP],
+		uint16_t dprt[FWDSTEP])
+{
+	rte_xmm_t dst;
+	const vector_unsigned_char bswap_mask = (vector_unsigned_char){
+							3, 2, 1, 0,
+							7, 6, 5, 4,
+							11, 10, 9, 8,
+							15, 14, 13, 12};
+
+	/* Byte swap 4 IPV4 addresses. */
+	dip = (vector_unsigned_int)vec_perm(*(vector_unsigned_char *)&dip,
+					(vector_unsigned_char){}, bswap_mask);
+
+	/* if all 4 packets are IPV4. */
+	if (likely(ipv4_flag)) {
+		rte_lpm_lookupx4(qconf->ipv4_lookup_struct, (xmm_t)dip,
+			(uint32_t *)&dst, portid);
+		/* get rid of the unused upper 16 bits of each dport. */
+		dst.x = (xmm_t)vec_packs(dst.x, dst.x);
+		*(uint64_t *)dprt = dst.u64[0];
+	} else {
+		dst.x = (xmm_t)dip;
+		dprt[0] = lpm_get_dst_port_with_ipv4(qconf, pkt[0],
+							dst.u32[0], portid);
+		dprt[1] = lpm_get_dst_port_with_ipv4(qconf, pkt[1],
+							dst.u32[1], portid);
+		dprt[2] = lpm_get_dst_port_with_ipv4(qconf, pkt[2],
+							dst.u32[2], portid);
+		dprt[3] = lpm_get_dst_port_with_ipv4(qconf, pkt[3],
+							dst.u32[3], portid);
+	}
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+			uint8_t portid, struct lcore_conf *qconf)
+{
+	int32_t j;
+	uint16_t dst_port[MAX_PKT_BURST];
+	vector_unsigned_int dip[MAX_PKT_BURST / FWDSTEP];
+	uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
+	const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
+
+	for (j = 0; j != k; j += FWDSTEP)
+		processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP],
+				&ipv4_flag[j / FWDSTEP]);
+
+	for (j = 0; j != k; j += FWDSTEP)
+		processx4_step2(qconf, dip[j / FWDSTEP],
+				ipv4_flag[j / FWDSTEP],
+				portid, &pkts_burst[j], &dst_port[j]);
+
+	/* Classify last up to 3 packets one by one */
+	switch (nb_rx % FWDSTEP) {
+	case 3:
+		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		j++;
+		/* fall-through */
+	case 2:
+		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		j++;
+		/* fall-through */
+	case 1:
+		dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+		j++;
+		/* fall-through */
+	}
+
+	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+}
+
+#endif /* __L3FWD_LPM_S390X_H__ */
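
As a usage sketch, l3fwd_lpm_send_packets() is meant to be driven from the
LPM main loop right after the receive burst; the loop below is simplified
and assumed from the existing l3fwd_lpm.c, it is not part of this patch:

	nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, MAX_PKT_BURST);
	if (nb_rx != 0)
		l3fwd_lpm_send_packets(nb_rx, pkts_burst, portid, qconf);
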
diff --git a/examples/l3fwd/l3fwd_s390x.h b/examples/l3fwd/l3fwd_s390x.h
new file mode 100644
index 000000000..d027092a4
--- /dev/null
+++ b/examples/l3fwd/l3fwd_s390x.h
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Intel Corporation.
+ * (c) Copyright IBM Corp. 2017, 2019
+ */
+#ifndef _L3FWD_S390X_H_
+#define _L3FWD_S390X_H_
+
+#include "l3fwd.h"
+#include "l3fwd_common.h"
+
+#define vec_sro(a, b) vec_srb(a, (b) << 64) /* Vector Shift Right by Octet */
+typedef unsigned int vector_unsigned_int
+	__attribute__((vector_size(4*sizeof(unsigned int))));
+typedef unsigned short vector_unsigned_short
+	__attribute__((vector_size(8*sizeof(unsigned short))));
+
+/*
+ * Update source and destination MAC addresses in the ethernet header.
+ * Perform RFC1812 checks and updates for IPV4 packets.
+ */
+static inline void
+processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
+{
+	vector_unsigned_int te[FWDSTEP];
+	vector_unsigned_int ve[FWDSTEP];
+	vector_unsigned_int *p[FWDSTEP];
+
+	p[0] = rte_pktmbuf_mtod(pkt[0], vector_unsigned_int *);
+	p[1] = rte_pktmbuf_mtod(pkt[1], vector_unsigned_int *);
+	p[2] = rte_pktmbuf_mtod(pkt[2], vector_unsigned_int *);
+	p[3] = rte_pktmbuf_mtod(pkt[3], vector_unsigned_int *);
+
+	ve[0] = (vector_unsigned_int)val_eth[dst_port[0]];
+	te[0] = *p[0];
+
+	ve[1] = (vector_unsigned_int)val_eth[dst_port[1]];
+	te[1] = *p[1];
+
+	ve[2] = (vector_unsigned_int)val_eth[dst_port[2]];
+	te[2] = *p[2];
+
+	ve[3] = (vector_unsigned_int)val_eth[dst_port[3]];
+	te[3] = *p[3];
+
+	/* Update the first 12 bytes, keep the remaining bytes intact. */
+	te[0] = (vector_unsigned_int)vec_sel(
+			(vector_unsigned_short)ve[0],
+			(vector_unsigned_short)te[0],
+			(vector_unsigned_short) {0, 0, 0, 0,
+						0, 0, 0xffff, 0xffff});
+
+	te[1] = (vector_unsigned_int)vec_sel(
+			(vector_unsigned_short)ve[1],
+			(vector_unsigned_short)te[1],
+			(vector_unsigned_short) {0, 0, 0, 0,
+						0, 0, 0xffff, 0xffff});
+
+	te[2] = (vector_unsigned_int)vec_sel(
+			(vector_unsigned_short)ve[2],
+			(vector_unsigned_short)te[2],
+			(vector_unsigned_short) {0, 0, 0, 0, 0,
+						0, 0xffff, 0xffff});
+
+	te[3] = (vector_unsigned_int)vec_sel(
+			(vector_unsigned_short)ve[3],
+			(vector_unsigned_short)te[3],
+			(vector_unsigned_short) {0, 0, 0, 0,
+						0, 0, 0xffff, 0xffff});
+
+	*p[0] = te[0];
+	*p[1] = te[1];
+	*p[2] = te[2];
+	*p[3] = te[3];
+
+	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1),
+		&dst_port[0], pkt[0]->packet_type);
+	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1),
+		&dst_port[1], pkt[1]->packet_type);
+	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1),
+		&dst_port[2], pkt[2]->packet_type);
+	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1),
+		&dst_port[3], pkt[3]->packet_type);
+}
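
The vec_sel() calls above implement the usual l3fwd MAC rewrite: the first
12 bytes of each 16-byte Ethernet header load (destination and source MAC)
are replaced from val_eth[dst_port], while the last 4 bytes of the load are
kept. A scalar sketch of the same rewrite, assuming the val_eth[] and
ETHER_ADDR_LEN declarations from l3fwd.h and rte_ether.h (hypothetical
helper, for illustration only):

static inline void
update_eth_hdr_scalar(struct ether_hdr *eth_hdr, uint16_t dst_port)
{
	const uint8_t *src = (const uint8_t *)&val_eth[dst_port];
	uint8_t *dst = (uint8_t *)eth_hdr;
	int i;

	/* val_eth[port] carries the new dst and src MAC in its first 12 bytes. */
	for (i = 0; i < 2 * ETHER_ADDR_LEN; i++)
		dst[i] = src[i];
	/* ether_type and everything after it are left untouched. */
}
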
+
+/*
+ * Group consecutive packets with the same destination port in bursts of 4.
+ * Suppose we have array of destination ports:
+ * dst_port[] = {a, b, c, d, e, ... }
+ * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
+ * We do 4 comparisons at once and the result is a 4-bit mask.
+ * This mask is used as an index into a prebuilt array of pnum values.
+ */
+static inline uint16_t *
+port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, vector_unsigned_short dp1,
+	vector_unsigned_short dp2)
+{
+	union {
+		uint16_t u16[FWDSTEP + 1];
+		uint64_t u64;
+	} *pnum = (void *)pn;
+
+	int32_t v;
+
+	v = vec_any_eq(dp1, dp2);
+
+
+	/* update last port counter. */
+	lp[0] += gptbl[v].lpv;
+
+	/* if dest port value has changed. */
+	if (v != GRPMSK) {
+		pnum->u64 = gptbl[v].pnum;
+		pnum->u16[FWDSTEP] = 1;
+		lp = pnum->u16 + gptbl[v].idx;
+	}
+
+	return lp;
+}
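
port_groupx4() and the gptbl lookup are a SIMD formulation of a plain
run-length grouping of equal destination ports; send_packets_multi() below
only relies on pnum[j] holding the length of the run that starts at index j.
A conceptual scalar equivalent (hypothetical helper, ignoring the
table-driven four-at-a-time trick):

static inline void
group_ports_scalar(const uint16_t *dst_port, uint16_t *pnum, int nb)
{
	int i, j;

	for (i = 0; i < nb; i = j) {
		/* Find the end of the run of packets with the same port. */
		for (j = i + 1; j < nb && dst_port[j] == dst_port[i]; j++)
			;
		pnum[i] = (uint16_t)(j - i);
	}
}
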
+
+/**
+ * Process one packet:
+ * Update source and destination MAC addresses in the ethernet header.
+ * Perform RFC1812 checks and updates for IPV4 packets.
+ */
+static inline void
+process_packet(struct rte_mbuf *pkt, uint16_t *dst_port)
+{
+	struct ether_hdr *eth_hdr;
+	vector_unsigned_int te, ve;
+
+	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+
+	te = *(vector_unsigned_int *)eth_hdr;
+	ve = (vector_unsigned_int)val_eth[dst_port[0]];
+
+	rfc1812_process((struct ipv4_hdr *)(eth_hdr + 1), dst_port,
+			pkt->packet_type);
+
+	/* dynamically vec_sel te and ve for MASK_ETH (0x3f) */
+	te = (vector_unsigned_int)vec_sel(
+		(vector_unsigned_short)ve,
+		(vector_unsigned_short)te,
+		(vector_unsigned_short){0, 0, 0, 0,
+					0, 0, 0xffff, 0xffff});
+
+	*(vector_unsigned_int *)eth_hdr = te;
+}
+
+/**
+ * Send a burst of packets from pkts_burst to the ports in the dst_port array
+ */
+static __rte_always_inline void
+send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
+		uint16_t dst_port[MAX_PKT_BURST], int nb_rx)
+{
+	int32_t k;
+	int j = 0;
+	uint16_t dlp;
+	uint16_t *lp;
+	uint16_t pnum[MAX_PKT_BURST + 1];
+
+	/*
+	 * Finish packet processing and group consecutive
+	 * packets with the same destination port.
+	 */
+	k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
+	if (k != 0) {
+		vector_unsigned_short dp1, dp2;
+
+		lp = pnum;
+		lp[0] = 1;
+
+		processx4_step3(pkts_burst, dst_port);
+
+		/* dp1: <d[0], d[1], d[2], d[3], ... > */
+		dp1 = *(vector_unsigned_short *)dst_port;
+
+		for (j = FWDSTEP; j != k; j += FWDSTEP) {
+			processx4_step3(&pkts_burst[j], &dst_port[j]);
+
+			/*
+			 * dp2:
+			 * <d[j-3], d[j-2], d[j-1], d[j], ... >
+			 */
+			dp2 = *((vector_unsigned_short *)
+					&dst_port[j - FWDSTEP + 1]);
+			lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
+
+			/*
+			 * dp1:
+			 * <d[j], d[j+1], d[j+2], d[j+3], ... >
+			 */
+			dp1 = vec_sro(dp2, (vector unsigned char) {
+				0, 0, 0, 0, 0, 0, 0, 0,
+				0, 0, 0, (FWDSTEP - 1) * sizeof(dst_port[0])});
+		}
+
+		/*
+		 * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... >
+		 */
+		dp2 = vec_perm(dp1, (vector_unsigned_short){},
+				(vector unsigned char) {0xf9});
+		lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
+
+		/*
+		 * remove values added by the last repeated
+		 * dst port.
+		 */
+		lp[0]--;
+		dlp = dst_port[j - 1];
+	} else {
+		/* set dlp and lp to the never used values. */
+		dlp = BAD_PORT - 1;
+		lp = pnum + MAX_PKT_BURST;
+	}
+
+	/* Process up to last 3 packets one by one. */
+	switch (nb_rx % FWDSTEP) {
+	case 3:
+		process_packet(pkts_burst[j], dst_port + j);
+		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+		j++;
+		/* fall-through */
+	case 2:
+		process_packet(pkts_burst[j], dst_port + j);
+		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+		j++;
+		/* fall-through */
+	case 1:
+		process_packet(pkts_burst[j], dst_port + j);
+		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+		j++;
+	}
+
+	/*
+	 * Send packets out, through destination port.
+	 * Consecutive packets with the same destination port
+	 * are already grouped together.
+	 * If destination port for the packet equals BAD_PORT,
+	 * then free the packet without sending it out.
+	 */
+	for (j = 0; j < nb_rx; j += k) {
+
+		int32_t m;
+		uint16_t pn;
+
+		pn = dst_port[j];
+		k = pnum[j];
+
+		if (likely(pn != BAD_PORT))
+			send_packetsx4(qconf, pn, pkts_burst + j, k);
+		else
+			for (m = j; m != j + k; m++)
+				rte_pktmbuf_free(pkts_burst[m]);
+
+	}
+}
+
+#endif /* _L3FWD_S390X_H_ */
-- 
2.17.1


Thread overview: 42+ messages
2019-04-09 19:06 [dpdk-dev] [RFC 00/12] introduce s390x architecture Vivian Kong
2019-04-09 19:06 ` [dpdk-dev] [RFC 01/12] mk: " Vivian Kong
2019-04-09 19:38   ` Luca Boccassi
2019-04-10 16:50     ` Vivian Kong
2019-04-09 19:06 ` [dpdk-dev] [RFC 02/12] eal: add support for " Vivian Kong
2019-04-09 19:06 ` [dpdk-dev] [RFC 03/12] acl: " Vivian Kong
2019-04-09 19:06 ` [dpdk-dev] [RFC 04/12] lpm: " Vivian Kong
2019-04-09 19:06 ` Vivian Kong [this message]
2019-04-09 19:06 ` [dpdk-dev] [RFC 06/12] net/i40e: " Vivian Kong
2019-04-09 19:06 ` [dpdk-dev] [RFC 07/12] test: " Vivian Kong
2019-04-09 19:06 ` [dpdk-dev] [RFC 08/12] hash: " Vivian Kong
2019-04-15 20:43   ` Dharmik Thakkar
2019-04-09 19:06 ` [dpdk-dev] [RFC 09/12] doc: introduce " Vivian Kong
2019-04-09 20:12   ` Thomas Monjalon
2019-04-10 16:52     ` Vivian Kong
2019-04-09 19:06 ` [dpdk-dev] [RFC 10/12] ethdev: add cast for bus_device Vivian Kong
2019-04-09 20:14   ` Thomas Monjalon
2019-04-10  2:41     ` Stephen Hemminger
2019-04-10 17:10     ` Vivian Kong
2019-04-09 19:06 ` [dpdk-dev] [RFC 11/12] mbuf: trivial fix Vivian Kong
2019-04-09 20:13   ` Thomas Monjalon
2019-04-09 19:06 ` [dpdk-dev] [RFC 12/12] test: miscellaneous test fixes Vivian Kong
2019-04-09 20:25 ` [dpdk-dev] [RFC 00/12] introduce s390x architecture Thomas Monjalon
2019-04-10 17:09   ` Vivian Kong
2019-04-10 17:43     ` Thomas Monjalon
2019-04-10 19:15       ` Vivian Kong
2019-04-10 19:46         ` Thomas Monjalon
2019-04-10 20:45           ` Vivian Kong
2019-04-10 20:31       ` David Christensen
2019-04-10 20:46         ` Thomas Monjalon
2019-04-10 21:03           ` David Christensen
2019-04-10 21:45           ` Pradeep Satyanarayana
2019-04-11  6:59             ` Thomas Monjalon
2019-04-11 15:57               ` Pradeep Satyanarayana
     [not found]               ` <OFB9049735.7E51096E-ON882583D9.00574783-882583D9.00579FE2@LocalDomain>
2019-04-18 21:06                 ` Pradeep Satyanarayana
2019-04-18 21:18                   ` Thomas Monjalon
2019-04-11  7:45 ` David Marchand
2019-05-15 12:45   ` David Marchand
2020-02-18 21:03 ` Thomas Monjalon
2020-02-20  0:20   ` Pradeep Satyanarayana
2020-02-21 13:33   ` Vivian Kong
2021-03-24 21:40     ` Thomas Monjalon
