All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] Support VxLAN GRO
@ 2017-11-25  3:17 Jiayu Hu
  2017-11-25  3:17 ` [PATCH 1/2] gro: TCP/IPV4 GRO codes cleanup Jiayu Hu
                   ` (2 more replies)
  0 siblings, 3 replies; 31+ messages in thread
From: Jiayu Hu @ 2017-11-25  3:17 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, konstantin.ananyev, Jiayu Hu

VxLAN is one of the most widely used tunneled protocols. Providing GRO
support for VxLAN-encapsulated packets can benefit many per-packet based
applications, like OVS.

This patchset is to support VxLAN GRO. The first patch cleans up current
TCP/IPv4 GRO codes for the sake of supporting tunneled GRO. The second
patch supports GRO on the VxLAN packets which have an outer IPv4 header
and inner TCP/IPv4 headers.

Jiayu Hu (2):
  gro: TCP/IPV4 GRO codes cleanup
  gro: support VxLAN GRO

 lib/librte_gro/Makefile         |   1 +
 lib/librte_gro/gro_tcp4.c       | 294 +++++++-----------------
 lib/librte_gro/gro_tcp4.h       | 236 ++++++++++++++------
 lib/librte_gro/gro_vxlan_tcp4.c | 481 ++++++++++++++++++++++++++++++++++++++++
 lib/librte_gro/gro_vxlan_tcp4.h | 178 +++++++++++++++
 lib/librte_gro/rte_gro.c        | 166 ++++++++++----
 lib/librte_gro/rte_gro.h        |   3 +
 7 files changed, 1032 insertions(+), 327 deletions(-)
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.c
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.h

-- 
2.7.4

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 1/2] gro: TCP/IPV4 GRO codes cleanup
  2017-11-25  3:17 [PATCH 0/2] Support VxLAN GRO Jiayu Hu
@ 2017-11-25  3:17 ` Jiayu Hu
  2017-11-25  3:17 ` [PATCH 2/2] gro: support VxLAN GRO Jiayu Hu
  2017-12-14  2:49 ` [PATCH v2 0/2] Support " Jiayu Hu
  2 siblings, 0 replies; 31+ messages in thread
From: Jiayu Hu @ 2017-11-25  3:17 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, konstantin.ananyev, Jiayu Hu

This patch updates TCP/IPv4 GRO as follows:
- remove IP identification check when merging TCP/IPv4 packets
- extract common internal functions for supporting tunneled GRO
- rename internal functions and variants for better understanding
- update comments

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
---
 lib/librte_gro/gro_tcp4.c | 294 +++++++++++++---------------------------------
 lib/librte_gro/gro_tcp4.h | 236 ++++++++++++++++++++++++++-----------
 lib/librte_gro/rte_gro.c  |  60 +++++-----
 3 files changed, 278 insertions(+), 312 deletions(-)

diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c
index 61a0423..a560a84 100644
--- a/lib/librte_gro/gro_tcp4.c
+++ b/lib/librte_gro/gro_tcp4.c
@@ -34,8 +34,6 @@
 #include <rte_mbuf.h>
 #include <rte_cycles.h>
 #include <rte_ethdev.h>
-#include <rte_ip.h>
-#include <rte_tcp.h>
 
 #include "gro_tcp4.h"
 
@@ -72,20 +70,20 @@ gro_tcp4_tbl_create(uint16_t socket_id,
 	}
 	tbl->max_item_num = entries_num;
 
-	size = sizeof(struct gro_tcp4_key) * entries_num;
-	tbl->keys = rte_zmalloc_socket(__func__,
+	size = sizeof(struct gro_tcp4_flow) * entries_num;
+	tbl->flows = rte_zmalloc_socket(__func__,
 			size,
 			RTE_CACHE_LINE_SIZE,
 			socket_id);
-	if (tbl->keys == NULL) {
+	if (tbl->flows == NULL) {
 		rte_free(tbl->items);
 		rte_free(tbl);
 		return NULL;
 	}
-	/* INVALID_ARRAY_INDEX indicates empty key */
+	/* INVALID_ARRAY_INDEX indicates an empty flow */
 	for (i = 0; i < entries_num; i++)
-		tbl->keys[i].start_index = INVALID_ARRAY_INDEX;
-	tbl->max_key_num = entries_num;
+		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+	tbl->max_flow_num = entries_num;
 
 	return tbl;
 }
@@ -97,111 +95,11 @@ gro_tcp4_tbl_destroy(void *tbl)
 
 	if (tcp_tbl) {
 		rte_free(tcp_tbl->items);
-		rte_free(tcp_tbl->keys);
+		rte_free(tcp_tbl->flows);
 	}
 	rte_free(tcp_tbl);
 }
 
-/*
- * merge two TCP/IPv4 packets without updating checksums.
- * If cmp is larger than 0, append the new packet to the
- * original packet. Otherwise, pre-pend the new packet to
- * the original packet.
- */
-static inline int
-merge_two_tcp4_packets(struct gro_tcp4_item *item_src,
-		struct rte_mbuf *pkt,
-		uint16_t ip_id,
-		uint32_t sent_seq,
-		int cmp)
-{
-	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
-	uint16_t tcp_datalen;
-
-	if (cmp > 0) {
-		pkt_head = item_src->firstseg;
-		pkt_tail = pkt;
-	} else {
-		pkt_head = pkt;
-		pkt_tail = item_src->firstseg;
-	}
-
-	/* check if the packet length will be beyond the max value */
-	tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len -
-		pkt_tail->l3_len - pkt_tail->l4_len;
-	if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen >
-			TCP4_MAX_L3_LENGTH)
-		return 0;
-
-	/* remove packet header for the tail packet */
-	rte_pktmbuf_adj(pkt_tail,
-			pkt_tail->l2_len +
-			pkt_tail->l3_len +
-			pkt_tail->l4_len);
-
-	/* chain two packets together */
-	if (cmp > 0) {
-		item_src->lastseg->next = pkt;
-		item_src->lastseg = rte_pktmbuf_lastseg(pkt);
-		/* update IP ID to the larger value */
-		item_src->ip_id = ip_id;
-	} else {
-		lastseg = rte_pktmbuf_lastseg(pkt);
-		lastseg->next = item_src->firstseg;
-		item_src->firstseg = pkt;
-		/* update sent_seq to the smaller value */
-		item_src->sent_seq = sent_seq;
-	}
-	item_src->nb_merged++;
-
-	/* update mbuf metadata for the merged packet */
-	pkt_head->nb_segs += pkt_tail->nb_segs;
-	pkt_head->pkt_len += pkt_tail->pkt_len;
-
-	return 1;
-}
-
-static inline int
-check_seq_option(struct gro_tcp4_item *item,
-		struct tcp_hdr *tcp_hdr,
-		uint16_t tcp_hl,
-		uint16_t tcp_dl,
-		uint16_t ip_id,
-		uint32_t sent_seq)
-{
-	struct rte_mbuf *pkt0 = item->firstseg;
-	struct ipv4_hdr *ipv4_hdr0;
-	struct tcp_hdr *tcp_hdr0;
-	uint16_t tcp_hl0, tcp_dl0;
-	uint16_t len;
-
-	ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) +
-			pkt0->l2_len);
-	tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len);
-	tcp_hl0 = pkt0->l4_len;
-
-	/* check if TCP option fields equal. If not, return 0. */
-	len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr);
-	if ((tcp_hl != tcp_hl0) ||
-			((len > 0) && (memcmp(tcp_hdr + 1,
-					tcp_hdr0 + 1,
-					len) != 0)))
-		return 0;
-
-	/* check if the two packets are neighbors */
-	tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0;
-	if ((sent_seq == (item->sent_seq + tcp_dl0)) &&
-			(ip_id == (item->ip_id + 1)))
-		/* append the new packet */
-		return 1;
-	else if (((sent_seq + tcp_dl) == item->sent_seq) &&
-			((ip_id + item->nb_merged) == item->ip_id))
-		/* pre-pend the new packet */
-		return -1;
-	else
-		return 0;
-}
-
 static inline uint32_t
 find_an_empty_item(struct gro_tcp4_tbl *tbl)
 {
@@ -215,13 +113,13 @@ find_an_empty_item(struct gro_tcp4_tbl *tbl)
 }
 
 static inline uint32_t
-find_an_empty_key(struct gro_tcp4_tbl *tbl)
+find_an_empty_flow(struct gro_tcp4_tbl *tbl)
 {
 	uint32_t i;
-	uint32_t max_key_num = tbl->max_key_num;
+	uint32_t max_flow_num = tbl->max_flow_num;
 
-	for (i = 0; i < max_key_num; i++)
-		if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX)
+	for (i = 0; i < max_flow_num; i++)
+		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
 			return i;
 	return INVALID_ARRAY_INDEX;
 }
@@ -229,7 +127,6 @@ find_an_empty_key(struct gro_tcp4_tbl *tbl)
 static inline uint32_t
 insert_new_item(struct gro_tcp4_tbl *tbl,
 		struct rte_mbuf *pkt,
-		uint16_t ip_id,
 		uint32_t sent_seq,
 		uint32_t prev_idx,
 		uint64_t start_time)
@@ -245,7 +142,6 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
 	tbl->items[item_idx].start_time = start_time;
 	tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX;
 	tbl->items[item_idx].sent_seq = sent_seq;
-	tbl->items[item_idx].ip_id = ip_id;
 	tbl->items[item_idx].nb_merged = 1;
 	tbl->item_num++;
 
@@ -265,7 +161,7 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
 {
 	uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
 
-	/* set NULL to firstseg to indicate it's an empty item */
+	/* NULL indicates an empty item */
 	tbl->items[item_idx].firstseg = NULL;
 	tbl->item_num--;
 	if (prev_item_idx != INVALID_ARRAY_INDEX)
@@ -275,53 +171,33 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
 }
 
 static inline uint32_t
-insert_new_key(struct gro_tcp4_tbl *tbl,
-		struct tcp4_key *key_src,
+insert_new_flow(struct gro_tcp4_tbl *tbl,
+		struct tcp4_flow_key *src,
 		uint32_t item_idx)
 {
-	struct tcp4_key *key_dst;
-	uint32_t key_idx;
+	struct tcp4_flow_key *dst;
+	uint32_t flow_idx;
 
-	key_idx = find_an_empty_key(tbl);
-	if (key_idx == INVALID_ARRAY_INDEX)
+	flow_idx = find_an_empty_flow(tbl);
+	if (flow_idx == INVALID_ARRAY_INDEX)
 		return INVALID_ARRAY_INDEX;
 
-	key_dst = &(tbl->keys[key_idx].key);
+	dst = &(tbl->flows[flow_idx].key);
 
-	ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr));
-	ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr));
-	key_dst->ip_src_addr = key_src->ip_src_addr;
-	key_dst->ip_dst_addr = key_src->ip_dst_addr;
-	key_dst->recv_ack = key_src->recv_ack;
-	key_dst->src_port = key_src->src_port;
-	key_dst->dst_port = key_src->dst_port;
+	ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr));
+	ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr));
+	dst->ip_src_addr = src->ip_src_addr;
+	dst->ip_dst_addr = src->ip_dst_addr;
+	dst->recv_ack = src->recv_ack;
+	dst->src_port = src->src_port;
+	dst->dst_port = src->dst_port;
 
-	/* non-INVALID_ARRAY_INDEX value indicates this key is valid */
-	tbl->keys[key_idx].start_index = item_idx;
-	tbl->key_num++;
+	tbl->flows[flow_idx].start_index = item_idx;
+	tbl->flow_num++;
 
-	return key_idx;
+	return flow_idx;
 }
 
-static inline int
-is_same_key(struct tcp4_key k1, struct tcp4_key k2)
-{
-	if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0)
-		return 0;
-
-	if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0)
-		return 0;
-
-	return ((k1.ip_src_addr == k2.ip_src_addr) &&
-			(k1.ip_dst_addr == k2.ip_dst_addr) &&
-			(k1.recv_ack == k2.recv_ack) &&
-			(k1.src_port == k2.src_port) &&
-			(k1.dst_port == k2.dst_port));
-}
-
-/*
- * update packet length for the flushed packet.
- */
 static inline void
 update_header(struct gro_tcp4_item *item)
 {
@@ -343,30 +219,32 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	struct ipv4_hdr *ipv4_hdr;
 	struct tcp_hdr *tcp_hdr;
 	uint32_t sent_seq;
-	uint16_t tcp_dl, ip_id;
+	uint16_t tcp_dl, hdr_len;
 
-	struct tcp4_key key;
+	struct tcp4_flow_key key;
 	uint32_t cur_idx, prev_idx, item_idx;
-	uint32_t i, max_key_num;
+	uint32_t i, max_flow_num;
 	int cmp;
 
 	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
 	ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
 	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+	hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
 
 	/*
-	 * if FIN, SYN, RST, PSH, URG, ECE or
-	 * CWR is set, return immediately.
+	 * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
+	 * or CWR set.
 	 */
 	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
 		return -1;
-	/* if payload length is 0, return immediately */
-	tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len -
-		pkt->l4_len;
-	if (tcp_dl == 0)
+	/*
+	 * Don't process the packet whose payload length is less than or
+	 * equal to 0.
+	 */
+	tcp_dl = pkt->pkt_len - hdr_len;
+	if (tcp_dl <= 0)
 		return -1;
 
-	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
 	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
 
 	ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr));
@@ -377,49 +255,51 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	key.dst_port = tcp_hdr->dst_port;
 	key.recv_ack = tcp_hdr->recv_ack;
 
-	/* search for a key */
-	max_key_num = tbl->max_key_num;
-	for (i = 0; i < max_key_num; i++) {
-		if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) &&
-				is_same_key(tbl->keys[i].key, key))
+	/* Search for a matched flow. */
+	max_flow_num = tbl->max_flow_num;
+	for (i = 0; i < max_flow_num; i++) {
+		if ((tbl->flows[i].start_index != INVALID_ARRAY_INDEX) &&
+				is_same_tcp4_flow(tbl->flows[i].key, key))
 			break;
 	}
 
-	/* can't find a key, so insert a new key and a new item. */
-	if (i == tbl->max_key_num) {
-		item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq,
+	/*
+	 * Fail to find a matched flow. Insert a new flow and store the
+	 * packet into the flow.
+	 */
+	if (i == tbl->max_flow_num) {
+		item_idx = insert_new_item(tbl, pkt, sent_seq,
 				INVALID_ARRAY_INDEX, start_time);
 		if (item_idx == INVALID_ARRAY_INDEX)
 			return -1;
-		if (insert_new_key(tbl, &key, item_idx) ==
+		if (insert_new_flow(tbl, &key, item_idx) ==
 				INVALID_ARRAY_INDEX) {
-			/*
-			 * fail to insert a new key, so
-			 * delete the inserted item
-			 */
+			/* Fail to insert a new flow. */
 			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
 			return -1;
 		}
 		return 0;
 	}
 
-	/* traverse all packets in the item group to find one to merge */
-	cur_idx = tbl->keys[i].start_index;
+	/*
+	 * Check all packets in the flow and try to find a neighbor for
+	 * the input packet.
+	 */
+	cur_idx = tbl->flows[i].start_index;
 	prev_idx = cur_idx;
 	do {
 		cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
-				pkt->l4_len, tcp_dl, ip_id, sent_seq);
+				pkt->l4_len, tcp_dl, sent_seq, 0);
 		if (cmp) {
 			if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
-						pkt, ip_id,
-						sent_seq, cmp))
+						pkt, sent_seq, cmp, 0))
 				return 1;
 			/*
-			 * fail to merge two packets since the packet
-			 * length will be greater than the max value.
-			 * So insert the packet into the item group.
+			 * Fail to merge the two packets, as the packet
+			 * length is greater than the max value. Store
+			 * the packet into the flow.
 			 */
-			if (insert_new_item(tbl, pkt, ip_id, sent_seq,
+			if (insert_new_item(tbl, pkt, sent_seq,
 						prev_idx, start_time) ==
 					INVALID_ARRAY_INDEX)
 				return -1;
@@ -429,11 +309,8 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		cur_idx = tbl->items[cur_idx].next_pkt_idx;
 	} while (cur_idx != INVALID_ARRAY_INDEX);
 
-	/*
-	 * can't find a packet in the item group to merge,
-	 * so insert the packet into the item group.
-	 */
-	if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx,
+	/* Fail to find a neighbor, so store the packet into the flow. */
+	if (insert_new_item(tbl, pkt, sent_seq, prev_idx,
 				start_time) == INVALID_ARRAY_INDEX)
 		return -1;
 
@@ -448,44 +325,33 @@ gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
 {
 	uint16_t k = 0;
 	uint32_t i, j;
-	uint32_t max_key_num = tbl->max_key_num;
+	uint32_t max_flow_num = tbl->max_flow_num;
 
-	for (i = 0; i < max_key_num; i++) {
-		/* all keys have been checked, return immediately */
-		if (tbl->key_num == 0)
+	for (i = 0; i < max_flow_num; i++) {
+		if (unlikely(tbl->flow_num == 0))
 			return k;
 
-		j = tbl->keys[i].start_index;
+		j = tbl->flows[i].start_index;
 		while (j != INVALID_ARRAY_INDEX) {
 			if (tbl->items[j].start_time <= flush_timestamp) {
 				out[k++] = tbl->items[j].firstseg;
 				if (tbl->items[j].nb_merged > 1)
 					update_header(&(tbl->items[j]));
 				/*
-				 * delete the item and get
-				 * the next packet index
+				 * Delete the packet and get the next
+				 * packet in the flow.
 				 */
-				j = delete_item(tbl, j,
-						INVALID_ARRAY_INDEX);
+				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+				tbl->flows[i].start_index = j;
+				if (j == INVALID_ARRAY_INDEX)
+					tbl->flow_num--;
 
-				/*
-				 * delete the key as all of
-				 * packets are flushed
-				 */
-				if (j == INVALID_ARRAY_INDEX) {
-					tbl->keys[i].start_index =
-						INVALID_ARRAY_INDEX;
-					tbl->key_num--;
-				} else
-					/* update start_index of the key */
-					tbl->keys[i].start_index = j;
-
-				if (k == nb_out)
+				if (unlikely(k == nb_out))
 					return k;
 			} else
 				/*
-				 * left packets of this key won't be
-				 * timeout, so go to check other keys.
+				 * The left packets in this flow won't be
+				 * timeout. Go to check other flows.
 				 */
 				break;
 		}
diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h
index 0a81716..de9925e 100644
--- a/lib/librte_gro/gro_tcp4.h
+++ b/lib/librte_gro/gro_tcp4.h
@@ -33,17 +33,20 @@
 #ifndef _GRO_TCP4_H_
 #define _GRO_TCP4_H_
 
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
 #define INVALID_ARRAY_INDEX 0xffffffffUL
 #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
 
 /*
- * the max L3 length of a TCP/IPv4 packet. The L3 length
- * is the sum of ipv4 header, tcp header and L4 payload.
+ * The max length of an IPv4 packet, which includes the length of L3
+ * header, L4 header and the payload.
  */
-#define TCP4_MAX_L3_LENGTH UINT16_MAX
+#define MAX_IPV4_PKT_LENGTH UINT16_MAX
 
-/* criteria of mergeing packets */
-struct tcp4_key {
+/* Header fields representing a TCP/IPv4 flow. */
+struct tcp4_flow_key {
 	struct ether_addr eth_saddr;
 	struct ether_addr eth_daddr;
 	uint32_t ip_src_addr;
@@ -54,43 +57,39 @@ struct tcp4_key {
 	uint16_t dst_port;
 };
 
-struct gro_tcp4_key {
-	struct tcp4_key key;
+struct gro_tcp4_flow {
+	struct tcp4_flow_key key;
 	/*
-	 * the index of the first packet in the item group.
-	 * If the value is INVALID_ARRAY_INDEX, it means
-	 * the key is empty.
+	 * The index of the first packet in the flow.
+	 * INVALID_ARRAY_INDEX indicates an empty flow.
 	 */
 	uint32_t start_index;
 };
 
 struct gro_tcp4_item {
 	/*
-	 * first segment of the packet. If the value
+	 * First segment of the packet. If the value
 	 * is NULL, it means the item is empty.
 	 */
 	struct rte_mbuf *firstseg;
-	/* last segment of the packet */
+	/* Last segment of the packet */
 	struct rte_mbuf *lastseg;
 	/*
-	 * the time when the first packet is inserted
+	 * The time when the first packet is inserted
 	 * into the table. If a packet in the table is
 	 * merged with an incoming packet, this value
-	 * won't be updated. We set this value only
-	 * when the first packet is inserted into the
-	 * table.
+	 * won't be updated.
 	 */
 	uint64_t start_time;
 	/*
-	 * we use next_pkt_idx to chain the packets that
-	 * have same key value but can't be merged together.
+	 * next_pkt_idx is used to chain the packets that
+	 * are in the same flow but can't be merged together
+	 * (i.e. caused by packet reordering).
 	 */
 	uint32_t next_pkt_idx;
-	/* the sequence number of the packet */
+	/* TCP sequence number of the packet */
 	uint32_t sent_seq;
-	/* the IP ID of the packet */
-	uint16_t ip_id;
-	/* the number of merged packets */
+	/* The number of merged packets */
 	uint16_t nb_merged;
 };
 
@@ -100,31 +99,31 @@ struct gro_tcp4_item {
 struct gro_tcp4_tbl {
 	/* item array */
 	struct gro_tcp4_item *items;
-	/* key array */
-	struct gro_tcp4_key *keys;
+	/* flow array */
+	struct gro_tcp4_flow *flows;
 	/* current item number */
 	uint32_t item_num;
-	/* current key num */
-	uint32_t key_num;
+	/* current flow num */
+	uint32_t flow_num;
 	/* item array size */
 	uint32_t max_item_num;
-	/* key array size */
-	uint32_t max_key_num;
+	/* flow array size */
+	uint32_t max_flow_num;
 };
 
 /**
  * This function creates a TCP/IPv4 reassembly table.
  *
  * @param socket_id
- *  socket index for allocating TCP/IPv4 reassemble table
+ *  Socket index for allocating the TCP/IPv4 reassemble table
  * @param max_flow_num
- *  the maximum number of flows in the TCP/IPv4 GRO table
+ *  The maximum number of flows in the TCP/IPv4 GRO table
  * @param max_item_per_flow
- *  the maximum packet number per flow.
+ *  The maximum number of packets per flow
  *
  * @return
- *  if create successfully, return a pointer which points to the
- *  created TCP/IPv4 GRO table. Otherwise, return NULL.
+ *  - Return the table pointer on success.
+ *  - Return NULL on failure.
  */
 void *gro_tcp4_tbl_create(uint16_t socket_id,
 		uint16_t max_flow_num,
@@ -134,62 +133,53 @@ void *gro_tcp4_tbl_create(uint16_t socket_id,
  * This function destroys a TCP/IPv4 reassembly table.
  *
  * @param tbl
- *  a pointer points to the TCP/IPv4 reassembly table.
+ *  Pointer pointing to the TCP/IPv4 reassembly table.
  */
 void gro_tcp4_tbl_destroy(void *tbl);
 
 /**
- * This function searches for a packet in the TCP/IPv4 reassembly table
- * to merge with the inputted one. To merge two packets is to chain them
- * together and update packet headers. Packets, whose SYN, FIN, RST, PSH
- * CWR, ECE or URG bit is set, are returned immediately. Packets which
- * only have packet headers (i.e. without data) are also returned
- * immediately. Otherwise, the packet is either merged, or inserted into
- * the table. Besides, if there is no available space to insert the
- * packet, this function returns immediately too.
+ * This function merges a TCP/IPv4 packet. It doesn't process the packet,
+ * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or doesn't have
+ * payload. It returns the packet if there is no available space in the
+ * table.
  *
- * This function assumes the inputted packet is with correct IPv4 and
- * TCP checksums. And if two packets are merged, it won't re-calculate
- * IPv4 and TCP checksums. Besides, if the inputted packet is IP
- * fragmented, it assumes the packet is complete (with TCP header).
+ * This function doesn't check if the packet has correct checksums.
+ * Additionally, it doesn't re-calculate checksums for the merged packet.
+ * If the input packet is IP fragmented, it assumes the packet is complete.
  *
  * @param pkt
- *  packet to reassemble.
+ *  Packet to reassemble
  * @param tbl
- *  a pointer that points to a TCP/IPv4 reassembly table.
+ *  Pointer pointing to the TCP/IPv4 reassembly table
  * @start_time
- *  the start time that the packet is inserted into the table
+ *  The time when the packet is inserted into the table
  *
  * @return
- *  if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE
- *  or URG bit is set, or there is no available space in the table to
- *  insert a new item or a new key, return a negative value. If the
- *  packet is merged successfully, return an positive value. If the
- *  packet is inserted into the table, return 0.
+ *  - Return a positive value if the input packet is merged.
+ *  - Return zero if the input packet isn't merged but stored in the table.
+ *  - Return a negative value for invalid parameters.
  */
 int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		struct gro_tcp4_tbl *tbl,
 		uint64_t start_time);
 
 /**
- * This function flushes timeout packets in a TCP/IPv4 reassembly table
- * to applications, and without updating checksums for merged packets.
- * The max number of flushed timeout packets is the element number of
- * the array which is used to keep flushed packets.
+ * This function flushes timeout packets in a TCP/IPv4 reassembly table,
+ * and without updating checksums.
  *
  * @param tbl
- *  a pointer that points to a TCP GRO table.
+ *  Pointer points to a TCP/IPv4 reassembly table
  * @param flush_timestamp
- *  this function flushes packets which are inserted into the table
- *  before or at the flush_timestamp.
+ *  Flush packets which are inserted into the table before or at the
+ *  flush_timestamp
  * @param out
- *  pointer array which is used to keep flushed packets.
+ *  Pointer array used to keep flushed packets
  * @param nb_out
- *  the element number of out. It's also the max number of timeout
+ *  The element number in 'out'. It also determines the maximum number of
  *  packets that can be flushed finally.
  *
  * @return
- *  the number of packets that are returned.
+ *  The number of flushed packets
  */
 uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
 		uint64_t flush_timestamp,
@@ -201,10 +191,124 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
  * reassembly table.
  *
  * @param tbl
- *  pointer points to a TCP/IPv4 reassembly table.
+ *  Pointer pointing to a TCP/IPv4 reassembly table
  *
  * @return
- *  the number of packets in the table
+ *  The number of packets in the table
  */
 uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
+
+/*
+ * Check if two TCP/IPv4 packets belong to the same flow.
+ */
+static inline int
+is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
+{
+	if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0)
+		return 0;
+
+	if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0)
+		return 0;
+
+	return ((k1.ip_src_addr == k2.ip_src_addr) &&
+			(k1.ip_dst_addr == k2.ip_dst_addr) &&
+			(k1.recv_ack == k2.recv_ack) &&
+			(k1.src_port == k2.src_port) &&
+			(k1.dst_port == k2.dst_port));
+}
+
+/*
+ * Check if two TCP/IPv4 packets are neighbors.
+ */
+static inline int
+check_seq_option(struct gro_tcp4_item *item,
+		struct tcp_hdr *tcph,
+		uint16_t tcp_hl,
+		uint16_t tcp_dl,
+		uint32_t sent_seq,
+		uint16_t l2_offset)
+{
+	struct rte_mbuf *pkt_orig = item->firstseg;
+	struct ipv4_hdr *iph_orig;
+	struct tcp_hdr *tcph_orig;
+	uint16_t len, l4_len_orig;
+
+	iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
+			l2_offset + pkt_orig->l2_len);
+	tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
+	l4_len_orig = pkt_orig->l4_len;
+
+	/* Check if TCP option fields equal */
+	len = RTE_MAX(tcp_hl, l4_len_orig) - sizeof(struct tcp_hdr);
+	if ((tcp_hl != l4_len_orig) || ((len > 0) &&
+				(memcmp(tcph + 1, tcph_orig + 1,
+					len) != 0)))
+		return 0;
+
+	/* Check if the two packets are neighbors */
+	len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
+		pkt_orig->l3_len - l4_len_orig;
+	if (sent_seq == item->sent_seq + len)
+		/* Append the new packet */
+		return 1;
+	else if (sent_seq + tcp_dl == item->sent_seq)
+		/* Pre-pend the new packet */
+		return -1;
+	else
+		return 0;
+}
+
+/*
+ * Merge two TCP/IPv4 packets without updating checksums.
+ * If cmp is larger than 0, append the new packet to the
+ * original packet. Otherwise, pre-pend the new packet to
+ * the original packet.
+ */
+static inline int
+merge_two_tcp4_packets(struct gro_tcp4_item *item,
+		struct rte_mbuf *pkt,
+		uint32_t sent_seq,
+		int cmp,
+		uint16_t l2_offset)
+{
+	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
+	uint16_t hdr_len;
+
+	if (cmp > 0) {
+		pkt_head = item->firstseg;
+		pkt_tail = pkt;
+	} else {
+		pkt_head = pkt;
+		pkt_tail = item->firstseg;
+	}
+
+	/* Check if the length is greater than the max value */
+	hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
+		pkt_head->l4_len;
+	if (pkt_head->pkt_len - l2_offset - pkt_head->l2_len +
+			pkt_tail->pkt_len - hdr_len > MAX_IPV4_PKT_LENGTH)
+		return 0;
+
+	/* Remove packet header for the tail packet */
+	rte_pktmbuf_adj(pkt_tail, hdr_len);
+
+	/* Chain two packets together */
+	if (cmp > 0) {
+		item->lastseg->next = pkt;
+		item->lastseg = rte_pktmbuf_lastseg(pkt);
+	} else {
+		lastseg = rte_pktmbuf_lastseg(pkt);
+		lastseg->next = item->firstseg;
+		item->firstseg = pkt;
+		/* Update sent_seq to the smaller value */
+		item->sent_seq = sent_seq;
+	}
+	item->nb_merged++;
+
+	/* Update mbuf metadata for the merged packet */
+	pkt_head->nb_segs += pkt_tail->nb_segs;
+	pkt_head->pkt_len += pkt_tail->pkt_len;
+
+	return 1;
+}
 #endif
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index 7853246..dfee932 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -51,6 +51,9 @@ static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
 static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
 			gro_tcp4_tbl_pkt_count, NULL};
 
+#define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
+		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
+
 /*
  * GRO context structure, which is used to merge packets. It keeps
  * many reassembly tables of desired GRO types. Applications need to
@@ -131,62 +134,55 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
 		const struct rte_gro_param *param)
 {
-	uint16_t i;
-	uint16_t nb_after_gro = nb_pkts;
-	uint32_t item_num;
-
-	/* allocate a reassembly table for TCP/IPv4 GRO */
+	/* Allocate a reassembly table for TCP/IPv4 GRO. */
 	struct gro_tcp4_tbl tcp_tbl;
-	struct gro_tcp4_key tcp_keys[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
 	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
 
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
-	uint16_t unprocess_num = 0;
-	int32_t ret;
 	uint64_t current_time;
+	uint32_t item_num;
+	int32_t ret;
+	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
 
 	if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0)
 		return nb_pkts;
 
-	/* get the actual number of packets */
+	/* Get the actual number of packets. */
 	item_num = RTE_MIN(nb_pkts, (param->max_flow_num *
-			param->max_item_per_flow));
+				param->max_item_per_flow));
 	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
 
 	for (i = 0; i < item_num; i++)
-		tcp_keys[i].start_index = INVALID_ARRAY_INDEX;
+		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
 
-	tcp_tbl.keys = tcp_keys;
+	tcp_tbl.flows = tcp_flows;
 	tcp_tbl.items = tcp_items;
-	tcp_tbl.key_num = 0;
+	tcp_tbl.flow_num = 0;
 	tcp_tbl.item_num = 0;
-	tcp_tbl.max_key_num = item_num;
+	tcp_tbl.max_flow_num = item_num;
 	tcp_tbl.max_item_num = item_num;
 
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
-					RTE_PTYPE_L4_TCP)) ==
-				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
-			ret = gro_tcp4_reassemble(pkts[i],
-					&tcp_tbl,
+		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl,
 					current_time);
 			if (ret > 0)
-				/* merge successfully */
+				/* Merge successfully */
 				nb_after_gro--;
-			else if (ret < 0) {
-				unprocess_pkts[unprocess_num++] =
-					pkts[i];
-			}
+			else if (ret < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else
 			unprocess_pkts[unprocess_num++] = pkts[i];
 	}
 
-	/* re-arrange GROed packets */
 	if (nb_after_gro < nb_pkts) {
+		/* Flush packets from the tables. */
 		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time,
 				pkts, nb_pkts);
+		/* Copy unprocessed packets. */
 		if (unprocess_num > 0) {
 			memcpy(&pkts[i], unprocess_pkts,
 					sizeof(struct rte_mbuf *) *
@@ -202,10 +198,11 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
 		void *ctx)
 {
-	uint16_t i, unprocess_num = 0;
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	struct gro_ctx *gro_ctx = ctx;
+	void *tbl;
 	uint64_t current_time;
+	uint16_t i, unprocess_num = 0;
 
 	if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0)
 		return nb_pkts;
@@ -213,12 +210,9 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
-					RTE_PTYPE_L4_TCP)) ==
-				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
-			if (gro_tcp4_reassemble(pkts[i],
-						gro_ctx->tbls
-						[RTE_GRO_TCP_IPV4_INDEX],
+		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+			tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
+			if (gro_tcp4_reassemble(pkts[i], tbl,
 						current_time) < 0)
 				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else
@@ -252,6 +246,7 @@ rte_gro_timeout_flush(void *ctx,
 				flush_timestamp,
 				out, max_nb_out);
 	}
+
 	return 0;
 }
 
@@ -274,5 +269,6 @@ rte_gro_get_pkt_count(void *ctx)
 			continue;
 		item_num += pkt_count_fn(gro_ctx->tbls[i]);
 	}
+
 	return item_num;
 }
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 2/2] gro: support VxLAN GRO
  2017-11-25  3:17 [PATCH 0/2] Support VxLAN GRO Jiayu Hu
  2017-11-25  3:17 ` [PATCH 1/2] gro: TCP/IPV4 GRO codes cleanup Jiayu Hu
@ 2017-11-25  3:17 ` Jiayu Hu
  2017-12-14  2:49 ` [PATCH v2 0/2] Support " Jiayu Hu
  2 siblings, 0 replies; 31+ messages in thread
From: Jiayu Hu @ 2017-11-25  3:17 UTC (permalink / raw)
  To: dev; +Cc: jianfeng.tan, konstantin.ananyev, Jiayu Hu

This patch adds a framework that allows GRO on tunneled packets.
Furthermore, it leverages that framework to provide GRO support for
VxLAN-encapsulated packets.

Supported VxLAN packets must have an outer IPv4 header, and contain an
inner TCP/IPv4 packet.

VxLAN GRO doesn't check if input packets have correct checksums and
doesn't update checksums for output packets. Additionally, if input
packets are IP fragmented, it assumes they are complete.

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
---
 lib/librte_gro/Makefile         |   1 +
 lib/librte_gro/gro_vxlan_tcp4.c | 481 ++++++++++++++++++++++++++++++++++++++++
 lib/librte_gro/gro_vxlan_tcp4.h | 178 +++++++++++++++
 lib/librte_gro/rte_gro.c        | 118 ++++++++--
 lib/librte_gro/rte_gro.h        |   3 +
 5 files changed, 760 insertions(+), 21 deletions(-)
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.c
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.h

diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile
index eb423cc..0110455 100644
--- a/lib/librte_gro/Makefile
+++ b/lib/librte_gro/Makefile
@@ -45,6 +45,7 @@ LIBABIVER := 1
 # source files
 SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
 SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c
+SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_vxlan_tcp4.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h
diff --git a/lib/librte_gro/gro_vxlan_tcp4.c b/lib/librte_gro/gro_vxlan_tcp4.c
new file mode 100644
index 0000000..09afa0b
--- /dev/null
+++ b/lib/librte_gro/gro_vxlan_tcp4.c
@@ -0,0 +1,481 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_udp.h>
+
+#include "gro_vxlan_tcp4.h"
+
+void *
+gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow)
+{
+	struct gro_vxlan_tcp4_tbl *tbl;
+	size_t size;
+	uint32_t entries_num, i;
+
+	entries_num = max_flow_num * max_item_per_flow;
+	entries_num = RTE_MIN(entries_num, GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM);
+
+	if (entries_num == 0)
+		return NULL;
+
+	tbl = rte_zmalloc_socket(__func__,
+			sizeof(struct gro_vxlan_tcp4_tbl),
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl == NULL)
+		return NULL;
+
+	size = sizeof(struct gro_vxlan_tcp4_item) * entries_num;
+	tbl->items = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->items == NULL) {
+		rte_free(tbl);
+		return NULL;
+	}
+	tbl->max_item_num = entries_num;
+
+	size = sizeof(struct gro_vxlan_tcp4_flow) * entries_num;
+	tbl->flows = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->flows == NULL) {
+		rte_free(tbl->items);
+		rte_free(tbl);
+		return NULL;
+	}
+
+	for (i = 0; i < entries_num; i++)
+		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+	tbl->max_flow_num = entries_num;
+
+	return tbl;
+}
+
+void
+gro_vxlan_tcp4_tbl_destroy(void *tbl)
+{
+	struct gro_vxlan_tcp4_tbl *vxlan_tbl = tbl;
+
+	if (vxlan_tbl) {
+		rte_free(vxlan_tbl->items);
+		rte_free(vxlan_tbl->flows);
+	}
+	rte_free(vxlan_tbl);
+}
+
+static inline uint32_t
+find_an_empty_item(struct gro_vxlan_tcp4_tbl *tbl)
+{
+	uint32_t max_item_num = tbl->max_item_num, i;
+
+	for (i = 0; i < max_item_num; i++)
+		if (tbl->items[i].inner_item.firstseg == NULL)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+find_an_empty_flow(struct gro_vxlan_tcp4_tbl *tbl)
+{
+	uint32_t max_flow_num = tbl->max_flow_num, i;
+
+	for (i = 0; i < max_flow_num; i++)
+		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+insert_new_item(struct gro_vxlan_tcp4_tbl *tbl,
+		struct rte_mbuf *pkt,
+		uint16_t outer_ip_id,
+		uint32_t sent_seq,
+		uint32_t prev_idx,
+		uint64_t start_time)
+{
+	uint32_t item_idx;
+
+	item_idx = find_an_empty_item(tbl);
+	if (item_idx == INVALID_ARRAY_INDEX)
+		return INVALID_ARRAY_INDEX;
+
+	tbl->items[item_idx].inner_item.firstseg = pkt;
+	tbl->items[item_idx].inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
+	tbl->items[item_idx].inner_item.start_time = start_time;
+	tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
+	tbl->items[item_idx].inner_item.sent_seq = sent_seq;
+	tbl->items[item_idx].inner_item.nb_merged = 1;
+	tbl->items[item_idx].outer_ip_id = outer_ip_id;
+	tbl->item_num++;
+
+	/* If the previous packet exists, chain the new one with it. */
+	if (prev_idx != INVALID_ARRAY_INDEX) {
+		tbl->items[item_idx].inner_item.next_pkt_idx =
+			tbl->items[prev_idx].inner_item.next_pkt_idx;
+		tbl->items[prev_idx].inner_item.next_pkt_idx = item_idx;
+	}
+
+	return item_idx;
+}
+
+static inline uint32_t
+delete_item(struct gro_vxlan_tcp4_tbl *tbl,
+		uint32_t item_idx,
+		uint32_t prev_item_idx)
+{
+	uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
+
+	/* NULL indicates an empty item. */
+	tbl->items[item_idx].inner_item.firstseg = NULL;
+	tbl->item_num--;
+	if (prev_item_idx != INVALID_ARRAY_INDEX)
+		tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
+
+	return next_idx;
+}
+
+static inline uint32_t
+insert_new_flow(struct gro_vxlan_tcp4_tbl *tbl,
+		struct vxlan_tcp4_flow_key *src,
+		uint32_t item_idx)
+{
+	struct vxlan_tcp4_flow_key *dst;
+	uint32_t flow_idx;
+
+	flow_idx = find_an_empty_flow(tbl);
+	if (flow_idx == INVALID_ARRAY_INDEX)
+		return INVALID_ARRAY_INDEX;
+
+	dst = &(tbl->flows[flow_idx].key);
+
+	ether_addr_copy(&(src->inner_key.eth_saddr),
+			&(dst->inner_key.eth_saddr));
+	ether_addr_copy(&(src->inner_key.eth_daddr),
+			&(dst->inner_key.eth_daddr));
+	dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
+	dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
+	dst->inner_key.recv_ack = src->inner_key.recv_ack;
+	dst->inner_key.src_port = src->inner_key.src_port;
+	dst->inner_key.dst_port = src->inner_key.dst_port;
+
+	dst->vxlan_hdr = src->vxlan_hdr;
+	ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
+	ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
+	dst->outer_ip_src_addr = src->outer_ip_src_addr;
+	dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
+	dst->outer_src_port = src->outer_src_port;
+	dst->outer_dst_port = src->outer_dst_port;
+
+	tbl->flows[flow_idx].start_index = item_idx;
+	tbl->flow_num++;
+
+	return flow_idx;
+}
+
+static inline int
+is_same_vxlan_tcp4_flow(struct vxlan_tcp4_flow_key k1,
+		struct vxlan_tcp4_flow_key k2)
+{
+	if (is_same_tcp4_flow(k1.inner_key, k2.inner_key) == 0 ||
+			is_same_ether_addr(&k1.outer_eth_saddr,
+				&k2.outer_eth_saddr) == 0 ||
+			is_same_ether_addr(&k1.outer_eth_daddr,
+				&k2.outer_eth_daddr) == 0)
+		return 0;
+
+	return ((k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
+			(k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
+			(k1.outer_src_port == k2.outer_src_port) &&
+			(k1.outer_dst_port == k2.outer_dst_port) &&
+			(k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
+			(k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni));
+}
+
+static inline int
+check_vxlan_seq_option(struct gro_vxlan_tcp4_item *item,
+		struct tcp_hdr *tcp_hdr,
+		uint16_t tcp_hl,
+		uint16_t tcp_dl,
+		uint16_t outer_ip_id,
+		uint32_t sent_seq)
+{
+	struct rte_mbuf *pkt = item->inner_item.firstseg;
+	int cmp;
+	uint16_t l2_offset;
+
+	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
+	cmp = check_seq_option(&item->inner_item, tcp_hdr, tcp_hl, tcp_dl,
+			sent_seq, l2_offset);
+	if (cmp == 1 && outer_ip_id == item->outer_ip_id + 1)
+		/* Append the packet. */
+		return 1;
+	else if (cmp == -1 && outer_ip_id + item->inner_item.nb_merged ==
+			item->outer_ip_id)
+		/* Prepend the packet. */
+		return -1;
+	else
+		return 0;
+}
+
+static inline int
+merge_two_vxlan_tcp4_packets(struct gro_vxlan_tcp4_item *item,
+		struct rte_mbuf *pkt,
+		uint16_t outer_ip_id,
+		uint32_t sent_seq,
+		int cmp)
+{
+	if (merge_two_tcp4_packets(&item->inner_item, pkt, sent_seq, cmp,
+				pkt->outer_l2_len + pkt->outer_l3_len)) {
+		item->outer_ip_id = outer_ip_id;
+		return 1;
+	} else
+		return 0;
+}
+
+static inline void
+update_vxlan_header(struct gro_vxlan_tcp4_item *item)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct udp_hdr *udp_hdr;
+	struct rte_mbuf *pkt = item->inner_item.firstseg;
+	uint16_t len;
+
+	/* Update the outer IPv4 header. */
+	len = pkt->pkt_len - pkt->outer_l2_len;
+	ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			pkt->outer_l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
+
+	/* Update the outer UDP header. */
+	len -= pkt->outer_l3_len;
+	udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + pkt->outer_l3_len);
+	udp_hdr->dgram_len = rte_cpu_to_be_16(len);
+
+	/* Update the inner IPv4 header. */
+	len -= pkt->l2_len;
+	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
+}
+
+int32_t
+gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
+		struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t start_time)
+{
+	struct ether_hdr *outer_eth_hdr, *eth_hdr;
+	struct ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
+	struct tcp_hdr *tcp_hdr;
+	struct udp_hdr *udp_hdr;
+	struct vxlan_hdr *vxlan_hdr;
+	uint32_t sent_seq;
+	uint16_t tcp_dl, outer_ip_id;
+
+	struct vxlan_tcp4_flow_key key;
+	uint32_t cur_idx, prev_idx, item_idx;
+	uint32_t i, max_flow_num;
+	uint16_t hdr_len;
+	int cmp;
+
+	outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+	outer_ipv4_hdr = (struct ipv4_hdr *)((char *)outer_eth_hdr +
+			pkt->outer_l2_len);
+	outer_ip_id = rte_be_to_cpu_16(outer_ipv4_hdr->packet_id);
+	udp_hdr = (struct udp_hdr *)((char *)outer_ipv4_hdr +
+			pkt->outer_l3_len);
+	vxlan_hdr = (struct vxlan_hdr *)((char *)udp_hdr +
+			sizeof(struct udp_hdr));
+	eth_hdr = (struct ether_hdr *)((char *)vxlan_hdr +
+			sizeof(struct vxlan_hdr));
+	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
+	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+
+	/*
+	 * Check if the inner TCP header flag sets FIN, SYN, RST,
+	 * PSH, URG, ECE or CWR bit.
+	 */
+	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
+		return -1;
+	hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
+		pkt->l3_len + pkt->l4_len;
+	/*
+	 * If the payload length is less than or equal to 0, return
+	 * immediately.
+	 */
+	tcp_dl = pkt->pkt_len - hdr_len;
+	if (tcp_dl <= 0)
+		return -1;
+
+	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
+
+	ether_addr_copy(&(eth_hdr->s_addr), &(key.inner_key.eth_saddr));
+	ether_addr_copy(&(eth_hdr->d_addr), &(key.inner_key.eth_daddr));
+	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
+	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
+	key.inner_key.src_port = tcp_hdr->src_port;
+	key.inner_key.dst_port = tcp_hdr->dst_port;
+	key.inner_key.recv_ack = tcp_hdr->recv_ack;
+
+	ether_addr_copy(&(outer_eth_hdr->s_addr), &(key.outer_eth_saddr));
+	ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
+	key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
+	key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
+	key.outer_src_port = udp_hdr->src_port;
+	key.outer_dst_port = udp_hdr->dst_port;
+	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
+	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;
+
+	/* Search for a matched flow. */
+	max_flow_num = tbl->max_flow_num;
+	for (i = 0; i < max_flow_num; i++) {
+		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX &&
+				is_same_vxlan_tcp4_flow(tbl->flows[i].key,
+					key))
+			break;
+	}
+
+	/*
+	 * Can't find a matched flow. Insert a new flow and store the
+	 * packet into the flow.
+	 */
+	if (i == tbl->max_flow_num) {
+		item_idx = insert_new_item(tbl, pkt, outer_ip_id, sent_seq,
+				INVALID_ARRAY_INDEX, start_time);
+		if (item_idx == INVALID_ARRAY_INDEX)
+			return -1;
+		if (insert_new_flow(tbl, &key, item_idx) ==
+				INVALID_ARRAY_INDEX) {
+			/*
+			 * Fail to insert a new flow, so
+			 * delete the inserted packet.
+			 */
+			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
+			return -1;
+		}
+		return 0;
+	}
+
+	/* Check all packets in the flow and try to find a neighbor. */
+	cur_idx = tbl->flows[i].start_index;
+	prev_idx = cur_idx;
+	do {
+		cmp = check_vxlan_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
+				pkt->l4_len, tcp_dl, outer_ip_id, sent_seq);
+		if (cmp) {
+			if (merge_two_vxlan_tcp4_packets(&(tbl->items[cur_idx]),
+						pkt, outer_ip_id, sent_seq,
+						cmp))
+				return 1;
+			/*
+			 * Can't merge two packets, as the packet
+			 * length will be greater than the max value.
+			 * Insert the packet into the flow.
+			 */
+			if (insert_new_item(tbl, pkt, outer_ip_id, sent_seq,
+						prev_idx, start_time) ==
+					INVALID_ARRAY_INDEX)
+				return -1;
+			return 0;
+		}
+		prev_idx = cur_idx;
+		cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx;
+	} while (cur_idx != INVALID_ARRAY_INDEX);
+
+	/* Can't find neighbor. Insert the packet into the flow. */
+	if (insert_new_item(tbl, pkt, outer_ip_id, sent_seq, prev_idx,
+				start_time) == INVALID_ARRAY_INDEX)
+		return -1;
+
+	return 0;
+}
+
+uint16_t
+gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out)
+{
+	uint16_t k = 0;
+	uint32_t i, j;
+	uint32_t max_flow_num = tbl->max_flow_num;
+
+	for (i = 0; i < max_flow_num; i++) {
+		if (unlikely(tbl->flow_num == 0))
+			return k;
+
+		j = tbl->flows[i].start_index;
+		while (j != INVALID_ARRAY_INDEX) {
+			if (tbl->items[j].inner_item.start_time <=
+					flush_timestamp) {
+				out[k++] = tbl->items[j].inner_item.firstseg;
+				if (tbl->items[j].inner_item.nb_merged > 1)
+					update_vxlan_header(&(tbl->items[j]));
+				/*
+				 * Delete the item and get the next packet
+				 * index.
+				 */
+				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+				tbl->flows[i].start_index = j;
+				if (j == INVALID_ARRAY_INDEX)
+					tbl->flow_num--;
+
+				if (unlikely(k == nb_out))
+					return k;
+			} else
+				/*
+				 * The left packets in the flow won't be
+				 * timeout. Go to check other flows.
+				 */
+				break;
+		}
+	}
+	return k;
+}
+
+uint32_t
+gro_vxlan_tcp4_tbl_pkt_count(void *tbl)
+{
+	struct gro_vxlan_tcp4_tbl *gro_tbl = tbl;
+
+	if (gro_tbl)
+		return gro_tbl->item_num;
+
+	return 0;
+}
diff --git a/lib/librte_gro/gro_vxlan_tcp4.h b/lib/librte_gro/gro_vxlan_tcp4.h
new file mode 100644
index 0000000..ed9a422
--- /dev/null
+++ b/lib/librte_gro/gro_vxlan_tcp4.h
@@ -0,0 +1,178 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _GRO_VXLAN_TCP4_H_
+#define _GRO_VXLAN_TCP4_H_
+
+#include "gro_tcp4.h"
+
+#define GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
+
+/* Header fields representing a VxLAN flow */
+struct vxlan_tcp4_flow_key {
+	struct tcp4_flow_key inner_key;
+	struct vxlan_hdr vxlan_hdr;
+
+	struct ether_addr outer_eth_saddr;
+	struct ether_addr outer_eth_daddr;
+
+	uint32_t outer_ip_src_addr;
+	uint32_t outer_ip_dst_addr;
+
+	/* Outer UDP ports */
+	uint16_t outer_src_port;
+	uint16_t outer_dst_port;
+
+};
+
+struct gro_vxlan_tcp4_flow {
+	struct vxlan_tcp4_flow_key key;
+	/*
+	 * The index of the first item in the flow. INVALID_ARRAY_INDEX
+	 * indicates an empty flow.
+	 */
+	uint32_t start_index;
+};
+
+struct gro_vxlan_tcp4_item {
+	struct gro_tcp4_item inner_item;
+	/* IP id in the outer IPv4 header */
+	uint16_t outer_ip_id;
+};
+
+/*
+ * VxLAN (with an outer IPv4 header and inner TCP/IPv4 headers)
+ * reassembly table structure
+ */
+struct gro_vxlan_tcp4_tbl {
+	/* item array */
+	struct gro_vxlan_tcp4_item *items;
+	/* flow array */
+	struct gro_vxlan_tcp4_flow *flows;
+	/* current item number */
+	uint32_t item_num;
+	/* current flow number */
+	uint32_t flow_num;
+	/* the maximum item number */
+	uint32_t max_item_num;
+	/* the maximum flow number */
+	uint32_t max_flow_num;
+};
+
+/**
+ * This function creates a VxLAN reassembly table for VxLAN packets
+ * which have an outer IPv4 header and inner TCP/IPv4 headers.
+ *
+ * @param socket_id
+ *  Socket index for allocating the reassembly table
+ * @param max_flow_num
+ *  The maximum number of flows in the table
+ * @param max_item_per_flow
+ *  The maximum number of packets per flow
+ *
+ * @return
+ *  - Return the table pointer on success.
+ *  - Return NULL on failure.
+ */
+void *gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow);
+
+/**
+ * This function destroys a VxLAN reassembly table.
+ *
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ */
+void gro_vxlan_tcp4_tbl_destroy(void *tbl);
+
+/**
+ * This function merges a VxLAN packet which has an outer IPv4 header and
+ * inner TCP/IPv4 headers. It doesn't process packets which have the SYN,
+ * FIN, RST, PSH, CWR, ECE or URG bit set, or which don't have payload. It
+ * returns the packet if there is no available space in the table.
+ *
+ * This function doesn't check if the packet has correct checksums.
+ * Additionally, it doesn't re-calculate checksums for the merged packet.
+ * If the input packet is IP fragmented, it assumes the packet is complete.
+ *
+ * @param pkt
+ *  Packet to reassemble
+ * @param tbl
+ *  VxLAN reassembly table pointer
+ * @param start_time
+ *  The time when the packet is inserted into the table
+ *
+ * @return
+ *  - Return a positive value if the packet is merged.
+ *  - Return zero if the packet isn't merged but stored in the table.
+ *  - Return a negative value for invalid parameters.
+ */
+int32_t gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
+		struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t start_time);
+
+/**
+ * This function flushes timeout packets in the VxLAN reassembly table
+ * without updating their checksums.
+ *
+ * @param tbl
+ *  Pointer that points to a VxLAN reassembly table.
+ * @param flush_timestamp
+ *  This function flushes packets which are inserted into the table
+ *  before or at the flush_timestamp.
+ * @param out
+ *  Pointer array which is used to keep flushed packets.
+ * @param nb_out
+ *  The element number in 'out'. It also determines the maximum number of
+ *  packets that can be flushed finally.
+ *
+ * @return
+ *  The number of flushed packets.
+ */
+uint16_t gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out);
+
+/**
+ * This function returns the number of the packets in a VxLAN
+ * reassembly table.
+ *
+ * @param tbl
+ *  Pointer that points to a VxLAN reassembly table.
+ *
+ * @return
+ *  The number of packets in the table.
+ */
+uint32_t gro_vxlan_tcp4_tbl_pkt_count(void *tbl);
+#endif
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index dfee932..6537bd6 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -37,6 +37,7 @@
 
 #include "rte_gro.h"
 #include "gro_tcp4.h"
+#include "gro_vxlan_tcp4.h"
 
 typedef void *(*gro_tbl_create_fn)(uint16_t socket_id,
 		uint16_t max_flow_num,
@@ -45,15 +46,28 @@ typedef void (*gro_tbl_destroy_fn)(void *tbl);
 typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl);
 
 static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = {
-		gro_tcp4_tbl_create, NULL};
+		gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, NULL};
 static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
-			gro_tcp4_tbl_destroy, NULL};
+			gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy,
+			NULL};
 static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
-			gro_tcp4_tbl_pkt_count, NULL};
+			gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count,
+			NULL};
 
 #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
 		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
 
+#define IS_IPV4_VXLAN_TCP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
+		((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \
+		((ptype & RTE_PTYPE_TUNNEL_VXLAN) == \
+		 RTE_PTYPE_TUNNEL_VXLAN) && \
+		 ((ptype & RTE_PTYPE_INNER_L4_TCP) == \
+		  RTE_PTYPE_INNER_L4_TCP) && \
+		  (((ptype & RTE_PTYPE_INNER_L3_MASK) & \
+		    (RTE_PTYPE_INNER_L3_IPV4 | \
+		     RTE_PTYPE_INNER_L3_IPV4_EXT | \
+		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0))
+
 /*
  * GRO context structure, which is used to merge packets. It keeps
  * many reassembly tables of desired GRO types. Applications need to
@@ -139,13 +153,21 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
 	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
 
+	/* Allocate a reassembly table for VXLAN GRO. */
+	struct gro_vxlan_tcp4_tbl vxlan_tbl;
+	struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {
+		{{0}, 0} };
+
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	uint64_t current_time;
 	uint32_t item_num;
 	int32_t ret;
 	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
+	uint8_t do_tcp4_gro = 0, do_vxlan_tcp4_gro = 0;
 
-	if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0)
+	if ((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
+					RTE_GRO_TCP_IPV4)) == 0)
 		return nb_pkts;
 
 	/* Get the actual number of packets. */
@@ -153,20 +175,46 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 				param->max_item_per_flow));
 	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
 
-	for (i = 0; i < item_num; i++)
-		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
+	if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
+		for (i = 0; i < item_num; i++)
+			vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		vxlan_tbl.flows = vxlan_flows;
+		vxlan_tbl.items = vxlan_items;
+		vxlan_tbl.flow_num = 0;
+		vxlan_tbl.item_num = 0;
+		vxlan_tbl.max_flow_num = item_num;
+		vxlan_tbl.max_item_num = item_num;
+		do_vxlan_tcp4_gro = 1;
+	}
 
-	tcp_tbl.flows = tcp_flows;
-	tcp_tbl.items = tcp_items;
-	tcp_tbl.flow_num = 0;
-	tcp_tbl.item_num = 0;
-	tcp_tbl.max_flow_num = item_num;
-	tcp_tbl.max_item_num = item_num;
+	if (param->gro_types & RTE_GRO_TCP_IPV4) {
+		for (i = 0; i < item_num; i++)
+			tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		tcp_tbl.flows = tcp_flows;
+		tcp_tbl.items = tcp_items;
+		tcp_tbl.flow_num = 0;
+		tcp_tbl.item_num = 0;
+		tcp_tbl.max_flow_num = item_num;
+		tcp_tbl.max_item_num = item_num;
+		do_tcp4_gro = 1;
+	}
 
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+		if (do_vxlan_tcp4_gro && IS_IPV4_VXLAN_TCP4_PKT(
+					pkts[i]->packet_type)) {
+			ret = gro_vxlan_tcp4_reassemble(pkts[i], &vxlan_tbl,
+					current_time);
+			if (ret > 0)
+				/* Merge successfully */
+				nb_after_gro--;
+			else if (ret < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (do_tcp4_gro && IS_IPV4_TCP_PKT(
+					pkts[i]->packet_type)) {
 			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl,
 					current_time);
 			if (ret > 0)
@@ -179,9 +227,16 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 	}
 
 	if (nb_after_gro < nb_pkts) {
+		i = 0;
 		/* Flush packets from the tables. */
-		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time,
-				pkts, nb_pkts);
+		if (do_vxlan_tcp4_gro) {
+			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
+					current_time, pkts, nb_pkts);
+		}
+		if (do_tcp4_gro) {
+			i += gro_tcp4_tbl_timeout_flush(&tcp_tbl,
+					current_time, &pkts[i], nb_pkts - i);
+		}
 		/* Copy unprocessed packets. */
 		if (unprocess_num > 0) {
 			memcpy(&pkts[i], unprocess_pkts,
@@ -203,14 +258,27 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
 	void *tbl;
 	uint64_t current_time;
 	uint16_t i, unprocess_num = 0;
+	uint8_t do_tcp4_gro = 0, do_vxlan_tcp4_gro = 0;
 
-	if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0)
+	if ((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
+					RTE_GRO_TCP_IPV4)) == 0)
 		return nb_pkts;
+	if (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4)
+		do_vxlan_tcp4_gro = 1;
+	if (gro_ctx->gro_types & RTE_GRO_TCP_IPV4)
+		do_tcp4_gro = 1;
 
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+		if (do_vxlan_tcp4_gro && IS_IPV4_VXLAN_TCP4_PKT(
+					pkts[i]->packet_type)) {
+			tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
+			if (gro_vxlan_tcp4_reassemble(pkts[i], tbl,
+						current_time) < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (do_tcp4_gro && IS_IPV4_TCP_PKT(
+					pkts[i]->packet_type)) {
 			tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
 			if (gro_tcp4_reassemble(pkts[i], tbl,
 						current_time) < 0)
@@ -236,18 +304,26 @@ rte_gro_timeout_flush(void *ctx,
 {
 	struct gro_ctx *gro_ctx = ctx;
 	uint64_t flush_timestamp;
+	uint16_t num = 0;
 
 	gro_types = gro_types & gro_ctx->gro_types;
 	flush_timestamp = rte_rdtsc() - timeout_cycles;
 
-	if (gro_types & RTE_GRO_TCP_IPV4) {
-		return gro_tcp4_tbl_timeout_flush(
+	if (gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
+		num = gro_vxlan_tcp4_tbl_timeout_flush(gro_ctx->tbls[
+				RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX],
+				flush_timestamp, out, max_nb_out);
+	}
+
+	max_nb_out -= num;
+	if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
+		num += gro_tcp4_tbl_timeout_flush(
 				gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX],
 				flush_timestamp,
-				out, max_nb_out);
+				&out[num], max_nb_out);
 	}
 
-	return 0;
+	return num;
 }
 
 uint64_t
diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
index d57e0c5..d50518b 100644
--- a/lib/librte_gro/rte_gro.h
+++ b/lib/librte_gro/rte_gro.h
@@ -57,6 +57,9 @@ extern "C" {
 #define RTE_GRO_TCP_IPV4_INDEX 0
 #define RTE_GRO_TCP_IPV4 (1ULL << RTE_GRO_TCP_IPV4_INDEX)
 /**< TCP/IPv4 GRO flag */
+#define RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX 1
+#define RTE_GRO_IPV4_VXLAN_TCP_IPV4 (1ULL << RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX)
+/**< VxLAN GRO flag. */
 
 /**
  * A structure which is used to create GRO context objects or tell
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 0/2] Support VxLAN GRO
  2017-11-25  3:17 [PATCH 0/2] Support VxLAN GRO Jiayu Hu
  2017-11-25  3:17 ` [PATCH 1/2] gro: TCP/IPV4 GRO codes cleanup Jiayu Hu
  2017-11-25  3:17 ` [PATCH 2/2] gro: support VxLAN GRO Jiayu Hu
@ 2017-12-14  2:49 ` Jiayu Hu
  2017-12-14  2:49   ` [PATCH v2 1/2] gro: code cleanup Jiayu Hu
                     ` (2 more replies)
  2 siblings, 3 replies; 31+ messages in thread
From: Jiayu Hu @ 2017-12-14  2:49 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, jianfeng.tan, junjie.j.chen, stephen,
	john.mcnamara, matvejchikov, Jiayu Hu

VxLAN is one of the most widely used tunneled protocols. Providing GRO
support for VxLAN-encapsulated packets can benefit many per-packet based
applications, like Open vSwitch.

This patchset is to support VxLAN GRO. The first patch cleans up current
gro codes for the sake of supporting tunneled GRO. The second patch
supports GRO on the VxLAN packets which have an outer IPv4 header and an
inner TCP/IPv4 packet.

Change log
===========
v2:
- comply with RFC 6864 to process IP ID fields. Specifically, we require
  the IP ID fields of neighbor packets whose DF bit is 0 to be increased by
  1. We don't check IP ID for the packets whose DF bit is 1.
  Additionally, packets whose DF bits are different cannot be merged.
- update the programmer guide and function comments

Jiayu Hu (2):
  gro: code cleanup
  gro: support VxLAN GRO

 .../prog_guide/generic_receive_offload_lib.rst     | 269 ++++++-----
 doc/guides/prog_guide/img/gro-key-algorithm.png    | Bin 0 -> 28231 bytes
 lib/librte_gro/Makefile                            |   1 +
 lib/librte_gro/gro_tcp4.c                          | 324 +++++--------
 lib/librte_gro/gro_tcp4.h                          | 251 +++++++---
 lib/librte_gro/gro_vxlan_tcp4.c                    | 512 +++++++++++++++++++++
 lib/librte_gro/gro_vxlan_tcp4.h                    | 181 ++++++++
 lib/librte_gro/rte_gro.c                           | 181 +++++---
 lib/librte_gro/rte_gro.h                           |  93 ++--
 9 files changed, 1316 insertions(+), 496 deletions(-)
 create mode 100644 doc/guides/prog_guide/img/gro-key-algorithm.png
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.c
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.h

-- 
2.7.4

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v2 1/2] gro: code cleanup
  2017-12-14  2:49 ` [PATCH v2 0/2] Support " Jiayu Hu
@ 2017-12-14  2:49   ` Jiayu Hu
  2017-12-14  2:49   ` [PATCH v2 2/2] gro: support VxLAN GRO Jiayu Hu
  2017-12-22  7:25   ` [PATCH v3 0/2] Support " Jiayu Hu
  2 siblings, 0 replies; 31+ messages in thread
From: Jiayu Hu @ 2017-12-14  2:49 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, jianfeng.tan, junjie.j.chen, stephen,
	john.mcnamara, matvejchikov, Jiayu Hu

This patch updates gro codes as follows:
- extract common internal functions for supporting tunneled gro
- rename internal functions and variants for better understanding
- update the programmer guide and function comments
- comply RFC 6864 to process IP ID fields

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
---
 .../prog_guide/generic_receive_offload_lib.rst     | 246 +++++++++-------
 doc/guides/prog_guide/img/gro-key-algorithm.png    | Bin 0 -> 28231 bytes
 lib/librte_gro/gro_tcp4.c                          | 324 +++++++--------------
 lib/librte_gro/gro_tcp4.h                          | 251 +++++++++++-----
 lib/librte_gro/rte_gro.c                           |  73 +++--
 lib/librte_gro/rte_gro.h                           |  90 +++---
 6 files changed, 501 insertions(+), 483 deletions(-)
 create mode 100644 doc/guides/prog_guide/img/gro-key-algorithm.png

diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst b/doc/guides/prog_guide/generic_receive_offload_lib.rst
index 22e50ec..f07d8f6 100644
--- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
+++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
@@ -32,128 +32,162 @@ Generic Receive Offload Library
 ===============================
 
 Generic Receive Offload (GRO) is a widely used SW-based offloading
-technique to reduce per-packet processing overhead. It gains performance
-by reassembling small packets into large ones. To enable more flexibility
-to applications, DPDK implements GRO as a standalone library. Applications
-explicitly use the GRO library to merge small packets into large ones.
-
-The GRO library assumes all input packets have correct checksums. In
-addition, the GRO library doesn't re-calculate checksums for merged
-packets. If input packets are IP fragmented, the GRO library assumes
-they are complete packets (i.e. with L4 headers).
-
-Currently, the GRO library implements TCP/IPv4 packet reassembly.
-
-Reassembly Modes
-----------------
-
-The GRO library provides two reassembly modes: lightweight and
-heavyweight mode. If applications want to merge packets in a simple way,
-they can use the lightweight mode API. If applications want more
-fine-grained controls, they can choose the heavyweight mode API.
-
-Lightweight Mode
-~~~~~~~~~~~~~~~~
-
-The ``rte_gro_reassemble_burst()`` function is used for reassembly in
-lightweight mode. It tries to merge N input packets at a time, where
-N should be less than or equal to ``RTE_GRO_MAX_BURST_ITEM_NUM``.
-
-In each invocation, ``rte_gro_reassemble_burst()`` allocates temporary
-reassembly tables for the desired GRO types. Note that the reassembly
-table is a table structure used to reassemble packets and different GRO
-types (e.g. TCP/IPv4 GRO and TCP/IPv6 GRO) have different reassembly table
-structures. The ``rte_gro_reassemble_burst()`` function uses the reassembly
-tables to merge the N input packets.
-
-For applications, performing GRO in lightweight mode is simple. They
-just need to invoke ``rte_gro_reassemble_burst()``. Applications can get
-GROed packets as soon as ``rte_gro_reassemble_burst()`` returns.
-
-Heavyweight Mode
-~~~~~~~~~~~~~~~~
-
-The ``rte_gro_reassemble()`` function is used for reassembly in heavyweight
-mode. Compared with the lightweight mode, performing GRO in heavyweight mode
-is relatively complicated.
-
-Before performing GRO, applications need to create a GRO context object
-by calling ``rte_gro_ctx_create()``. A GRO context object holds the
-reassembly tables of desired GRO types. Note that all update/lookup
-operations on the context object are not thread safe. So if different
-processes or threads want to access the same context object simultaneously,
-some external syncing mechanisms must be used.
-
-Once the GRO context is created, applications can then use the
-``rte_gro_reassemble()`` function to merge packets. In each invocation,
-``rte_gro_reassemble()`` tries to merge input packets with the packets
-in the reassembly tables. If an input packet is an unsupported GRO type,
-or other errors happen (e.g. SYN bit is set), ``rte_gro_reassemble()``
-returns the packet to applications. Otherwise, the input packet is either
-merged or inserted into a reassembly table.
-
-When applications want to get GRO processed packets, they need to use
-``rte_gro_timeout_flush()`` to flush them from the tables manually.
+technique to reduce per-packet processing overheads. By reassembling
+small packets into larger ones, GRO enables applications to process
+fewer large packets directly, thus reducing the number of packets to
+be processed. To benefit DPDK-based applications, like Open vSwitch,
+DPDK also provides own GRO implementation. In DPDK, GRO is implemented
+as a standalone library. Applications explicitly use the GRO library to
+reassemble packets.
+
+Overview
+--------
+
+In the GRO library, there are many GRO types which are defined by packet
+types. One GRO type is in charge of processing one kind of packet. For
+example, TCP/IPv4 GRO processes TCP/IPv4 packets.
+
+Each GRO type has a reassembly function, which defines own algorithm and
+table structure to reassemble packets. We assign input packets to the
+corresponding GRO functions by MBUF->packet_type.
+
+The GRO library doesn't check if input packets have correct checksums and
+doesn't re-calculate checksums for merged packets. The GRO library
+assumes the packets are complete (i.e., MF==0 && frag_off==0), when IP
+fragmentation is possible (i.e., DF==0). Additionally, it complies with
+RFC 6864 when processing the IPv4 ID field.
 
-TCP/IPv4 GRO
-------------
+Currently, the GRO library provides GRO support for TCP/IPv4 packets.
+
+Two Sets of API
+---------------
+
+For different usage scenarios, the GRO library provides two sets of API.
+The one is called the lightweight mode API, which enables applications to
+merge a small number of packets rapidly; the other is called the
+heavyweight mode API, which provides fine-grained controls to
+applications and supports to merge a large number of packets.
+
+Lightweight Mode API
+~~~~~~~~~~~~~~~~~~~~
+
+The lightweight mode only has one function ``rte_gro_reassemble_burst()``,
+which processes N packets at a time. Using the lightweight mode API to
+merge packets is very simple. Calling ``rte_gro_reassemble_burst()`` is
+enough. The GROed packets are returned to applications as soon as it
+finishes.
+
+In ``rte_gro_reassemble_burst()``, table structures of different GRO
+types are allocated in the stack. This design simplifies applications'
+operations. However, limited by the stack size, the maximum number of
+packets that ``rte_gro_reassemble_burst()`` can process in an invocation
+should be less than or equal to ``RTE_GRO_MAX_BURST_ITEM_NUM``.
+
+Heavyweight Mode API
+~~~~~~~~~~~~~~~~~~~~
+
+Compared with the lightweight mode, using the heavyweight mode API is
+relatively complex. Firstly, applications need to create a GRO context
+by ``rte_gro_ctx_create()``. ``rte_gro_ctx_create()`` allocates table
+structures in the heap and stores their pointers in the GRO context.
+Secondly, applications use ``rte_gro_reassemble()`` to merge packets.
+If input packets have invalid parameters, ``rte_gro_reassemble()``
+returns them to applications. For example, packets of unsupported GRO
+types or TCP SYN packets are returned. Otherwise, the input packets are
+either merged with the existing packets in the tables or inserted into the
+tables. Finally, applications use ``rte_gro_timeout_flush()`` to flush
+packets from the tables, when they want to get the GROed packets.
+
+Note that all update/lookup operations on the GRO context are not thread
+safe. So if different processes or threads want to access the same
+context object simultaneously, some external syncing mechanisms must be
+used.
+
+Reassembly Algorithm
+--------------------
+
+The reassembly algorithm is used for reassembling packets. In the GRO
+library, different GRO types can use different algorithms. In this
+section, we will introduce an algorithm, which is used by TCP/IPv4 GRO.
 
-TCP/IPv4 GRO supports merging small TCP/IPv4 packets into large ones,
-using a table structure called the TCP/IPv4 reassembly table.
+Challenges
+~~~~~~~~~~
 
-TCP/IPv4 Reassembly Table
-~~~~~~~~~~~~~~~~~~~~~~~~~
+The reassembly algorithm determines the efficiency of GRO. There are two
+challenges in the algorithm design:
 
-A TCP/IPv4 reassembly table includes a "key" array and an "item" array.
-The key array keeps the criteria to merge packets and the item array
-keeps the packet information.
+- a high cost algorithm/implementation would cause packet dropping in a
+  high speed network.
 
-Each key in the key array points to an item group, which consists of
-packets which have the same criteria values but can't be merged. A key
-in the key array includes two parts:
+- packet reordering makes it hard to merge packets. For example, Linux
+  GRO fails to merge packets when it encounters packet reordering.
 
-* ``criteria``: the criteria to merge packets. If two packets can be
-  merged, they must have the same criteria values.
+The above two challenges require that our algorithm be:
 
-* ``start_index``: the item array index of the first packet in the item
-  group.
+- lightweight enough to scale to fast networking speeds
 
-Each element in the item array keeps the information of a packet. An item
-in the item array mainly includes three parts:
+- capable of handling packet reordering
 
-* ``firstseg``: the mbuf address of the first segment of the packet.
+In DPDK GRO, we use a key-based algorithm to address the two challenges.
 
-* ``lastseg``: the mbuf address of the last segment of the packet.
+Key-based Reassembly Algorithm
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+:numref:`figure_gro-key-algorithm` illustrates the procedure of the
+key-based algorithm. Packets are classified into "flows" by some header
+fields (we call them the "key"). To process an input packet, the algorithm
+searches for a matched "flow" (i.e., the same key value) for the
+packet first, then checks all packets in the "flow" and tries to find a
+"neighbor" for it. If a "neighbor" is found, the two packets are merged.
+If no "neighbor" is found, the packet is stored in its "flow". If no
+matched "flow" is found, a new "flow" is inserted and the packet is
+stored in that "flow".
+
+.. note::
+        Packets in the same "flow" that can't be merged are always
+        the result of packet reordering.
+
+The key-based algorithm has two characteristics:
+
+- classifying packets into "flows" to accelerate packet aggregation is
+  simple (address challenge 1).
+
+- storing out-of-order packets makes it possible to merge later (address
+  challenge 2).
+
+.. _figure_gro-key-algorithm:
+
+.. figure:: img/gro-key-algorithm.*
+   :align: center
+
+   Key-based Reassembly Algorithm
+
+TCP/IPv4 GRO
+------------
 
-* ``next_pkt_index``: the item array index of the next packet in the same
-  item group. TCP/IPv4 GRO uses ``next_pkt_index`` to chain the packets
-  that have the same criteria value but can't be merged together.
+The table structure used by TCP/IPv4 GRO contains two arrays: flow array
+and item array. The flow array keeps flow information, and the item array
+keeps packet information.
 
-Procedure to Reassemble a Packet
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Header fields used to define a TCP/IPv4 flow include:
 
-To reassemble an incoming packet needs three steps:
+- source and destination: Ethernet and IP address, TCP port
 
-#. Check if the packet should be processed. Packets with one of the
-   following properties aren't processed and are returned immediately:
+- TCP acknowledge number
 
-   * FIN, SYN, RST, URG, PSH, ECE or CWR bit is set.
+TCP/IPv4 packets whose FIN, SYN, RST, URG, PSH, ECE or CWR bit is set
+won't be processed.
 
-   * L4 payload length is 0.
+Header fields deciding if two packets are neighbors include:
 
-#.  Traverse the key array to find a key which has the same criteria
-    value with the incoming packet. If found, go to the next step.
-    Otherwise, insert a new key and a new item for the packet.
+- TCP sequence number
 
-#. Locate the first packet in the item group via ``start_index``. Then
-   traverse all packets in the item group via ``next_pkt_index``. If a
-   packet is found which can be merged with the incoming one, merge them
-   together. If one isn't found, insert the packet into this item group.
-   Note that to merge two packets is to link them together via mbuf's
-   ``next`` field.
+- IP ID. The IP ID fields of the packets, whose DF bit is 0, should be
+  increased by 1.
 
-When packets are flushed from the reassembly table, TCP/IPv4 GRO updates
-packet header fields for the merged packets. Note that before reassembling
-the packet, TCP/IPv4 GRO doesn't check if the checksums of packets are
-correct. Also, TCP/IPv4 GRO doesn't re-calculate checksums for merged
-packets.
+.. note::
+        We comply with RFC 6864 to process the IP ID field. Specifically,
+        we only check IP ID fields for the packets whose DF bit is 0.
+        For the packets whose DF bit is 1, we don't check IP ID fields.
+        Additionally, packets whose DF bit values differ can't be
+        merged.
diff --git a/doc/guides/prog_guide/img/gro-key-algorithm.png b/doc/guides/prog_guide/img/gro-key-algorithm.png
new file mode 100644
index 0000000000000000000000000000000000000000..89cf427b3c7c406d7ddfb485d3cc5122b2206c1f
GIT binary patch
literal 28231
zcmd?Qc|4n2`~RD^yTjJ%q@_x!c9)i-F;k)~rGufZA&SsSwWd%*43R{42Q_QADn%)3
zN=mjCF-8XxLy0M-ASDP%QA2`6_}yvu^L?J*bAIQ%&iVWN(N`L|?{%;1TG#bi*IMg+
zM_;ot7Z*D$1_FV^FJCgf0RnC2f<T*aJGKFTi4CaH06sPa-Y~xaDruLU2EP1u@4V%C
z5U4b9=i2S9!1v$pU%D9x0__hH{o6EhS3VR3LJVCtJ#QQ8GE?VQsLBc(IpS6qKP21t
zSMeX8B`5j~w2tXDgHMi@75Z84FTcVh#b-=iBwv<Mvh&7YK1|<XK6k#@G?}d5bN_<H
z-yaon{D1tZ74P*}_ThfhYZrE^UTTO$@4c`oVsF{8huHb;7zY9&4|#-9jWpK$(3@7i
zoc4;b)tKk9Xt=nRes8vF^3{NMM5~y><GmMlC}?@!X72+&AMZ`8Ji`@zUf5yjoQnGS
z<6}iN3!|SuUXYN=m;3o+cJxl0MA4m(I~tx8ng6`QW9)O!@n6mP|KBucu%yaZP{r5G
zR(mf_kLU5Xidwjqa)0Lz<c^>1WuMwXd~ogQuRf$b{D1ht<A9Oi`v>pEu+@bL@2t7{
zjdfZvaRk+t%3i*jqE_sJo^M2ELh2S;R5%O1^+KMv`%ow|9v?ne7lP!nX~;}xzpE_-
zVOf3ajVdqdBGtIQnS1w?YikeQf>>l{W=CiV|6WSb(H}X@Z4A>a2z&G=U~V?^V7^>@
zjz@|6ix1Lm{V#}rB+16#f3j=V#@eWSz0RF&XH{G3k*x#0;pqO>y(Kwc<`G-*qV6Aq
z>b=i9NwhZ5FZa>KY!c0$Th>8o?oIAixuaZmow!$PzqyGY)3?ZQ=3eeBU)d(BwG*pB
zf7w-@wOEL_IilNwxi(x{Td23YdQ%kh&AJD7WV%@6kBNOheGmJr`sVr_8}&OLH7=#s
zU1X+fZ>T=<{xZUiiCGxXi}hhYeR;g<bn<iY3SMG9HYyvcj|ux@_|D0!UD&Hvr2gH+
z0ZR<`%H@}?uFa7Z<#ji-W(RuwY^+jjBQ1q{P1MVM9h9rS_F6vc`t4r>=WzhI0-l4w
z=H7Wzj)+otS*x6BS=Ty^ayw(HY~n7iy+luVGzcwc+(<hodJKEN@22=;=>sqIi|;Ux
z*daJK@FVQV+fqflax0M1sAgShdd_<l{Uf3%E4*5{RVyZ;t;d_LGf_4kV9HRgd;Bxl
zIy`|eF?-%t?qVa21H07E*DMb7)MRv{gm=@^)76Sjiv~-f=fu$F!xwcvxJ5mOEXlW^
z8qr^kUm#yqOM4Cn6<S{LE3WlZp(*{0;_Amv=W<V`@v~hgPL{d8uq^GWwihN-@vLL%
zd<QEAu4)0-W(S#(7q6z5ybq+Srbi6F32Nrz<oR`fwft{GFxTgD2j%X|jHy^KJwBpQ
zH%JMy&0N2kWJ$=}+z<hwxeo~$cVX=g8|W*-9}s2D*1N+3GVkhOJ;CMb^yYt??ZbAr
z&z(ke5@U?s?vlAHba{pHyre(Ym3KAWXyjy`yN&vE<i`nnq&b{%5%1Ev`k!W?F6Z0V
zo5S%mWbu_fLM9EjOWLzeY3|^Hv)_s2-1Hr<0#lCLmRKO6;V%dp+`o8%bVvM*R{<|k
zS8~vN)oE_RJWO7c$k}S6k{u@xyOsRZhK}2S^K*aYsX`Fyb5HK|U!$?t!00x+^W;C-
z|2p#w*W<uH0$^qRe-;r{s}KfenVlIvJAf92(%v)@aF>5q&Bjj`c&L3Ic+>d%FG;BS
z{oW%<rC$;>shA<|nDLK1q~F)^@c;Ex58wa4-4LWVJyeZeU6_;r#wlrk`?fY0_1Yhw
z+202~^!;6wNC#Y7W$F9410r%z$22f(lG!zLBthCMs&j3qf6`8cGD47*XqcwP0R@kJ
zW<U1fl`hqA@vEZL1rOKhjGy;412Qj}>s6j<htdiW2`;CqB#UaivBn+=pMwnpipoI%
zs_R{_3A!!-MA#9VEzCl(i!2lT*d>6>zf^j-_rla2(}s(&*UKHL%0_85K5TVZb(s!J
zW1djE&{AGT_p8SshCY<I)K!mIuet)d&DEJHm{BemG+%%Kp<!j8iYhzVNJH6AckTxy
zZYlhwJu^8!>!tlzx#c#}Ukfe$95aQhf`6NG!@2lgvYhI99d2<l^73?|5wCx;+e_$Z
z1;sY2thzK3%fv;IrKE0Q<d{(=>Xl=R54$EV*kZW_!yQ4taA~|w3e`ONv&Enww|p0=
zmrvzT9*BJ3nwbikxbD#3yVpUWt7BhZY2pJ}?ZxoXEqe_APKYPe@dg!dB&esOgg%$2
zb;e8<ocxXPgOPv>ei#%MZSec*N!0-pE$qp3KbP$&5GYhrcQ~k<-DxflqkJtbFD|;5
z2h6YeMUP(bl`=!e#5d{|hZ*xc8WsI*JG6coFMm@wz>4tXmMa1O%etqw1$0`hk+iW+
z&vKxAj+nPre+SQHzLh*j`td~FCud4;u(#)`O;~{oX6*MfAXynDp=qx{R8e0{(pla}
zNPQr;(R?E;%zq2$K`ZCbDuy+JRP}8sOvr;-)YS{~%M<OVtvPhR(9OF*HsjB%?-X}h
z3f0#>tG|=kcu7Qa*iZ=XPnzq-d_imMArPqcxk}c`H-lj1NbHn}{GcgKSQcB9k}x06
z4ih!-?QK?*V{RIX&`~e^>I8M=f5o_)Zvwq(%UYYH5=e_j{G2FV+vU;sC14?aCHXF0
zka^<{F(IqJce(Fs@q6Au>j#@|wa6|+ztc`D>Jy7OTx6%X%Lw#wwy8h7aeba5&oCXB
zh*A0oIwQ4v-n92{V-H7o)Of$$$6|v`AkdfV-=G$>+(D=-1}T9^QOX2u`8ae)05ld;
z)F-c6bQ|?Yi<lVbqh9Ssjo(d5?^AhfC~(~g(RI%H4H5&_0hc^^5F)z?L}nJL0K;XE
zv12OKgGUcFO56gao`oAfAEx&R68~OGsk3{u33MxD={H+2Z@M6B#B|s_3h3A0zmd_$
zYKTRBu5&mr-?0=|3IhFoHf!xiBQmZINKBpUA=08@I{ha&t7#U&<}%**9KuN5qd9;;
z{jZHH*8(|*!bd+pvFv$WFB+1oZts-dhXbMJ?W=9Nv6Aa`5(N6*zF03PLdMYq6Ib_%
z9CkU5YZ~llL!fN++Og5R2XqVVvcAA^oemXp`}}fd*F>Fn*bg@nbsoPj$LkNEjYQ`6
z8$s)9tH=m$vL6j7Tfat^W;VI8H-QplDFc|G4dJq@3X4s{<E32D{zzuLZi%SCvP}3g
zy)e-yIAgo6+)8l4{4Ki&^gtinD{<2T7^de;y&C-5(*lup@IUMlg40pU#KSv*E5<5`
ziD$THf3WJg^Rz9-+%I_f)j#V4i#NuM=1?rQQpA)1S}*i7uI1y^jhtu0cMK&M^Wz1r
zODE<9^}G*wnGs$ucbSwq4(^YvUl~&;uwn*_s%$3)o<?kR$~#fF3*U9!M(L5iD;IyA
z`>;X!+_MD~#QgbCr=yxL<Iy{%b)>)IGD<)7L?nfx6Oe{J)r|^qy7^daDCs*6Vae_P
zk6X5M#^B<r`M4~{q7=_K{HK;&JJd$V8GlBiYp99-Q;^Mur9%;=hq{lF1RKX3*BG!`
zoa=&x440dA1=po9F|&2cBi@8LTNF@Sjy!X%_f9XnV>7qnoYyAqMx8%KJvN5Cj=i|3
z>8irLIdTFe7yhd9zD{zOs^0;+55X33)^g}2=5dkyIjtpB-QB30WmP?pr#ie;RMh?3
zujLIg-Js<b<rQ}ME|Jd0Ts6{Ciwfc0;`EU<pYO@}Yhy(PnS(yC;>BsRb99ji+X;pl
zUZ?Z7<;z*gq&u!k`6VSjH}BkR!_!(g4cUJDO8TRszRzhD6htaNekcm}XW+RcSdt57
zs5gUi_nZp(sY87HEw;kvCrGSb`z#J&QCAUG4!NobGacw?9fEcdXQ%GoO6RE%3dwF>
zx7ed)rI!<3#`}y{B{&Gj;>55gM%HGYfoHfQa>Uzg-GJk`ZS9-37ll4Ot61%M?Z|xc
zkzpgj9^Yh&gvPcO!^@iOQk}y^a`lbXK@051j7hQy5|6>|R|v{cxwMkNpf8a28S?|B
zM+(F=6kW7>b}wHP-j{bySoR;l*uu~Brn(Q!RjSIf4BPodsJfPC{sLuN`*Nxp<(5qF
zYX*&npunv{zX#JQ@?98lHqChM%MB1QnNiK{bs~DzFT?<TGDDV755C5;y&);1LD7oC
z6h*QSU0Vw~8SI@-m{~>bTRwZY2Mg2GlJu`3Sk5v$$H2qpseC!KhU{m(rUQd-$~u03
z3k_QeH=Oqgb!<RA-ev0w!~E%x<dTmY?CW>tgp}7Qn@@L++f`=N5(W#GWF5w$*-I-`
zd!$2Hy70K0JBh}=v5LJaDhIu6v~}!Ua^J&8FbSWomCcnbI(<JY7x)=7Y)4PyzYNgF
z*5zB*Vl^4wcN*ry`YLxk{lu~{)R{k=mI(bSPslqqtK>EaW>!f{y+h9wm24FJNePml
zAuj8Vo7swI7-%f8)8}j{e;k&Rktry0u{2~{>qnPw&a-~+GZd#(^6V59>G@<B;Ym|4
znF~HiSZI_vk~|&UfNj)xJohrWWW6YhWYTMmN#Nf3Fe%YJJ^P_`Z*jGzPP~`-kZT<H
zE-dujSVg~_(00`yZA%ak4E-5Vi)Dj(3q1X8CWqkIL~XgibC?4)UNSZe6xXa}Vrhct
z$R)n5%{d%fbp16atY)%o@;!BHHba}Sh~3wykR{KTrhX86tyyCj7UYDqA{*ExbiwSO
zz?XD9vQ})Z3f0PCm}{B6mBFK-x64$?XTf)do-8btgL4q=N;pG<ZC7kwB?RuHO$&+9
zVI$^x?Wnae9m<f{1Xb{R-}CD?)1YDKF$rN)HpLv%Gap0$?u2`9wS|cJ50_<r%lrY)
z{(byS;qNXJ`&y(PS>lgCnsMKXG|)GYTX7E1gF|-2t``NI-=dU~ADwGJ(3(f=5}<eO
zT!&_Zqj2>(b|GZS(G<<zROG0C6y1>Vc+h^U_GqammjUn5bqzM!hAI3Co?Lcdg0~y)
zu3=xx$4*@_%3p88%m+6nc%U`X7-6pa;J4o%{7_bhm+O?$T<fkx_&eRDFLtp4cXH2T
zJmbBk!oIq#HJ6cL{wj$k7Y_>VD8F_o-c2DvDW!fJm}szWE=0w8HeAhwDM7zE3Qt`&
z;?3)8`3fL|esg|J@r8%=kcOX}{`7?gy~jISepEKvFMh-IWAQ_l4sz8>f4gS+d8KR*
zX?&FR>Ft}CH9lM=U%$}@GSDp97d3Hr0Rty&nHuC&?kHT6*!W-&sqj5LW!5PfLIzL4
z->n+;hJM|Y_@%_)MbYwEF3c5q>UHl~ZkkNym$ZAyod#O8Dmh~KItLvcyTyPt#Phio
zG~y9%5sPVK#kK@sX%f_Xjc4Lx#Uqe%RmB&OD+X_Z8_=-QdpmRxUa>7veKOor`Qbxj
z>zGQ6Ni(x>9ToTV8Ra~s#Z!Nqz<?7(ljz12gZ+KT7liRhQ?zA#;Q4Wl3h69ELLvse
zJDyHT<k1B(B9XE>V*LZQ#dc@b0TXD*ktoNR`(gT^#<Farz6TjWI=V}{L?>bQX0otJ
zDPU)x%v>eRtPd7y7t%0e^PoxU1XyIh!I%z}DEm$Wv-sYd6z9K--ygi@ICI3?aPk3P
zDVUaRQP;CPql?YLrt{85XiJ*Rhh;`b^8P4n=T^eXCQIUD7RWQ1i(xfmuPPcVCEKp`
zg%Qv?cUFgxPEO$Nvi1puQ1Cg#l*XOTm1t$_okaC986$3M>Cm`$=GZN(M0g$p*?nMf
z%0b^qE;i$(;Dp<NoP4L0#PW#>1^HGzzl#>F%-q`9NBj~9c|A7tU4aJV>E{`SxnM>p
zUbW9|OM#Wjo7q_F80F#|YFTA%=;lKR#uJB^TSj%HOeUzzURt}@>u*)E#5`@u9NgKl
zV7=6PFDiDL2u=(|R<=>9@H3xR9pN3?W+fq6@D6htQO!{({V&rMEbHnET=4U><sJ?_
zU@7pQ>Ek41j|Tng1lZ&Co{^EhOs|Nn6vwunIj}1H!v|`TIWc=~lBB|0x~(Y{?-=Xx
z4Ywp2W;KpQIa?a)=ojM?JNsD%OI24E51G4FXIVs*q<;WMA)16=XO?&+rLj{hWVDTJ
zmdPk@E?3A~kNi2+eU!-ap19>?sE;p;fpm{8DCg$S)a-2yIQp)|FMDnE?X1_mCyz`|
zQxMZ)g?7VasYmwJ*Z68anh?3zN=t#a6O{3Iv7G<ETPgP+R*pwlod}$(Xc_lT9`U4M
zRjcB>PDhR{9u4eTe2avHhBMqQdq)sryzJg{(7bh%ukZgZilGCWGAg{F(ZJBA_@~#`
zhuR?v8HMBqYn8|RnHj8Cm1_0P=!qJFl}#MC&6X^<d;A^7?G;>t(?aPA{1h54msO%K
z9WOXQVO^^f3OwrT>J@ZzyuQia-`lU)zf_8NR=y9h^-2o#wW(Dro%kuSE^?b!)-CW+
zvBZgv8jtPKp}Mmg)q^)^s=$=H<76F25fKOG<l?@3dgqt%n!wJTI()ofCU6)3K7;p~
z0gtE1UfXE6F}`jO2fsjP>te`3K92JZbbRFz&l&X`{Tn29Mj*eUAy^#Y4gEgtHdQ4z
zeK<wZH>+r&(m9(^XEawMUW$GWO=fzlEF1<xm4$GdFR4^}o_JYSesso!+`BX(lBWZ+
zUw}34^3mf{zyGm#eZn&vgs)kb^+32q4xMb^FE+bZZ!WRUB;6W%GWQoPK_TQL<dfUE
z1Ei+i7)F~gM(Xx*fhbI!JZc5JgTNdsKkGE4v-EiqZ6|-R?@&CDn}@uAO-MqCtIq{a
z@r^njzM&I#i#}6r-&1W_s6JlYVv)LCb;a4lCd?8K5zXb6e+p{MRI{C}5QrbCkD}kh
z8Ty~*)(}}3qQXmczJ=GjtB4<NFZ80*Z}gp507o;A*J7Ib(@3=x2#{!)exPR9r1x!y
zbgu&0UZPb(m7(voVee?V>htU(q$<w(z4EteHCR92MWL%XC#k^T)KEeRPb_(qM}FWj
z)JqC*cGby(ymEpo;HojZX!VtKZtuXH&&<6<@w&hDUA0L$esMftHi^@&ryNey{|0u9
z84^{2Ky?#>pz38!h9&#Hm|awxJmP3AWkT<e3U^$Qm3zPU)=-~v@e`3Tv-pm=5jg@P
zXKt|LiByXQyZhOai3H~>MI#~cM=)#)lUGu;pNXXvz~zCY!hSz)Av_m!!TWRQ*p$_c
zBztm0SeY0O?IOKj)rNV{COgSg>2Yh<<MNw982*6R3(TgzPnRYN<5tGcA&Y!dvJJt*
zg^1Y{EbeQ62<?dF8!TX^ZI&RZwUbJ!&^t*aUl^~pE1|qs=`bZ9`k+?pH<VxD`7{0O
zm@0g-2LpcQx0zhwa69bR<k@3fdZ4PF=3o)ecAgu|^d~8lUS+*nAP17?Z#Au}Mpk@6
za$kQqn6SlUEIw;njB?!*dS5v~puAK=RPk-Z)cBrzK^&BuM`|2%9SurmBx5hRy<T-7
z&+i*4S&<`BN@E+mA9;kmh0FcRc1aJ7$=`?Fn%Kpg-)_0NlLp<&j-vM*ypOn1b1l7Q
zGtO#|w5~F&M8pzWt{<WOF5_A5-gMDqy43geROxrgs;^-3yC)Q|Z})t6MG5J5mUsNz
zGw>km&@OkzKT~GkuO!pP1=e|olUKE<b(So5_efme<#`8IcKb5q!*u!#dImRm4g3eM
z><7uq8#>hNpV!j;85uOgA&Cj)y2r#mJS&Yu*wKA-%KiF6?fOze(YPhd*t|}I!D49~
z^SGNMsaKgme-~B=nee{gZ6&w3A}h8Gm%q&pIj4kE^vfI)i!{jA9>58ewK|bqrjPP2
z%eP*~433Ds@HK~m_8BM~co6^_bVU}a(y-DKb^>(3Bo8Ntn5*%&UpI{8zq3oVcanVp
zM6GiIw(qiB>s*d<;5%?@CBxr|;jh!qg4~Uadp<C6$ma`WG5p<p>K3`0ye45Wu(J3k
z2R$>x)7$dekINj%JF6%d(%`?Fxze|Kpe>P?5*=dHlGJvc7AclojK;8hc5(}7RzNJ3
z3l1;OeVNg;lUuf%T29Rmm4x4vZ=uKJ4rVS+8qUI1gx4B^C*aZ)^M*16MTy$o1Wu%8
z-*@Hh%a=oyUZ!p;c>;~SvkM;t>fcQn_@_Jlzlje6QRPus^VLShiryd6i86qeXS=!D
z5U>-pA8~ORe2t*BV}dkPwhq})^@VxZ;`%;DLsZK*a`IeKZDzGqX?&3sJMcU7N#gAh
z-N1Luz>`BMz@qQzdA$zaR!(T?yx4xw_ou)r{in8S`#g;FrRurr^u(0MP+W>%D9puD
zYan4h<qGibp}qu&ZQ?GNOLTJdBGP?o<>C;;X2QSfLOnF>F<FJ!Y0VLkn?pBQxZA?#
zwrY18=sf+1j6t7fu0*9udLA)J*6opz<%ovb9T@7{dkw<S0qnQE&63u~if5<pATu$K
z3_xg!e~jM@x|I;Oj|JwQIV2rYY`T(4{MfN2E|ywc#F<EO9a-yD{b$XuVU|JZ`O-U8
znrqKlglil19^pHPgCd!WK5T=K*&+6E`PC-&6DgN|pP~t1{@4UMV@{4*Ohthn9KRQL
zD`J;HIPZJ4d><?XR#3&N-VC}5kJC#m+78kSAn)K&j;5o!HI}UiWKn<gW@nE7+<@pl
z_?-akG3t<EXkdTJ5e966`YrL};#NzV8fw&^`^?tW{l|U;2xM*r{o6QVeXV@yRBn3Y
zz09z?!%BYm|4W<Y)xs5KVn_}REj@|34pXmN>S`YfXieX^eM4*UWw&UTC-rzjxL}9P
z9?<>qxP9Lf&!e_dFr5;S#%o`Uhu+p+Z?}ox3i>GPKSVeGw}pQdrM!w5i&oNc^74mm
zdXNVed29<vRv-SG0DcYImVcr|1@yPpPp_QpG)`SGPC<Puz*c_MJRp2}lbY0>p%Sqq
zvs|Bjgzr4Txd8vUSK`;tgSw(Zg;e5_rmqw`d`-R9c-4eazrGz-rE0UPUdu0ldl9hY
znOTY&Tuc~>PC53U17>o0&98Kf8o^<@lN7!-_O!-%*ZYh_2$PKG6_Zqg;0HLGI4PSw
z^l$U9pHm75G}&~>U(ROFiB!IBx5nI{{i_85cI~qaJC<uT+{5Bk%ct_c{OjucD9O!1
z?c)W(t3|F2thB<h_Kf?+cr8jxQA1PJ{Xym|p9NVy4%s70zAK@^$+k*){id=w-`bNq
z2|qaZd|O|rIaJ>)8pLCL&gP%lMNRrAAE(fLoUmCNGnNgOujICtT2l(_$pX!uYHZT~
zuV!~AJbcJ%_H0k~c}vuOGi$zEgS0aFO-jAxSpGzX#l*uCi`nUQhQWZc_{+pSWi|hk
zfB~|F&!*=F)hQWOqgZ>-gk+oO=YgYTlwmFvGi#2P6HS1>J$t@@mzVR)rVhFkFO1U^
zq)XPs;vtkipRp$=`VKBj7>q8h^iQMSy*pUo@YM2ttV3qUSq@718NC>_G=7vtXqxF-
z_kwaMMSSPv&d<W;^JMtJN3QRba+ii)+J9RrrNVO{<%9C_^F(qRrB3-v#MxU*1D-8e
z0`(qpx7hmCj}*640TKAp)cvc@U16LVNC8sKp5+<$iKefsG(=TyEB$LF;C%+KvSz)C
zDvY8&0u;wM;LcyUm+>7Jvo-Dw>P~iF{9^=kC-+dBQ|+4_ep1E8AZ^mQQ-^G)6;nKO
z2QJLSBfd^Oyg73(0Hu8wzK_#<a(Gj#?VmPtbY6UPf(@xC=~Hku`E-7C=$enlF^u3#
zGn4txR2+@Yq=ZZ%N7!p^v8syXZ&flf%O8Mp-HmPivRf{s&v>~?em?APV4u+3!sYUu
ziPCws<A=3Imzi8mU#iN*`G?19VNyB$ip4RUx5V4M_tL*+=ac3X6>_RSL?5g&HYp@~
zpT_ayx{my9u{)!Bg|ei!(B|lJ<g4z5)&9wO+4o(0lwuPpzlQS!U(#NAVO6?lS3(P&
z=jjq>tZW1R%w?}pS7GM9=@9R5*DU4gl0BtazIgcCK^#fns)_*H2{8*>=)RSlD|FI@
zn%s9a9EnM<mLz-Uv4`%y3owQ|!K_P&IdlExWYxrQvzcosZ!0L*%TP<rWaFG+w^yt4
zedFbQ`Rh^5=N%;sG9MU#12Wm;ElQVlJ{mOhdGo<pQ;L}x#c!rQ!rjvN5?it!zTBK6
z$j;)+0*Aw;aYKp|@$M&$5|XCH9>e!i{wB169VWn?bMtO*{CmN%0+o-bh<%)PcZ%?F
zOE5}DfrfsVM4`Zc1;pXb7r$d>HJ<77%mQSuuf><<To8;e>XGI1C_0l4ITe$TJ3WyI
zrYA|w)XjL<egf*`D5UX9{M-7T#~e~2e!_IlPZ?ly2(K?7ih1!<2D2z9xba)+6)36J
zPj<Ny7T6r=VvP50c&0BQ1v(C65;^m)#AaDQ(Jp^VY`2_Kr-3K(MgvgD>L(W<-Mj_Q
zBwFd19GMw{M6vQ0y&tKml743#5&OjUm&Sz#GC^=$>5Zy^ej+*O^w=SC?29Jmp+|P4
zX|Hlk8G)YSe9oKI(V10;@g=>Nt7E$ho+kdv(o%7Q5cG(5CPk({=~x!$LTL%oCFE7w
zA@c?8oSpIK<BILlEnEscVP};cG3+k_sHZP}?DgQW1B@a0^3KvT<oB0kRVLywwnII&
z_l-lq2?==rl13eN0eh*`4yFp1hZEiIe=T6xrO?@YoIiD<0kU&Of?2n~3PL0@_Yrc7
zIVR38Zj2c2=6}~%K!V5kH49(GxloZbJpU_x_}YQLaSLwOVr@U28{ZjAgsRkCQ1ah>
zSW%`qbuua3X{_<)rXISJPzu}>ZO@<_OSrw;Vz-0hwst*5*d|r^SUp9t%-!!kW8&cl
z=bs#(?Ka4~zp#>e`r@n~uzAp#5v>2bJGD=rm+JHO7;OIdVWn&aa-D8iG^bOonD)19
zL|Hv=eqE7#Y~p&xjbW=khe^Y#IEBy+Wz`BhWO+-7PpiKX`E$v<J!H9Y{{G4N!v+4c
zhqoM2ny?8Egivpm>n<P9LC+>Ng#nPki#}Zi{~F40U%nnh)kNLUuV!IKo{DR&??vZ`
zKJELyh4pSgA(8mvPzN!a^IOw-QY(nXhdb&_KWCe^ufiRNNaDM-s^Th*m`88(9}bfm
zz+WjRi0&!tQ6;sz(=OCgWX^6CWXQ!oeTY82(@IJ{<(JBzG>~_;daGh4_ZK1_16}K1
z_H;=wR#u;rVLtJoDSVKU+F2bJxO2`aud$+H@TkwrKv`hr&v`w6;WF|F=W}4O2KyjH
zZJ?u~0@`00Yp6zl(fyfOLIUUo-|i0oYoy+CGB>Mt6?OPOyN}#Fel|RJ#%|p?!Ybs=
zyQcF-uFgLV0N*E8+#72=9|Su}KALbFqE@9^vt%ALF{3$ODt;w#rdVy^K5C`%U&FKd
zE#O%3p|Jf^8ThgL?vHE6%$<+Sv8_2G6YPX=W7XNu0(&4H`ObMVtXBUHW`A|ByQ~tv
zOfu8k#Imf-4-!0>zq*fz93>Ua`PWpGe_gm{n3=ZSWLI5(u&mY`j<>Uqp<U04<f{O4
z=lZ4SZ9a@7n}*nPrgZ#SukAaNx}T^WDt)cEoq_g`_hhFNm$$}Amd2HQoCCSx&S@z6
z?+UyX@KIy_!hu9P=s9oL-^cxZj*&ic^S=0hW4`~nmfF=wJ{5SFz%cD8kxg_T50IUW
zk@si#xM|vr$VT8C4sjTfC8;!#876J77boWL572MKU*74;tQas~#WSnhR_wSHzK1Fr
znVylSmkQe}sn>A&-}%cklovljf8~g{2d5n>^^iny4r@G-6-sMPTO41CvLe;;4|E?1
z#g(CLIy927KF-fu3peA*CuRDT8L2?<{CR5o*F!xE^IqlMi4K<9W>u&6`JuH4)m@`{
zKT2=@EK@4Zvl7J1+Bdu|^0lrRwvZvjVijzIP}$@_(Zb}4O=Bur{LmF&&fZ$T<jD#i
zw|ljk>?MN(dyaY!=?G3n(0UPM0*<J?n?4rs_1Q&Tn)I^t6{HV^{ja|7OR_9fB*mwz
ziOnaz2>NW7ua=dH)k+h;?NZmGbk9Pfs>I_RZ8mlCqfK&NfLogH?jZlg_?Ry4P5P6T
z7G1ENv<AMt0Nf;XEqkUkqcj?xy2S>f1}-cDs;*PZ=DLv8mGaHLax(kcOp-^c|9ss2
zg>v_ou<u;Gd_hMCNO<yx49R8|kvbz9jQ)e&6f)P=)7M)rD-IZoA9`d2oasH(kxuUQ
zs;)RSLP5Y*44x%sRm_BcHOzE86a&d^@iyE|Ll3^}ozKu@6GR&n$IX<mS|ONm2`j}j
z=_WE?O0-7LdAPlTd^&h7Xn(AO{caMZ)6zmh>@m15QThI)LF5^;Z`1$Mt;7Kxcn))4
zB21CdF<!g6EgOGY?+DuGrKsABEy@mrZBsFR(i1j`jgj3_Jw_o}F9xg?@u=WJ<Oc%w
zyNsr@IWQ8F^UZp$aw@Jej&QAQCGf)MJpX3+;GoMsKe0fDVF>T$n`mYUlT(3&`9;d%
z70MD=8P_8kMYE2+f&amihV802s7Js#suIciu!ObT_N~krvHkwx`39p$eppy&RzGNv
zc_fwxjU2Hhq?x|2YGZ<RM48Y6cinp4$Y@$dBAFg?KA!T<(K9;Y8eTuCdvs}j{#Qih
zo5pPdW2ZCXRYs4cua~Tp&UM$yMmUz9aScCEfQG-Rjx<!2d)P|o8tR4QPNBoz^;pFx
z4i5A|j>vQMjjLvlyD)KwWaj;$=>gvpD0v*m0&D~4CIqWyj$Gn4${lCn$-{`b!&l?$
z*=>lH%8_1wq;NH~UHaF2@>@qV;lYtdwH?tkv9|+gXuj*6<L`uJMnIL>*_@Ape*`9Q
zrF*+VtKLQNvw01-PV`77Ki_ie$zXo$tA{Z>nl+|3c>C|UR6A+-KFXQx`9m*z`-`Yo
zV&P*D@wy*{yXNTZ4~Y{MQkr_w=(S&CcIr9YHC9&M6uP!o15+BOcEH$eP4@6k_gdMj
zPdn~;I017>8v@aIiHU~6^GJmmp<D7e31gWQr!Y7EUUUX5a;%8GRDKgK&)HjSuyD-6
zP-oB_xth9i54kp04x6yehtXdvF-*(I!!zFTNflMFD&gU$Bvd?ykic@DPX!c0bi}M_
z_IK7_lR+@)h2Ox3eI>+iwe~j^4xB6RAXTAd6eZ%JW{Rq6-^x5xWyL?a0|~%^<h$&u
z`u&En)7_e#>8NrA^+NZc+G?`XQnG^9Na+Zy^?JB6d{}!{?&^$_>Teg}j2t_{^x4e7
z2KWb{6a|)u?T0>LucXnD7q?x+Cv{G(Oy6~%YTRq1URK{SFzJ~op5*!JLaxD`z3}wM
zY}<NAg4?df2|G{Nq=rVrZ@AZ9esU@^#~%$CNMh~;&Y*Aw116lpmX5i>NmM}HLFEGO
zZ_bo5S|-UKTFnY{Z#Pn3%;>y_m`m3zspo{YizjwM1_jFglV4vUkCS-g$K8(u*nu&y
z0JixH*rwe2u@v0kyk!YkI5L2%W{0Yf$JYgZ5jj4hGyZlc0aBSzs>?txDze*MNtJfY
zP4|6nv2X8^UFLO;<gbr4#&XQoMhMuqzpl4|LaGx=S=20s>F_ECfQ&d0PGtUJi3jk_
zHhTmM3n}t2szPZZPlqeNvBF^_^9hiZ<(A0J0HA?pX9=4O$Rhypm0ox@VtShyX1$w&
zEtUP}W{qBnfjUd!2P&k-x2dL{Ipc03;wMNfBV7XgyM(o2X>|3Xl=Xw#SAhPxl9pZx
zdILQeBKCpb=Ms)Uc3bN1d$0*)Mkb6*?fjjz&L#Xe*>E)x!spkVV}#7Y+=0!6TjA^L
ztIH*Dj)>Zcu4vu+KcvI~8^Xaq>bAe#23u{|-nwlI=;M7z#Iy+~74=12_nixdOS3G4
zMgF8OZhuvJpPP!B2FMdvUAC&<YA;PhvZRP_2R*>9e_yOGGg#~en3f(YmiHn3WX<XM
z-wuIZeoVE#W3I{VdbEeM-dDp|O(5R>z6lh3F46^TTe<|77xXJut63Xv+V-GIlLt^I
zmUTf?qvdxpg$KhogMr>0_*Qp;;DGD9_VV=qpedb}b>j!a=hx>*?n2TvngW0o3143^
zet6I^+qF8Ih&Ea+30YH(GXi~mXkzr-D5@x>?DLp;!C2iKmc>qzCspzvwHgyq>mkPF
z>vK2l04QfN&Mj-bGm4br;0mAgGun&#t)?ers-{6iy6k+4J{Eokpbv#_M34+7d*R|v
z5NITd#61`BM|qiF)qt6(`=<i;Ti@B^#a0!5>h88Su3Nt$+$p8@u07MJrh1*vAs}~w
zZk;;GJGD(&QdyB59-Xq3n8*8xtcYa*@gM;fQOF&KL_mzi^Q6#Yu_EqMOJC%~gye42
zukQe(D1wf28>eS$=3F)!^+hOQhciGW#wN)pidf1WZ5Q=q%JhEGh+4X%J{@Dfn8l8v
z#~;qeO8nw+10~wGsapLa727QTKi^=^^2x;T21NXEwGNDb3RWHDDO?3$3&#k^TU+Wx
zYl?1`l|7p!f{a(o97Rm>KL~U>YQ9mbk522A@vHT(OB_U>HnXE#LqsD&1XkWYxjhMR
zX_cQKCO9al-OTKV2)f%)TU1sBHecSfHaS5FF0kQ<4Pm-9<HcZsjNF8FDu49G*#!4@
zKXFiUN2k8bD_XY}w!q!6d@vI_LE9}YI4vzyo+~{>Q?ntIb-x_+KMj8(x>_qTp?#pq
zN3|2+c^@aUQZ4J=>IkA^^Qgc4l0A>qYA5GwNpzo<xh3ooBYw?sfpu1plCcwgSBQ(8
z0*C+>jNl9!tE4F1sjT}9DKlI?m4+0SAap*rCw1}+&90*mm*L}URn)&+Mz1?^@0{xj
z&ZG1_%CW`z)Btp=h_=phNpY1nK!nNu?EYj<ZlI2a)4*@{zIX+yFi$?h1*Koi8^#J|
zyt~uf;kY%dtE<4XrX_mXqz9Tgcm{Z4QEQLWpxokRoJrUR^p|X%u(%$3R$uVdFzw)E
zw9Vc{CuM?iEBx5>GCMURvZvSUm!I-+m(x+t#R+c@dgL`GdlcJyJGoyuF+scXeqk4Q
z6f@C$&d+8<=dc$W(=9<!xPs_|tZ%uK=-uwuU9xVQ?~24{7G1(L|Kmn?%fX(I;x%O>
z(z=4lAn}Gz(%FW`drncw5tqW~9#KUEd#Xy&9(Jr9_bT&b_w0JZ!O+WsxM7OP6-?AW
zO&~>0NH?d>CWjL)5#Q3dq=3BaiOVJ}bbAwvy0}V+AUXY|PfwkJ-xm4hZg#D`Fwf_k
zli5aU9*<o53#YACM6mHtzjqGp&(`eiB6|BL5O{@rNQCdH<wji{m(gYMILrF480Ue$
z8lKyg8xfIl;y;tewrq!uF|UQ60cD@4!!koy?qkgGC2X-de2(!n?hE)bLQ;h5UI^?w
zW>NQ+HbFW#fnXHPsZb-sewoMMzD6}ORdNYX|1pN*C5_cVYvd%R;3!u$;o#3F^pqd6
z@q09^QKj)o<8M1*GQXu;tZLU>$3jIXFmtZb{T%UJIf_>v3`l~-b?ltWG<<pHkGGJC
ze=K`131^=mG_-!_?>X!4P!3bH!&8Ct(|Fj(HF0h3UxRNd3N3Y5Uid}t@DueU8>9GZ
z`d1`omtTxm{Cqi~?P9D5d47I9xzkQAo9a^RJJa`<%k@(K;4qCLomTowT~m50>N4Uw
zmS~Nz%c>z$$#ss6DP<AvD+NarxBN80`M<Q||BT)OF`PXOFwFnW-HKEz4N$F%msIAH
zv!(t)ZbizaB?57Oak8T4OdbUQ)i|7pd=&xMTH682I_ChCb2Cr$!o@G0`^CCu3$@2L
zB9A`aYf$?WqxLewFYSm2s-g=!jsP!H0ATfh{ROx`SW+Z@2_~ZSPJ5;1N~+Zv>=M1l
zP|Yc+>CI}KAu>oKy?=H6B-U2_YUjx`;WDEhwZ6*I)hC@|MHi|8mWUu)KxHj-xG8Ed
z$6*ww7>|b%d~euu816kxovV|>o+8W_MUPb{g%;O($=i#FV?B`wXTNGzblx^RX~-_6
z$T<~UoZfbhjyo83Inu>_(hmEp?^ZoMDpSTw9VQ$<xvH58y1nku{V#ksfSmlJJwyM4
z=&-;*67u8Y7Eb@lpf>?=Y+{}5cr(SP0r9Q;Fe91Kb%kWVo-5+TgJcaV|C_+Y{uhCZ
zakpV;3=TwjPq<{^?Sv<byHs4A+bAM{+JEXz{(7`hro(Ha@Zcjq8bx&qK5O;ko)YXF
z)c8IB19hW|?QIQt&o`zzZv#0Th{0?KiN>V~Pd3J~TpTE7l*_6Q(4#wD`%OEC@7OSI
z=!{`ji(5WI=|GsguHAi?T|V~qocf4hHl&2KGG(l;?h69l4?A+aer~9yq|vetSBbCC
z=Drj4WeK_0W@BglkDZ*O|F1yYnX=-dtS@J|B0d}io^C%o=Ug|3nRBXde;d3Bl%d5>
zzb>399D6?Y*I4&uk06`5gM4CE7>9i|tBO$Tu?ci8-FUSv%N4^qJ5rI5w+FKFO|g1+
z9{|cN5E*6moH}YLMiB3^*b_2`7YBhTEvt(xXrl4@iekVKGl2Nr3e|STTO0xoZ-$?j
zF2@Kfq#go*u~yz((T3l~OntHXUpS}Q#Tf>PUnF0L;hs%gTVh+w3mR!xH@J*srWokH
z$AO3?Vs4B%->GYE@O0OX#^Qo3EJ~u8O!EL}z5Z{;F*FrjJ0+m9(?cCJ2irv$3fOpp
zZw{h97*K?`@U=Z<8ms(0A5^=shx1(`E}@5!eZ?Fa!v`~)vcU*g1A!8@3Wr~eC6B!v
z>jO?1sw_=qtaD$XFo?V@r{_Ahfa1Og>IE;6KA4r!r@EcN0}JJ5qN;XBpA0B<R}3_9
ztu<mz+-Bn?au<Dq+oBeKmn}3}Lbr;8K86}1CvrLcIOE#G%(LGMP8WB0pI&n8VyR;a
zWz|6s;O4IP(35^xx<O`qm78%)(J~W?=?f51xBW+q-y8LWr`kLl$zE2`1^kz4puux2
zg*pqq07plbZ|T-R{fWtXzqxq*&>Js3QB|*GjP^eQp9465%N|(fnt3T|BLDDl(1W6E
zsQ-b!_Sa{LsG0IUolf77csI7X3tIRvw<ZDCDFTrr?ys_X!Mx8;cBU8BoL)_*t)#1h
zQf1Z)9>LMW6DRXWR8I26DsZ1f*HSlT3wpu!j58IctKn0tF99#e@SSWMb%h^r7=Wl=
zhTg<-*^k^GiRyR}aJ<no2h@DP(qZD#M)Y+|-*N{Oz6Ep!xz20TEnyA?QjLhw=o(L;
z{Em#mWdn~ePWW&&Y2#|uda13p8VF>=a>e|ZD`sY_n<0Vi!%ZN?2h8tXL9EvyC4%CO
zuMY7$K_7cM8E=)_hy5u>(wm6sqUD*yNB>|?6hkmVoL$a>@M0$_K_H9Q8>?Az4wNd)
zhOZcKBzI<$?7gS#Mk!ZkASPfOjIT)NM1>mg5<@|6mWYyVM(P#$mJ)ocr}L5Nu6)8q
ziL4ah(nS%3PpQ(p*Y0zS;VVLfEkKlkY|%Rz>|Q57n5fL_OU)@_@qzQnTl7P&zuWA&
z|9EJbDb>sBNnHELxGAKi$f+%UeBZD0jR6#Kxv6Z}6W|c@P<sWK)b)pDokfBLRopL}
zQ|%(`VTsV@lvNg4UPu=9k)5Xkll`RRR*K|FIwf-(vrhIA>rV3Oi*1{@>N0oRa>0~9
zDXf5jT@j~15x50JYsuo|vqglf8-0w27{pa(>$|o+g@48z#+6!dOGKUDbr0yGXF$G-
zVt2k~?u>>h4I4OiO;IJ~?A-xM<Yct{C#Ft%O}9=kP+>MyG|te=xQvJ;N<~YAV`V%B
z#T&Rsn27hN>0avYx>twG?F%fcCP0JBEayf!A1a^#{7h!9jfv{l`my6c`Ksb;8*2gE
zd2BYRV-3IXb*t#Orz+$tYgkr)#y><zZ^yj$o(t<c+sEpcm6FAje9vuug^~vM^^Gk@
z%-n0mur6q6xV@9;p71Jn$wF?9&hXn^n5h58W8_Y5gkvGbK+=oqK)KZy7Ff8dv#k{2
zFVI2IR@lc%A1Cu)7Nvv{qx<ChukcSLm26C=#n-J**{>#-FjS>c6U%txKwT~@)?!Qq
zDo<d+)C%%lugWaDLC^Hoya3TTB=NOi#4iO?@IX+JsCbVJOS%8Cry=N;5pU<>A=Bf>
z$p&E9tgQ9RIKSO+*CS^V8v7I50QX;8M4a|{x4^3^o(x@k3ZSj(Wsa_wCxyV|M@OTE
zzYDg#`O6n{=X>(K_I#JVS867HwU_osPX{IvXYVCXriS}~-DiIu&mmY46|OL?tSC`s
z!Qdlme87E5`_G7lr=$(>KOMIDHmRJg{<A~;(Q@}3spyB!Xp*UMouvd6LAw(d7B~3r
zy^mHraE3K&bJ}U@Lrda}jWO}nQfcj__JX7<Qbi6a{Jc`CaKA5}@=@TKm95gjuICZh
zLDGX2@zTo<ls7pHe1LPoBHsG2tDjBHnDe_I9LR`y;5C3BP|UC{BWt%$=*g94_Djj{
z%%z%4T6)*(ZNs_3yOHa^&)|c18*PWnmA7JIDVD6L!TrlNE;P};HGo6*<uPcpdL#S+
zDL=9qHzs?K+`~DT_R==eeq8m{YRTGParG^)KNL|3&0)K}OBxp6$9(ax*GpkV32x@;
zPkMdD1?lZ7Fq$oim<^0@LjMdGzQ?%(dQT5QVWG%PJnTVbV)Z$1r+1o3-RQrj3WyWc
za3c+K5~%DNN-Mt9r;|p?Rzj2$qQZhY+1}nR$pKD*G9Ui*x*aIF93Q%SYB}3IrbX7E
zV?4k)rQ@OjHo<n&6~!9P%8s_gT2~7y<xF~X6!~}p^gEDgchiNAie@XxIg-k{j46Ff
zVH8}Rx;@@i+l=mf#@T#mQol=k#B1@gba<b>Ej^X5w)ncWQ*7-Sp`phM1FbTdz(U~x
zX?iZHdV7L)d#smDnhp7-hZ$THozlWnsjhmb|I1R8Ol1`GlH-yz$_DnPW|zei>%V*+
zTWIapP_)aG)G;i6uX5+Hv#Y!s(V)}K=|glExD%q2)Y(s|P#zYDgD)uexqBs-;kqZ-
z_jKIhqr5!|X*an3Q)|s+NPZ*M@9e#?)CPE`d4?|iyHc4Sgk(bf1nh;IH@v5ubDvzz
zrIuI+o`z_5uPr9Upug{aLf5bzh`Xx#Z*&{S4|y(t#W#EAtbVJAR#-ebNwxKd&YR9_
zjdZX>5Usi&6*CEGUs_mIvQ+O}{kpcKF$FyxXyH%>6>Me9lmM4Q!y?N*!4JYqczynz
ztBy08za_?E{o<1chgc@Zs<JN3Bv7<_>7wJ+$xCv}Q7ub{Dkhgo;4`%c?VOZx{+*=3
z$-t9|1S}3wJg+BdC@Js&gw9swUU74Z<+I&oO0N|OEip<jm7X+lJv16I7&lWxuu*dy
zn+EwTprE)l?AU3q8mlxz>A-;tN`H5~P|x)E?9O{`y-R{Vc(77SryUyqh$!Bu<mdWY
z&2<%TC>UFaPk2f;6+uPtN#TJdw=^=1a@}}wEt=UqSn_dtCagV_)PP|Ej=dfEHQlQ|
zbiUKNZ7x-gb#0`6&M<}*FjPXqs}a2|Y_XZfnGuKY5p{EQPSstJkxBe=%`P9c%f>h;
zkA#bms$$SmGZfQeebYNLrqi<YBmSTaqg`yL4C4e*p`;=s@ZE9F+exGap<^up?6Cue
zNMyZQELua?#h=tc#^ZV1@`8(UjBLH#yQJ}?;23mUlU_;92yPenEmcirPKILSo7lI>
zRYpS)hfMC<^%_pA3~k4cl+4@JIF+s)x_@@L6PT$w{%n@O8)WAUbj*R<3Qx1HzZ#is
zK5iR4MDZ9qoA{$)>5o==yj5H(sm-+2optWD-<^PARcBv_!Z1bQe5ZTpJ7JYJRuyeA
zw)M@EEj`a_Yu>z%e~7Z^M#oP3$BnJ|Y;Dqu7+YJ?8D+9cwdS{dE;K3~(NhfD$X-;O
zgG&Ftk6Ny0<uB-1Y-0Hj>D*bLiD{uOQdkktI?{tBSQF(V^NekBk=&Oa!E1gm6n5om
zXn~6!&s2+o`I6oocHBR6V(Xu33t<`5zDse^H0`pTlt+m9b-OeQmGzvgR&gD8OJ}hG
z1DET|c92-^^S|cfZyXY>kg&z~DTHMZWJDZ=lI#ud8`I><Pqa(K`&op9`A0|yAMv7R
z_LZJ~9$!Z>lIzx!55Q{nI@GLV5=06x2ey&v*)G4mybU?&y#mhWMPsyX+NcM;LFc?e
zUkH1Mc%+0H>P;IzHn~X3yrVcL5pSj9P~PNTW0mVaeZ_AyU@>~KVz;)M!M4L8!dVR`
zv%jBMy6m<;IO_+baQ1u4gO6n|xuI&W4{b}4Ca$jNpNuZ~c;Hc(tc3i|B-Tamk#XS*
z_1Sv?5Whs$w+hzv@|nGbYlac%c@HjhB~|u4f(*FqsRq}CBs5#K^DX%mXR=%7(VA(C
zu$|gqzl2krdcVKsfOoT((ZjJUJIJ2lm$LP6>6YB!hN?`?^~~(P2T{v}E;kEr*mk&c
zi>Fs2^D5NmI%GMIQ)#QWdj~89XLtb_sy3DOsvBczbGF(ohCj+TwOTH}D4dNw+k#(B
zq9{^SCpa<`FlCU-sxtmfKCx1@J&a4#CP5%d`v@VQ09H0hhpV`qrgwdem%Z`;N>|3A
z{r`%twcd4rZuQ+6c%6r|B$kp+w|Q*}YgjAR5L{bF43^a5h*#)R$p{?QmeM60kpx^6
zw#!g42Tv$QS6R~f?We=)YZE;uV(B;o%Vfe}@D5w7oQ-^6rRs+n!{O~f2D)pi#ouP1
zh_J4!fHG|S7O*u<!V>bhLw|2S1+AaI3*0p*DJ3yxiG}RJZ%sxsJJRZ_GSxKuL`R^I
z2uGooi$lvy+4T~OAhPcE;`FDI0B*{TVkRfniTit7nGUHx?>3Z#=^nx~RBAVL#Qx)e
z$I5CHeMVaA=hfh%<v;Gr^Eyvk)cr7(N_#Uhi=cnI49umLyi}@yd|7b;Qd&pf_R6_}
z6StHHtsF8O9qui8Bhz`YnexW&uKez!{!egEwp{uZEVMn_U!`Uyc3yM7J2c5GcweXL
z(xBOSs{!1M2&j6lY&6&BxV3q;c35gx=}Z{#654Od#?&nK*z<X?{vCQ8<#wU6CIqOe
z%G~ClK>kUGMHFDX>k$+0@nC($oPC;Ug5>DfFgJ_qafm$kAhpKY5~ZxCG<{+l$LP62
zq#6^0*UB=+SO++<UQs(0%x>!G*n6s06n|MfDtA^s2|>1A#aDPaLD%FTQfv@M+@=~+
z;WK>ZA@RJJRyo(OM1U1Q*Yt$%FGRS>TekOb07zycdg4(seSh0x#deKZdfI9IXZRz7
zBVN|!t|~u>KoYfIaz#C9ZDZPQEs|9G@(B-k<nW%E0*T5iiG|;@!$Quxr{m;zYv>!L
z4P;Y3hSpaSC|$O?h8kOc$qvra4mCnRZc1QXt)2QFAh)_r!xukzRuXWFyeLYTRUu6k
z(urTX#zix^&s0VnE30tqhD9Ryc$PP85tkftNg8|?VM0rRhz^w(3u5rkd&4H*6Y~xy
zNt%qEYhbz_=_6sya42GxTJY%nfiwap(nW8h!WJ-`D80ufK9)lCa}L(utrR_|$m@J-
zfzjITf6Qy*+TBl{o!7=`yN-OV>?(Zp?(7xm<l4=zmT8YmS6@y?DMilz#v2EhI?u^J
zsu#dDbF;7oVC#?n6C}=AbmeMa$O|gv6SAU3_Mx?lG)Wm&Xe39$Y=P|*<xAyjeWQ|8
zVgg~ZjL(`O3jmgD&7;qqj7e6{+<q(}I`K*LL>dq<N@J@zS@S~&{ACx`F@T7+3CJ^c
z;gky#tW_rSFwuR8XHwA?D*IA#x#n;Z{15UUNd#4JF~@m24O(1l(h>#smRZo(_zeIy
z#El{{)Dw~cVxSA^dgqdCROlL>nMP+^v?H_*Oz4>!W|CcSv8fR<xKiIERE41UtFB@2
zct-X$dIKcwMN!J{0WjG3mUMA*sev-oX7NUe{v=9+h9${!>cU;MG-B$c!Ewy=L7+4O
zkM*|(HTrjLy((C_APEhqY59KKMHab>7MUt(`9ZHI`Y@-EzmIjvqHjJ%u{JgoebFM8
z;y0y#Yb<reA=S&p77z_c5PYBD*g~lnI7pUk_Ag_DYWEE5F#R3UhUUC15|Y=(Pf-(7
zU&75)RFequ3C*kM;>o*)^2qjyC9uDd-C)NeWpyERm%jOOBXV5kgCvAkS`1OFPH3!!
ztqi6Rz3C*%S7uQ~btVLEJn2A8VC}^PhoS{7fso|ud09GM>3}F|aC4|TD0YzCbNUVa
z&?H9zYQNoh^7ZXX)^bU$+Bi)D`_2nF`peef*Uv=)NjCsTmTA|gbwcmTclJX=eU=2-
zRy_``v_w7Qfa<|4)+IB{kF5g@OX#&BhFJ1Jzd!!NXa{V*#!^1D-<yFT-~L&zKuY|b
z%qW5mgr?lnyk?OwmOVd(n?#dV+ZT`LPo(Z+K51vM(Ongxa%M>>)IKlY$wN8Val;GT
zvUXN>Wr8wdYLy8bi(8GY1<DrIUR0Dzk!?jEX{D=-S>Cn^40ojX?C>8N)5hTw;R#4i
z=<e*8R<G`73mNJZKKwjF;%9j`_0eNt^?x;Y=J8Oj|NozJI&DYVRCc0IDnew<GN)`O
zS+Y*qMuu!5*_knIl>KzdQbdR_7*lp;<b)|ph%qvZkZl-~Wyp--cir?k=kxtNe!u^|
zkMHCACv)BReO=dmUDy4--tXu8b^YtWx!=3L+j_vCa;vjs-n266_z<6!`WYeSNBsmc
zzh!F6q$FDmDZeA2vvms-#jFE9m!AEU>NIkz&MdyD<X0DwcSV^}Zp+#0-W%uxVAn$1
z8<bOPX2E`UK0jGO1?!^QeU2nOfork3qmj9dGd=&1-J~>6IAc>|6JCL&u69K&cTBz*
z=sy?o+{l_K&R2EJ7l3s>!tB-m?3=EzBV&&y8YXXl8m-B$$y^C}_I}coBz0M5{#0IM
z(s*3xdV#o7iK;q^8#OQzT9kHZM)+33jgbXhdG=(|;yA%lOO@p1j^wHshMj(}f6P<P
zY<ETTSTZW+rerT&0fi?#oF^EEXksuHIyPC0)#S>VD+uaKC30<$_JhmKZb=uBVIL9V
zS*YKjp8GRj0_JrnbgVf%^~G*-Ow#n$BKA!G4b>oM?{Z$-m1b?nlYWELmgfFyH`nhu
zVYA1mXAiNEd4()v?}}~>q^hm*AI4V5CldT`+F!-zB_%He3NwCXiDHXPM3Izh0=Ph4
zu?gXu=r$XB)gANI-2NtB!jZlVbFS-RMq8`;F-kSAgKpG9WW2iqmOJ(vW7c2eWFH0@
zykB!S>*-esV95gvu(Pzr3z6;1)h;<O#%=dX;_b%yQB<wUmOZSjvx;sjkH6X8qb#ka
z>-d!_9+eOm(;}byeZSyOcGm1Y%>V$HLm@<?7Asn0$t}OK15!h$P-3QjaU`2rDYy4~
zaPZ`?geF9wU3GE2AGO*Dg-oCdp@u@V>k3+$14wx7Nh@I+j%RLV3;<<!rtQbbNZ%^H
z!x#LH9OQYc0ety(xZYYAzA-(BYkv|GRu}(pPmYPL3Z3Ifv&;X`(!3;mUD#B(NzC4r
zVQrpzQT%Qj?o(~Mo9n~fOIHeqO0Dwy$T@al5_ieXSpYC{k}57}y<9hT!L+pMLRf|Q
z?3*sqx%?8{m4rG!p<a?#CR3|VFVQ{D%b_<(YFsm`wcwMvXoV@nMZCR#xh(MV%!ROE
z;=@M&QB>V#55dHqwd(&Yc2~(o8}#Je(4aG=OH&~}wCl~g^6w=riY-C@)5w`rux;D@
zHoLL+-9;tCcnCl@xvli`I$>($fus{e(6g6%%>63Mo7vmV1Wa@?4xyStAr#{mp<`bp
z<6Id-S&NgMc%WnvW7?WNj;5r+#6+NCi9wq@{F$Fxc6}%3crZ-c2DOL)CeXWA_~7wh
z1m7(m-*0kX8rBxtaAZj=dVkjX(kp6oZBi0;Z48%${(6iExeaHi9-Qj2L@itQ&P-wp
zQ0BN<0)Glga?m&+98kiYWP)eH+P9Nyp|=D}gO%LI(tN5MFQso&UaKX1lk5qPP?dqf
z0viA0p;+LiKZ)p;lR$rKR4-{oIMm!4H^r?ThN7)Hi|uFcg-++Cmd;CVjA85hBm;ny
z0zPa1SHBkj@c+0~c4{eh_GUawl#%5&g%e*4=H<q(4KX=n@pZf3dA;!7uHa73Zq82e
zS1}u54_bz4=nVaKzQwoX;Ju&t?|_dpp88N0q-*_Cx$M+J-~wA5w8ex(sJzW#`&A|%
zfuk5BLf;6_@3}JYKyHPCSs9|LEFK9E0jPnH5<u^XCO|&PKv&ei<o)C_P00PpKbMde
zlJUh(p(9ZpuY8qW{p7qm{=k?LyQ{6&ggA*(8S{0Y)l*^D9{3hK!W<O>d%VZdYtJ4z
z>3s6m_ZMy7)=A!6*?)mKD{Ni&vf@wq8-clnR4&8r0JgJ^UPGS<>0OMN6>bm_XrsFO
z;1^a#lxs~erJWuTr}eD!oiKkE_I2D|EF5OAfb2`^@E?dQR9Kj{YtwUE_UExvksH*=
z<$+*f0Om}T#Os1b-ub^C=_(3h6{8vE*4P)a<?_!)WBW*sp~HGl!`Z4L%-9>1DEJ$H
zYQi5qz~Fi{e)zlBjX*1lq5E%_=C!xJ#a~>%RlJ=)o2x?ph%zLoDa7wWKCq(-H#?L4
znw^tJRFFn4>nO6%NAe}wFVrLe);WWLnJxJI>p7CmZy~oYmkB5r?@d$^;@G=Mn^_6}
zTCP-3WX8;0D!pAN=Nr;;N5MDleCf=@ae)M4OEc|_f2aB`0>1kj0=28HanX5mnYi=#
z+TMJ+xyfRMzJD!_sYxf-lM<WO$ceNJ?(F(ChWS28h{S*36#Xn*n4VDz^Li&)#g*=M
zxAw`g7krGlDXK5`Hb&^{ATm36`tFyw`wOdv&EIXr#$#y2B#vNJT=k!l{P4!$&!G~h
zP3J;XWo{~K*jA2~eY&+rN6l`+kycx($VeY8bEzyS-Kn%e7*m_n^-lkCE+G875VU+O
z<veyESN)1EfgR{>Ennz=|EsXe_SQ6fRO?6e?Wh;GbV35kR)R;4rt*hI#>@4RJ(|XH
z?UL@5`6tQE8>;Uxj$`wS%cUu1@?ClittA<I5|c|b@a1B9N&G8PRc~fQCcKe2KYEq1
z*7fo<TQ&mI*qbFGf+YaHs;h0kRn~@v%Bb^zGFeYptH0V&dMC5UsbneCKJ|ldvM91~
zK6t1&)PBuzY?_*+U$1%FZM3FrbA5aJ;Y*sW<F@;o%^Z9%Xom4+cTh>T!@%ASF7_C?
zNPULZp-k$X&TPD?lG+5QKt091H*C5@5B7?gi0eDhE59BsSM*JNf4;7$N)G!n*}Sor
zW>9~zi-A^|H;$7*s$U@Ny(01NW{Dk1!WhJpIkQ(@X}l5=LK^g1wPw%T$4RzIj%Bsl
zniMbWYxdV9`vS5qkIXF95?Blyu13BT;bY^xwKEW0+v&fke||5z9)&;U-)+jCP}(mt
zti9vr>(IolcX0H@nEmr_nDeF{@NxhBi^e?9o%^dUfwU0jUrc&Po>LRz=BU9Xx2jC%
zZw&rjom)oNFMH$2B<W|C7~A5<pW}C^^|)QAz~fKITQ6UfziG{m3mszAu1~Yug~sHx
zulvK{-Aoa=$q&@jhY1a(nl|UeBr^q}Rhk&@!-%^3{O7>-kWV+f8E>ZP%g@6{oHP8L
zlei*I#Ggi1|L*cDSD`LH)af$?q|@GUgi$^-Hl{3`_q`*UF1pQjiVsifh>qxucq7}i
zt4gtSuu=7n;^eJ={)QVSz&S(~XNqvJWPT|PK|Q5mhBg5S5fb5=_X}4li`pN?gH#`C
zQFCcEzstiy!8!c5X7W#0S6#{u+q?}rqwatFc;$!kyK1NT{@r-9Xi_K>^|HUu?&1pW
zsEx1!Ty!vA(7?=#{h3NaW6n_6o%s&5Ak0J01LKQqX~n_N;vzyX`!`P~n3uRfUYsm{
zqJX&h);N?h!R+ynJ%*P#>4zq3&uJa;O{MMcsn?;F98EV3i?jVLiFa^5y7?!XNBk(M
z%t8Ca=DtQiLs(4Ce#a+rL9)Wy%QwpJ6AMd3@BYrt{GR=-0`K*vpcEU+9mo%EVIF4O
z<4X8l*S570UT;xB#>tMA&J->Litp3V4Q&5=aNaJsYu6%p|3-T*!7uwoPyzXYZ>o?N
zqj^7S_^^riP18LS(uo(T#`{1Y#B42;W;@8OwhoI*t%TFL+M0bFka)-*;*)e#3O*gI
z#HOn)<-e2BK~4sWcLm8Lc{<!#A+3sxBa+QVTXweSkxe$o-soG~?cK5kZs_yx?~pz6
z2F8W9t1zh{kNS6l0r*0kW?5kmGLyP*%XN9Vx&3~WZF;`q;8?Q6`O7=TGztHni&qM>
zY&c!0m!J|Y8uk1&8G2=Lg(2M0N(DO{Ba(7b_d}4A;<B|IhcvBOh<M_x9XJRz`-i!4
zsF^*!B<#X?kT%P>;==H@!bcsGU-uY!Ppkh~Qm}3zmP9^_Uwbz{R<ng=3OL8#h8y@}
zG%<TU8FiVzWU1Aqh5ON#+3D5Ne&s*oRZ8BsmCvly|I9j3M~9O#xAj~gTR@H7GF;3p
z9FS&NdO)h`_kEu?UVd)wrMXV}l^5O|PKK9~(!Yl%v~|dRQ{#GF@}@}U-${H9cxVvZ
z-hIaKZB|Y-DMR;x3j42duH*a><+AW0eVHodM%0_Q!X!hY9Y%dFZXsw0=0O?4k4(8I
zlkwx#^^b)_g(StbprHn0mjBe<hmqUJjtx-BSEd%xuGetx-hp~e&`C*9dC)(|5kLNZ
zwyjvs5qSSboU#Sw7bR!33JG`S=amMHavh`~($4oiz&x+09fHAvC&v}x0N|t|u{;@u
zf)T09X6)w^@3Kva4_ioaQuvDaW}vzC-h(UiGHRJ+Z-b~^3?aT^)%H%viYH1+PGVGI
z3!!AlS}822UonM$I|$}?-;Hr8K5{ul202yaf9LVYzf6eTvX!l|(O0e`<+{Sj^#hZa
zXHkr(JvA+->BR*|J0dGw^+YZS7cbi}b^QVSkLPNc$G9Gctg!Q=%|Ut)M`5{DR{$RP
ziK7%9s>@<(+a4(fQb6E#(}n}J*!WIHZtyX&?8XMfR%xq^StAg+CUNIzAi(@yu-3}~
zWFFo{4e0_AFgT=H7hTWQqC0Jsj-UrDo@>#aZxR4;Zn4!12q<>!aI=_Zl$-W9Pg)}S
zP7Var$L4#JlK?0B^sKur5~#PSTZCxVjzo+0@vzek>vgx#Fj)RaY9{)tUB)oDJbb#_
z2)Pj)&C*&aeGLW5b+8+25mW;E&Q6JbeVW^JCC%4<WusGf<I?zdC=@-3U9p<csA8o`
z#RLB1Zs1}E9xhE<h}oBtumg5`1ik+0L7$Qhj)Es~7%~G2m)51Dhr_<UYE$|dumGC@
zd;hsat+{@E(vZrt!c+<D+I&zheIdoqk@vxG9sT(x3^rvX*W*s-_{^knK6ge23%I4e
zf~4$pcEA_!NS-F|hB-6V(jzulK^Z-QowK<BI9>X350A}S2qBL7{a{nqwT9;_gHH4M
z$-0#>rER@XxV|R8zC=t83WEb7xpm2CcJo;-Kt~jZAfvp+vpEyt4}(SLB7Ey1UR6>0
zO6{5xPf(IKIiB_t%;Ydu_%HdY!PhaNbu&i+M-ofy!d|e-#eIaq-p<?^!XA<<zmSxT
zb&RaA!d=Dk$XN<xpj{UG9ljS?bIVO&KA74F=(FEu;h90dt$%%>Sh{iaH|#VQP{{IH
zyI^PB4{=H1>_#>=mI6;%Sn1;9S<L=9oQ;2(o(}Ct$diw-t4VE2+hJ)U;pqnc%)y#~
z+|#3Cy}g$>Ezy8#{XP)F57^)I?iGO<Z@E;CygDYFGyMty5{`$|L-{uLZHLJuWqxF+
z1~qS-$U@%|m|pH_G$rQY>#Z;MHE1^69Qlh=r-5%+=cqgOgRz+ClI8NkCEF!u-D>vW
z2BEi-GdQVRCDCveyGT2;3_Ekxb?PwrP^T_$+T2fiaQ&Rprtl?GPN$<Q>ayH&!vbO7
z6G0O|ix}VsL!%;x@n!9svX{@EEUE7V#EYN-PcVRVcqc4TyyPP{$%X8cUA3u^DRY8&
zuL!QNxZM)j^bOCdLGkQ9ZmgN_D~5RS@u<VFFI^|sr}(ZdHJ{;-E!QQ&v#VA6w3e;{
z#l_!;z+Pg81vA=Zir`f4!n8eG*KORag#NG|9$%vU6>%8F=$0OTyhlo9-!xo_!RytU
zt48@OuzfDf-v!i;<;nGvdrwK6QebkkTps+2xOe1KRww|^06e(Abcej<58n1-wMhBz
zKLFhmBVJw70D^<?wv79@^KplM0J;bNe;4?Sdr1JHUP^*Gy;@Cg1#lGMv`untc{EP3
z6fSAZNi?TKSPrwpKGg<Jf2zfr>#%#TxA_jZWkATA#y#A+LP;l7t7rJiTdK<Em6!iC
z)y|I;1E#kvgY}6M4`F<(US+sPp2K~o6ANq7IRqfUCc_T##27;LM632wGAavonVeTS
zjxAf7rBW(B{I^s?08*kV;9B0{x$YRd!vg<j6NX)6Mx8lsT(?4xz>hZoZ80i758!cf
zT2CnsnN9+q+>@2c>(f}PlUYa>t3mEEw|6*YDNqj^@dE98ujMyxfw_fa3x`3$&F~xy
z#Zv2$H}Icqu*e9M@PMM^w&ZShcyi0}zGK#gLB+`v>viS>8!yL$r9qeP>NtPT(NrbQ
zLf%TT=ak+_J+J8y;fCst*QczX@#EuPw)O-Q+mMuDOGFruAk%9NXrM?YyX!Gjgsqa3
zkLMPx8OP;Q)X#~eR}V2`U(a`p74Xmj*S7iA5L@vWRbK+p*XqekpKfUzx2e`e^t4Y8
zN%ma}lIs;=le*w_+I}q9(o$<vG`LGF5YXd#?kJ&gzpj|q*AivaTXztO8Z41v1HM*N
zL?(Brb>_8;qjFVIgK}fao{~M~<N{^3GAT#I=%QP-PYMmk!7ZQDsiX#lQ1Z0HOoHy^
z@a%ZjPR!h~Yo{7CO!X9N#J5ISSEC80ZOYB+)V9Io;n#Oc;=Ovi7F<;xJegb$etJ2N
zYk%io;|VCsixZk68#fpgj#;@er7uPARSzpLgN87u?AmfKzv+N1%@BBfEPfaz{OWjx
z9$+k?zTA11Eq^bqZQfh8<giN{?yr(tCE4TS!R24k48$c($Lo%?=r25N-<0;(wja4)
z(*?X!ml+`yHRgnbS`Z_pL@ZjI+uJ&s{*YAp{0We%+YZeN55~E)Lcdd~J*mT9lq}WY
zFujFSIwF*{u!pzm7ON|ttkCj^91N{Cx);^Vxiti8^A2-<bL7_UsC$+YL!<U45no3Z
zBy&p9#z}@YI64NO=Q{4+BPXjDAMBk6#H1J@5Pk`+$;UO8=DQtPQDsN&r03(0KmA&?
zUYtGd!!^Sobn}dIiz-|H6*6>X%5kyOf3a=~qkf_!gi+;*pP3VlBhU+zg0fP2_il<T
ztL%JUTbSxv=62T}dx)_zjpN9%lP7H^q{eNS9hTuLMapH4ICc?o+Uu1~LOQ*W#mTjV
zC;w4X84TTxvn)zUCYQ5VV%D~E?xo!Fm%+QB9?sUBI%z`uO7VGWBzoy7Dpfr&k1m3%
zw~fesl9^CsTVT^*Qj<_NFv1jh<M9IL-xH}k&?<@y=nk$xdMda3uZ4K}i3toHEl5aV
zSW5OzFl<EeO}IVy7BY~LDR5ZvyvW@DV~lhI*w^r~+vl5~54Bzi1}v~i!5+`k=2<o^
zT?^4)t-{@`-ER3j8QLK{@A0gpulmDLIa98UTf8SD54~qCHi19lIK^#@#j`e~nCID`
zf<E2S+U($oqYfQS$M0n!-dQyKkSl(CQ`Jbe0Y8TbkqxsU$?YrpXQ9({S=|@Ot~EyU
z3}oo|c58g~3-uYGe*w{^%OYxQe;V}7{g429MoL8<<j*?TC3LdJxjHMQD-+%6K`TQc
zNYQug&2fV2HBg!R`9Lq(F24LAQaF6R{cPurThIX8WeU`o%TL<i*u|e97TSXWQOJdM
zkXkPEyA@D(mpAi=#D1aD9p^!5jZNX*zwu+~yuWE=K|mw_*FQHK{$CX2Ph4jp!>98o
z?r`pB3Vij%hHDT|Fl9H4H_QRbnAa!`duw@l)@qOSX={^uNEjE={LsGy)h$ANuy=;&
z#yjs4bPWj9`rYC(s}`#yK-+>pHJb1?)`vGDf<SJJL){e&*M6ggLC1byr?9jadT?;=
znqmoL0BnlWA~q(UwgJul3j}ke%Y@5H76c2WPxA|#6fE#ArhCMKwz7d4vaYzXpopj%
z3Z`*J19??EByNM{KIed8elTi{6NSMXoOpORXCLlC<eX%QZz#>4N?1&>_q~c4mqUKm
zw*i$du+L2o7v`k<)_P(rIGHPh^_;fhI%}u5FxZ!l`fo@HJKo!sHD=Hroq&mU&I|%{
zShjA07lYlJfSB5tTgh}Ju8kYD#T|jc$}MK=xsqsB#o-92W-!ZpUjfr>IGg&bPP7Do
zDi7%qT{^LF@B!+5)onr~5zY%r67@Re)L}4gF2a8;k@whKw<e|%bXQk85X@bn+A@+{
zaB%`K*o{s+dKU1~;XvZmwA=ucpceym+jYVdg>xKZHPZMFn6vg$i3SVJt-_;DV^!in
z!}6dKR>cY$oYHYd&Mz=n-Qrov?~Ot6n5Awbbqhf-e8O`LmDn8h$=WYdSq0XDAj_=(
zxgutQ1k`BZdC-`Yg_u)d)04hTH5{|uEFjKXx!T+h{Ixzg3ZTf6At=%bHZGvZUsjHP
zz#ti2c1kP(qP}Gq(09zQ_hkM$7(4dJo9mn;xT6zP2!03!og`rG`9$u%q8cS?7Yw$%
z6SAQSE^Y42ZbKGD@A1tI-dpI50>(iQ<#7Jl8Cnd30GA0c(9KC&yX7R_v`iegkWPq0
z&sNxEPB+#2FQ;y9(_sHIx94n|pUGN_@g7?M>0Np4s5$ODA9aDTBHMXz^^xRvt?_zJ
z?r=4h$b(o+`jZ{B9+64C?6G`KpcpQ&pgV0hG%63~um7f>tFS&A*WfTApkB``p)P!I
zwh2qwmA~%#w%2@H!c$0_U-lYf-~EI?yIV2IEC{k3{=?jQ_5W3d+*Iz=_(6`dB?Uvz
zyHf}kKEv+$4J89+Qnqm#XWLP$Ii(nD{Ff$#U%V|VK=;_OrN{6ko?p1zWYq@qLED<z
zjZr*l>03qmE}x{;UbQ^pq$A7W6%I|w<=ptoWDsA`(3KFsY(u2z%DXrosM1UiPe>hL
zpo^-!_ZU!|KvIz--q_~r@>HRT*osC<*s#g4#zbuqPZ9vJPOP)#kyWaGRPAML+(c??
zTL57ujjZ%RBME3VSl5We`Al{?%wK^PyyP?NoiD=k48J07+O5jIJHj8vCtP&ncFv<%
zZrgWFa}~eC-OjOBNDr}x9I{_mg>fz?OUYUdtICv!iUV)9ksa5zTIaI~qRGE=sp67b
z#7FCZmn@B43D>+6n|v|_a%zRA(&H#ftg~z{gz7%kjmoF+faWpit7!wWkYU0KNf%YZ
zI64z?Ehk0X)2X&}h1Uc>;ZvfjC9}4MMSQ99(L$~kPwRDM-<-OUGvr$`TYH18^Nd!{
z@U<$&1ZB9SKSA*32|`}8LlU{c<P+Ov5kWP?WT~XFG+N%m7$e9gT35?aeXl<@A*OXD
zbRsN`eANe`nRD*CEOJ%VOjbRSuuz9D;SKen-+UR%uPIgIUMa%(n<*YnNci`7xNZ99
zAJdO&<|jRFk1l&KTvJ@a!VKW){SHPdXoCC&Ls^lR=r=DgLxz-VXNBiUt%5Ub<JUdx
zi26bc`wX@FH`Gj3y1lSwf@+D%QS|&s95F{Z{XjAft=InbkL6vI>G{}2XOFtk4rA?>
z0V7)U@+Gp$vRvYu$?QaXn-uOdKZ&!(R+)^nDie&iR()cNA!L3*Z|!JlZJUt6*p0`+
z3ALv6@HmgO96}!ur6UjoltbterWm)f3>iCobd&P^Dx=4Ry4Eu0w1v>G+VVM{<NL1^
zWyzyJtQK=C!p#9i|1}x6FlKt5uj<5byiLCff*ET_j3WPQg1t6C*;Ci<F=ao-s1for
zwhMJwQ1W{kLLCl5mAk5_K_?Xyfvte>Q>ml}Kr1&?SjH#LaCnEgC;bS2QhQ$2zGkkG
zUZ!cTX{x8NYP$RwlOh`_sTYr%KFh{Tn1@+M!&#r`)mXkNcaPyfZCr3gd|(q<x-V}*
z8y_t7@sWa_V(`<rFo&h9v@tPt-JQMpvX~E{+R#S>%&|u?hMx`PM0sliu?S_{Va$jR
z%~|gdzr7lQP$HdgV7Kb>Sx?vt9o5!J8_FcTeq!YIeNlm#z^xJ*kjPcW1vhKU7u7~r
zzKNO<#N^XgE2Wa+biM012dgFnvn%<-WXpchmH~^x<>My2_e}`PQR7<<#}j2_Nv{ud
zm<&>Bm5tG-`jR`6`Ex!8=jo>CJQ<8uaEPSq_Ug+ewSIG#yqD&_C%TdxPqf5$qKbR#
zW))CbG!maz9mDf!cy`Sc{JqU{y_V~99;cyQf%R@HxUzqRjtKrnyeYcs+_Tx%rI_rN
z?*6ih<)CK|ePW>ZNES`QBsfLQbkIUn*}i^jD7w<k2(y{z;hjkej|UCJCUsE_9sIxZ
zRbV_KbksOk^AvX{-1{+*jaqhG)bEiN_{x0r-RQn%Qq`pB&rfrzi%P=-&r0Q{A!w)w
z*&ZRk3;L(^;A8wz^CaUWTC)1GQ3KA5LWmoQzkW97bvcn?t`EI6Z*&DiZt*$!RIZx$
zu+#*Fd0+tFdQ3$kk}2EA3igZ;YcR>!Nnn*C-2$14y}IJB`tjNy9b4!@<I2j$J?oT&
zoPkp1==PXwa`<i!vA^3d!S~42T<txjwX{-GN2I*fq3R)*{WSdPCDGRGCf=AAMv_)q
znFyTm{5VcAFSn+TWxS&Fo?H1ae}LQk>L)P!HHls!DznOe%?lM3hl=5pM?0?4hk1W~
zu&s@3!QlMol$T2{jlD!GIkK18Hnb-xro@4g*BFzf9v@pBCDy@i4aoKepNmL+R}bwY
z%Br9m>J}v~hKolMvg3R9qp~{8Pve!ZTwqr2#+4D2!?5Y)3bnRn_SV;}fTPbqckf}0
zT=%1X3Gp5W+n47i^6`$!-Hv$19?iWr4R&L*%YnjN1v@gC*`BZ4333dShArnqf6VA(
z2ODZ%D?e5?G!yh!rRbw<pGpf$3BLiDZTRS~?ykV)@0GsC9c}p{+!}e~altZT$V+b!
z*-#hnm8HAajR8Ii3DXM94rcdcXnf@<DzlZ0aGMcgvxm4w_XZJI?+k-fz_lL|>Fje*
z$uzIgkwtpy%E%(z#=}!%$wFH*`GJ$`AUfV&ChWtKu+MF!Z<-IcmTE<zl6+48ZsFxM
zh2Y5N_x+^8&ZdQH>_+c}3=*&!@saTt_MFG-eUoiUheG9F7We<SAss$<D7>?VBMQ<}
z-hKEIaNx)PRh9Es_ZdYwedv}`FYc<^yeQz{){jt>`hSf90vyTEF&^yZ3H-+Qp7CbL
zvf8#~`uy-Qllsvh)o1gUe><7oKW2Q^cLaSil>j`j$m*NdFpaAqLC!Ml{LlaRjW=xt
z;B%GabxS%TZ$?f&%2H7L9ZD}UcU+wUdI4dR&(jNl@wtm=0gA~XN{baWR@UFGq;A=k
zSWKt*j1c$RyM2ED7>`I0&Sr0)qz!Y_Z8v6;*7d?Hz$2O6g8LyCFe#A$V)koEroEeH
zZvilB1HrZeuXt`BZxF}@nLs|<tW|Y5DY0bo<8zw6J=BN}k&a$CZ>eX7Z!;T*5w)>!
zIm{p2b_5N!2DVTVe+g;^yWY7GCg20D&g$MOR*DcEftg>u!&;@y;GJ;_-~_R*B&WJn
z?&NS?Vqy3}Xx9<sX>GN!(58s@tr<^Y8%)h%zY*(^`(S0;l$T<&WAh++21Z`0(}4a}
zXkzvi<U4U%5v14aJ5SXL@rGDBZ8+eiwtoS6S+Nte1`K=lO{Z^;99U6%`=;`p!slux
zOH2B}fg5ZblDoR`z$OBAdvEzUJUo_H+M%^%mrd>h*04y?J+KMJkA4@A$mZKRYXmx;
zV`d}qieoPfmePb=r{}TZDW6ulxZZM6^z6xGQBWV+lyg2k9B35UvLc4LrH;ig*b76X
z{fDYSFsBC$cB->eoIZqs{Q(uX0`LrZMGtUh$^~c#ws9&2dm1F2dq_S{+jRDUz7RLu
s@&`)&Hy$?zZu>uSyNlixoQ<DYCk$5m-3A|n!{1;R4b1dQFF4=*KeaXjr~m)}

literal 0
HcmV?d00001

diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c
index 61a0423..3f4794f 100644
--- a/lib/librte_gro/gro_tcp4.c
+++ b/lib/librte_gro/gro_tcp4.c
@@ -34,8 +34,6 @@
 #include <rte_mbuf.h>
 #include <rte_cycles.h>
 #include <rte_ethdev.h>
-#include <rte_ip.h>
-#include <rte_tcp.h>
 
 #include "gro_tcp4.h"
 
@@ -72,20 +70,20 @@ gro_tcp4_tbl_create(uint16_t socket_id,
 	}
 	tbl->max_item_num = entries_num;
 
-	size = sizeof(struct gro_tcp4_key) * entries_num;
-	tbl->keys = rte_zmalloc_socket(__func__,
+	size = sizeof(struct gro_tcp4_flow) * entries_num;
+	tbl->flows = rte_zmalloc_socket(__func__,
 			size,
 			RTE_CACHE_LINE_SIZE,
 			socket_id);
-	if (tbl->keys == NULL) {
+	if (tbl->flows == NULL) {
 		rte_free(tbl->items);
 		rte_free(tbl);
 		return NULL;
 	}
-	/* INVALID_ARRAY_INDEX indicates empty key */
+	/* INVALID_ARRAY_INDEX indicates an empty flow */
 	for (i = 0; i < entries_num; i++)
-		tbl->keys[i].start_index = INVALID_ARRAY_INDEX;
-	tbl->max_key_num = entries_num;
+		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+	tbl->max_flow_num = entries_num;
 
 	return tbl;
 }
@@ -97,111 +95,11 @@ gro_tcp4_tbl_destroy(void *tbl)
 
 	if (tcp_tbl) {
 		rte_free(tcp_tbl->items);
-		rte_free(tcp_tbl->keys);
+		rte_free(tcp_tbl->flows);
 	}
 	rte_free(tcp_tbl);
 }
 
-/*
- * merge two TCP/IPv4 packets without updating checksums.
- * If cmp is larger than 0, append the new packet to the
- * original packet. Otherwise, pre-pend the new packet to
- * the original packet.
- */
-static inline int
-merge_two_tcp4_packets(struct gro_tcp4_item *item_src,
-		struct rte_mbuf *pkt,
-		uint16_t ip_id,
-		uint32_t sent_seq,
-		int cmp)
-{
-	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
-	uint16_t tcp_datalen;
-
-	if (cmp > 0) {
-		pkt_head = item_src->firstseg;
-		pkt_tail = pkt;
-	} else {
-		pkt_head = pkt;
-		pkt_tail = item_src->firstseg;
-	}
-
-	/* check if the packet length will be beyond the max value */
-	tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len -
-		pkt_tail->l3_len - pkt_tail->l4_len;
-	if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen >
-			TCP4_MAX_L3_LENGTH)
-		return 0;
-
-	/* remove packet header for the tail packet */
-	rte_pktmbuf_adj(pkt_tail,
-			pkt_tail->l2_len +
-			pkt_tail->l3_len +
-			pkt_tail->l4_len);
-
-	/* chain two packets together */
-	if (cmp > 0) {
-		item_src->lastseg->next = pkt;
-		item_src->lastseg = rte_pktmbuf_lastseg(pkt);
-		/* update IP ID to the larger value */
-		item_src->ip_id = ip_id;
-	} else {
-		lastseg = rte_pktmbuf_lastseg(pkt);
-		lastseg->next = item_src->firstseg;
-		item_src->firstseg = pkt;
-		/* update sent_seq to the smaller value */
-		item_src->sent_seq = sent_seq;
-	}
-	item_src->nb_merged++;
-
-	/* update mbuf metadata for the merged packet */
-	pkt_head->nb_segs += pkt_tail->nb_segs;
-	pkt_head->pkt_len += pkt_tail->pkt_len;
-
-	return 1;
-}
-
-static inline int
-check_seq_option(struct gro_tcp4_item *item,
-		struct tcp_hdr *tcp_hdr,
-		uint16_t tcp_hl,
-		uint16_t tcp_dl,
-		uint16_t ip_id,
-		uint32_t sent_seq)
-{
-	struct rte_mbuf *pkt0 = item->firstseg;
-	struct ipv4_hdr *ipv4_hdr0;
-	struct tcp_hdr *tcp_hdr0;
-	uint16_t tcp_hl0, tcp_dl0;
-	uint16_t len;
-
-	ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) +
-			pkt0->l2_len);
-	tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len);
-	tcp_hl0 = pkt0->l4_len;
-
-	/* check if TCP option fields equal. If not, return 0. */
-	len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr);
-	if ((tcp_hl != tcp_hl0) ||
-			((len > 0) && (memcmp(tcp_hdr + 1,
-					tcp_hdr0 + 1,
-					len) != 0)))
-		return 0;
-
-	/* check if the two packets are neighbors */
-	tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0;
-	if ((sent_seq == (item->sent_seq + tcp_dl0)) &&
-			(ip_id == (item->ip_id + 1)))
-		/* append the new packet */
-		return 1;
-	else if (((sent_seq + tcp_dl) == item->sent_seq) &&
-			((ip_id + item->nb_merged) == item->ip_id))
-		/* pre-pend the new packet */
-		return -1;
-	else
-		return 0;
-}
-
 static inline uint32_t
 find_an_empty_item(struct gro_tcp4_tbl *tbl)
 {
@@ -215,13 +113,13 @@ find_an_empty_item(struct gro_tcp4_tbl *tbl)
 }
 
 static inline uint32_t
-find_an_empty_key(struct gro_tcp4_tbl *tbl)
+find_an_empty_flow(struct gro_tcp4_tbl *tbl)
 {
 	uint32_t i;
-	uint32_t max_key_num = tbl->max_key_num;
+	uint32_t max_flow_num = tbl->max_flow_num;
 
-	for (i = 0; i < max_key_num; i++)
-		if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX)
+	for (i = 0; i < max_flow_num; i++)
+		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
 			return i;
 	return INVALID_ARRAY_INDEX;
 }
@@ -229,10 +127,11 @@ find_an_empty_key(struct gro_tcp4_tbl *tbl)
 static inline uint32_t
 insert_new_item(struct gro_tcp4_tbl *tbl,
 		struct rte_mbuf *pkt,
-		uint16_t ip_id,
-		uint32_t sent_seq,
+		uint64_t start_time,
 		uint32_t prev_idx,
-		uint64_t start_time)
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint8_t is_atomic)
 {
 	uint32_t item_idx;
 
@@ -247,9 +146,10 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
 	tbl->items[item_idx].sent_seq = sent_seq;
 	tbl->items[item_idx].ip_id = ip_id;
 	tbl->items[item_idx].nb_merged = 1;
+	tbl->items[item_idx].is_atomic = is_atomic;
 	tbl->item_num++;
 
-	/* if the previous packet exists, chain the new one with it */
+	/* If the previous packet exists, chain them together. */
 	if (prev_idx != INVALID_ARRAY_INDEX) {
 		tbl->items[item_idx].next_pkt_idx =
 			tbl->items[prev_idx].next_pkt_idx;
@@ -260,12 +160,13 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
 }
 
 static inline uint32_t
-delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
+delete_item(struct gro_tcp4_tbl *tbl,
+		uint32_t item_idx,
 		uint32_t prev_item_idx)
 {
 	uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
 
-	/* set NULL to firstseg to indicate it's an empty item */
+	/* NULL indicates an empty item. */
 	tbl->items[item_idx].firstseg = NULL;
 	tbl->item_num--;
 	if (prev_item_idx != INVALID_ARRAY_INDEX)
@@ -275,53 +176,33 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
 }
 
 static inline uint32_t
-insert_new_key(struct gro_tcp4_tbl *tbl,
-		struct tcp4_key *key_src,
+insert_new_flow(struct gro_tcp4_tbl *tbl,
+		struct tcp4_flow_key *src,
 		uint32_t item_idx)
 {
-	struct tcp4_key *key_dst;
-	uint32_t key_idx;
+	struct tcp4_flow_key *dst;
+	uint32_t flow_idx;
 
-	key_idx = find_an_empty_key(tbl);
-	if (key_idx == INVALID_ARRAY_INDEX)
+	flow_idx = find_an_empty_flow(tbl);
+	if (flow_idx == INVALID_ARRAY_INDEX)
 		return INVALID_ARRAY_INDEX;
 
-	key_dst = &(tbl->keys[key_idx].key);
+	dst = &(tbl->flows[flow_idx].key);
 
-	ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr));
-	ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr));
-	key_dst->ip_src_addr = key_src->ip_src_addr;
-	key_dst->ip_dst_addr = key_src->ip_dst_addr;
-	key_dst->recv_ack = key_src->recv_ack;
-	key_dst->src_port = key_src->src_port;
-	key_dst->dst_port = key_src->dst_port;
+	ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr));
+	ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr));
+	dst->ip_src_addr = src->ip_src_addr;
+	dst->ip_dst_addr = src->ip_dst_addr;
+	dst->recv_ack = src->recv_ack;
+	dst->src_port = src->src_port;
+	dst->dst_port = src->dst_port;
 
-	/* non-INVALID_ARRAY_INDEX value indicates this key is valid */
-	tbl->keys[key_idx].start_index = item_idx;
-	tbl->key_num++;
+	tbl->flows[flow_idx].start_index = item_idx;
+	tbl->flow_num++;
 
-	return key_idx;
+	return flow_idx;
 }
 
-static inline int
-is_same_key(struct tcp4_key k1, struct tcp4_key k2)
-{
-	if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0)
-		return 0;
-
-	if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0)
-		return 0;
-
-	return ((k1.ip_src_addr == k2.ip_src_addr) &&
-			(k1.ip_dst_addr == k2.ip_dst_addr) &&
-			(k1.recv_ack == k2.recv_ack) &&
-			(k1.src_port == k2.src_port) &&
-			(k1.dst_port == k2.dst_port));
-}
-
-/*
- * update packet length for the flushed packet.
- */
 static inline void
 update_header(struct gro_tcp4_item *item)
 {
@@ -343,30 +224,40 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	struct ipv4_hdr *ipv4_hdr;
 	struct tcp_hdr *tcp_hdr;
 	uint32_t sent_seq;
-	uint16_t tcp_dl, ip_id;
+	uint16_t ip_id, frag_off, tcp_dl, hdr_len;
+	uint8_t is_atomic;
 
-	struct tcp4_key key;
+	struct tcp4_flow_key key;
 	uint32_t cur_idx, prev_idx, item_idx;
-	uint32_t i, max_key_num;
+	uint32_t i, max_flow_num;
 	int cmp;
 
 	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
 	ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
 	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+	hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
 
 	/*
-	 * if FIN, SYN, RST, PSH, URG, ECE or
-	 * CWR is set, return immediately.
+	 * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
+	 * or CWR set.
 	 */
 	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
 		return -1;
-	/* if payload length is 0, return immediately */
-	tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len -
-		pkt->l4_len;
-	if (tcp_dl == 0)
+	/*
+	 * Don't process the packet whose payload length is less than or
+	 * equal to 0.
+	 */
+	tcp_dl = pkt->pkt_len - hdr_len;
+	if (tcp_dl <= 0)
 		return -1;
 
-	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+	/*
+	 * Save IP ID for the packet whose DF bit is 0. For the packet
+	 * whose DF bit is 1, IP ID is ignored.
+	 */
+	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+	is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
+	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
 	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
 
 	ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr));
@@ -377,50 +268,55 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	key.dst_port = tcp_hdr->dst_port;
 	key.recv_ack = tcp_hdr->recv_ack;
 
-	/* search for a key */
-	max_key_num = tbl->max_key_num;
-	for (i = 0; i < max_key_num; i++) {
-		if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) &&
-				is_same_key(tbl->keys[i].key, key))
+	/* Search for a matched flow. */
+	max_flow_num = tbl->max_flow_num;
+	for (i = 0; i < max_flow_num; i++) {
+		if ((tbl->flows[i].start_index != INVALID_ARRAY_INDEX) &&
+				is_same_tcp4_flow(tbl->flows[i].key, key))
 			break;
 	}
 
-	/* can't find a key, so insert a new key and a new item. */
-	if (i == tbl->max_key_num) {
-		item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq,
-				INVALID_ARRAY_INDEX, start_time);
+	/*
+	 * Fail to find a matched flow. Insert a new flow and store the
+	 * packet into the flow.
+	 */
+	if (i == tbl->max_flow_num) {
+		item_idx = insert_new_item(tbl, pkt, start_time,
+				INVALID_ARRAY_INDEX, sent_seq, ip_id,
+				is_atomic);
 		if (item_idx == INVALID_ARRAY_INDEX)
 			return -1;
-		if (insert_new_key(tbl, &key, item_idx) ==
+		if (insert_new_flow(tbl, &key, item_idx) ==
 				INVALID_ARRAY_INDEX) {
-			/*
-			 * fail to insert a new key, so
-			 * delete the inserted item
-			 */
+			/* Fail to insert a new flow. */
 			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
 			return -1;
 		}
 		return 0;
 	}
 
-	/* traverse all packets in the item group to find one to merge */
-	cur_idx = tbl->keys[i].start_index;
+	/*
+	 * Check all packets in the flow and try to find a neighbor for
+	 * the input packet.
+	 */
+	cur_idx = tbl->flows[i].start_index;
 	prev_idx = cur_idx;
 	do {
 		cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
-				pkt->l4_len, tcp_dl, ip_id, sent_seq);
+				sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
+				is_atomic);
 		if (cmp) {
 			if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
-						pkt, ip_id,
-						sent_seq, cmp))
+						pkt, cmp, sent_seq, ip_id, 0))
 				return 1;
 			/*
-			 * fail to merge two packets since the packet
-			 * length will be greater than the max value.
-			 * So insert the packet into the item group.
+			 * Fail to merge the two packets, as the packet
+			 * length is greater than the max value. Store
+			 * the packet into the flow.
 			 */
-			if (insert_new_item(tbl, pkt, ip_id, sent_seq,
-						prev_idx, start_time) ==
+			if (insert_new_item(tbl, pkt, start_time, prev_idx,
+						sent_seq, ip_id,
+						is_atomic) ==
 					INVALID_ARRAY_INDEX)
 				return -1;
 			return 0;
@@ -429,12 +325,9 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		cur_idx = tbl->items[cur_idx].next_pkt_idx;
 	} while (cur_idx != INVALID_ARRAY_INDEX);
 
-	/*
-	 * can't find a packet in the item group to merge,
-	 * so insert the packet into the item group.
-	 */
-	if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx,
-				start_time) == INVALID_ARRAY_INDEX)
+	/* Fail to find a neighbor, so store the packet into the flow. */
+	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
+				ip_id, is_atomic) == INVALID_ARRAY_INDEX)
 		return -1;
 
 	return 0;
@@ -446,46 +339,35 @@ gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
 		struct rte_mbuf **out,
 		uint16_t nb_out)
 {
-	uint16_t k = 0;
+	uint32_t max_flow_num = tbl->max_flow_num;
 	uint32_t i, j;
-	uint32_t max_key_num = tbl->max_key_num;
+	uint16_t k = 0;
 
-	for (i = 0; i < max_key_num; i++) {
-		/* all keys have been checked, return immediately */
-		if (tbl->key_num == 0)
+	for (i = 0; i < max_flow_num; i++) {
+		if (unlikely(tbl->flow_num == 0))
 			return k;
 
-		j = tbl->keys[i].start_index;
+		j = tbl->flows[i].start_index;
 		while (j != INVALID_ARRAY_INDEX) {
 			if (tbl->items[j].start_time <= flush_timestamp) {
 				out[k++] = tbl->items[j].firstseg;
 				if (tbl->items[j].nb_merged > 1)
 					update_header(&(tbl->items[j]));
 				/*
-				 * delete the item and get
-				 * the next packet index
+				 * Delete the packet and get the next
+				 * packet in the flow.
 				 */
-				j = delete_item(tbl, j,
-						INVALID_ARRAY_INDEX);
+				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+				tbl->flows[i].start_index = j;
+				if (j == INVALID_ARRAY_INDEX)
+					tbl->flow_num--;
 
-				/*
-				 * delete the key as all of
-				 * packets are flushed
-				 */
-				if (j == INVALID_ARRAY_INDEX) {
-					tbl->keys[i].start_index =
-						INVALID_ARRAY_INDEX;
-					tbl->key_num--;
-				} else
-					/* update start_index of the key */
-					tbl->keys[i].start_index = j;
-
-				if (k == nb_out)
+				if (unlikely(k == nb_out))
 					return k;
 			} else
 				/*
-				 * left packets of this key won't be
-				 * timeout, so go to check other keys.
+				 * The left packets in this flow won't be
+				 * timeout. Go to check other flows.
 				 */
 				break;
 		}
diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h
index 0a81716..9ac8bc9 100644
--- a/lib/librte_gro/gro_tcp4.h
+++ b/lib/librte_gro/gro_tcp4.h
@@ -33,17 +33,20 @@
 #ifndef _GRO_TCP4_H_
 #define _GRO_TCP4_H_
 
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
 #define INVALID_ARRAY_INDEX 0xffffffffUL
 #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
 
 /*
- * the max L3 length of a TCP/IPv4 packet. The L3 length
- * is the sum of ipv4 header, tcp header and L4 payload.
+ * The max length of a IPv4 packet, which includes the length of the L3
+ * header, the L4 header and the data payload.
  */
-#define TCP4_MAX_L3_LENGTH UINT16_MAX
+#define MAX_IPV4_PKT_LENGTH UINT16_MAX
 
-/* criteria of mergeing packets */
-struct tcp4_key {
+/* Header fields representing a TCP/IPv4 flow */
+struct tcp4_flow_key {
 	struct ether_addr eth_saddr;
 	struct ether_addr eth_daddr;
 	uint32_t ip_src_addr;
@@ -54,77 +57,76 @@ struct tcp4_key {
 	uint16_t dst_port;
 };
 
-struct gro_tcp4_key {
-	struct tcp4_key key;
+struct gro_tcp4_flow {
+	struct tcp4_flow_key key;
 	/*
-	 * the index of the first packet in the item group.
-	 * If the value is INVALID_ARRAY_INDEX, it means
-	 * the key is empty.
+	 * The index of the first packet in the flow.
+	 * INVALID_ARRAY_INDEX indicates an empty flow.
 	 */
 	uint32_t start_index;
 };
 
 struct gro_tcp4_item {
 	/*
-	 * first segment of the packet. If the value
+	 * The first MBUF segment of the packet. If the value
 	 * is NULL, it means the item is empty.
 	 */
 	struct rte_mbuf *firstseg;
-	/* last segment of the packet */
+	/* The last MBUF segment of the packet */
 	struct rte_mbuf *lastseg;
 	/*
-	 * the time when the first packet is inserted
-	 * into the table. If a packet in the table is
-	 * merged with an incoming packet, this value
-	 * won't be updated. We set this value only
-	 * when the first packet is inserted into the
-	 * table.
+	 * The time when the first packet is inserted into the table.
+	 * This value won't be updated, even if the packet is merged
+	 * with other packets.
 	 */
 	uint64_t start_time;
 	/*
-	 * we use next_pkt_idx to chain the packets that
-	 * have same key value but can't be merged together.
+	 * next_pkt_idx is used to chain the packets that
+	 * are in the same flow but can't be merged together
+	 * (e.g. caused by packet reordering).
 	 */
 	uint32_t next_pkt_idx;
-	/* the sequence number of the packet */
+	/* TCP sequence number of the packet */
 	uint32_t sent_seq;
-	/* the IP ID of the packet */
+	/* IPv4 ID of the packet */
 	uint16_t ip_id;
-	/* the number of merged packets */
+	/* The number of merged packets */
 	uint16_t nb_merged;
+	/* Indicate if IP ID can be ignored */
+	uint8_t is_atomic;
 };
 
 /*
- * TCP/IPv4 reassembly table structure.
+ * TCP/IPv4 reassembly table structure
  */
 struct gro_tcp4_tbl {
 	/* item array */
 	struct gro_tcp4_item *items;
-	/* key array */
-	struct gro_tcp4_key *keys;
+	/* flow array */
+	struct gro_tcp4_flow *flows;
 	/* current item number */
 	uint32_t item_num;
-	/* current key num */
-	uint32_t key_num;
+	/* current flow num */
+	uint32_t flow_num;
 	/* item array size */
 	uint32_t max_item_num;
-	/* key array size */
-	uint32_t max_key_num;
+	/* flow array size */
+	uint32_t max_flow_num;
 };
 
 /**
  * This function creates a TCP/IPv4 reassembly table.
  *
  * @param socket_id
- *  socket index for allocating TCP/IPv4 reassemble table
+ *  Socket index for allocating the TCP/IPv4 reassembly table
  * @param max_flow_num
- *  the maximum number of flows in the TCP/IPv4 GRO table
+ *  The maximum number of flows in the TCP/IPv4 GRO table
  * @param max_item_per_flow
- *  the maximum packet number per flow.
+ *  The maximum number of packets per flow
  *
  * @return
- *  if create successfully, return a pointer which points to the
- *  created TCP/IPv4 GRO table. Otherwise, return NULL.
+ *  - Return the table pointer on success.
+ *  - Return NULL on failure.
  */
 void *gro_tcp4_tbl_create(uint16_t socket_id,
 		uint16_t max_flow_num,
@@ -134,62 +136,55 @@ void *gro_tcp4_tbl_create(uint16_t socket_id,
  * This function destroys a TCP/IPv4 reassembly table.
  *
  * @param tbl
- *  a pointer points to the TCP/IPv4 reassembly table.
+ *  Pointer pointing to the TCP/IPv4 reassembly table.
  */
 void gro_tcp4_tbl_destroy(void *tbl);
 
 /**
- * This function searches for a packet in the TCP/IPv4 reassembly table
- * to merge with the inputted one. To merge two packets is to chain them
- * together and update packet headers. Packets, whose SYN, FIN, RST, PSH
- * CWR, ECE or URG bit is set, are returned immediately. Packets which
- * only have packet headers (i.e. without data) are also returned
- * immediately. Otherwise, the packet is either merged, or inserted into
- * the table. Besides, if there is no available space to insert the
- * packet, this function returns immediately too.
+ * This function merges a TCP/IPv4 packet. It doesn't process the packet,
+ * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or doesn't have
+ * payload. The packet is returned if it has invalid parameters or there
+ * is no available space in the table.
  *
- * This function assumes the inputted packet is with correct IPv4 and
- * TCP checksums. And if two packets are merged, it won't re-calculate
- * IPv4 and TCP checksums. Besides, if the inputted packet is IP
- * fragmented, it assumes the packet is complete (with TCP header).
+ * This function doesn't check if the packet has correct checksums and
+ * doesn't re-calculate checksums for the merged packet. Additionally,
+ * it assumes the packets are complete (i.e., MF==0 && frag_off==0),
+ * when IP fragmentation is possible (i.e., DF==0).
  *
  * @param pkt
- *  packet to reassemble.
+ *  Packet to reassemble
  * @param tbl
- *  a pointer that points to a TCP/IPv4 reassembly table.
+ *  Pointer pointing to the TCP/IPv4 reassembly table
  * @start_time
- *  the start time that the packet is inserted into the table
+ *  The time when the packet is inserted into the table
  *
  * @return
- *  if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE
- *  or URG bit is set, or there is no available space in the table to
- *  insert a new item or a new key, return a negative value. If the
- *  packet is merged successfully, return an positive value. If the
- *  packet is inserted into the table, return 0.
+ *  - Return a positive value if the input packet is merged.
+ *  - Return zero if the input packet isn't merged but stored in the table.
+ *  - Return a negative value for invalid parameters or no available
+ *    space in the table.
  */
 int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		struct gro_tcp4_tbl *tbl,
 		uint64_t start_time);
 
 /**
- * This function flushes timeout packets in a TCP/IPv4 reassembly table
- * to applications, and without updating checksums for merged packets.
- * The max number of flushed timeout packets is the element number of
- * the array which is used to keep flushed packets.
+ * This function flushes timeout packets in a TCP/IPv4 reassembly table,
+ * and without updating checksums.
  *
  * @param tbl
- *  a pointer that points to a TCP GRO table.
+ *  TCP/IPv4 reassembly table pointer
  * @param flush_timestamp
- *  this function flushes packets which are inserted into the table
- *  before or at the flush_timestamp.
+ *  Flush packets which are inserted into the table before or at the
+ *  flush_timestamp.
  * @param out
- *  pointer array which is used to keep flushed packets.
+ *  Pointer array used to keep flushed packets
  * @param nb_out
- *  the element number of out. It's also the max number of timeout
+ *  The element number in 'out'. It also determines the maximum number of
  *  packets that can be flushed finally.
  *
  * @return
- *  the number of packets that are returned.
+ *  The number of flushed packets
  */
 uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
 		uint64_t flush_timestamp,
@@ -201,10 +196,130 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
  * reassembly table.
  *
  * @param tbl
- *  pointer points to a TCP/IPv4 reassembly table.
+ *  TCP/IPv4 reassembly table pointer
  *
  * @return
- *  the number of packets in the table
+ *  The number of packets in the table
  */
 uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
+
+/*
+ * Check if two TCP/IPv4 packets belong to the same flow.
+ */
+static inline int
+is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
+{
+	return (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) &&
+			is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) &&
+			(k1.ip_src_addr == k2.ip_src_addr) &&
+			(k1.ip_dst_addr == k2.ip_dst_addr) &&
+			(k1.recv_ack == k2.recv_ack) &&
+			(k1.src_port == k2.src_port) &&
+			(k1.dst_port == k2.dst_port));
+}
+
+/*
+ * Check if two TCP/IPv4 packets are neighbors.
+ */
+static inline int
+check_seq_option(struct gro_tcp4_item *item,
+		struct tcp_hdr *tcph,
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint16_t tcp_hl,
+		uint16_t tcp_dl,
+		uint16_t l2_offset,
+		uint8_t is_atomic)
+{
+	struct rte_mbuf *pkt_orig = item->firstseg;
+	struct ipv4_hdr *iph_orig;
+	struct tcp_hdr *tcph_orig;
+	uint16_t len, l4_len_orig;
+
+	iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
+			l2_offset + pkt_orig->l2_len);
+	tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
+	l4_len_orig = pkt_orig->l4_len;
+
+	/* Check if TCP option fields equal */
+	len = RTE_MAX(tcp_hl, l4_len_orig) - sizeof(struct tcp_hdr);
+	if ((tcp_hl != l4_len_orig) || ((len > 0) &&
+				(memcmp(tcph + 1, tcph_orig + 1,
+					len) != 0)))
+		return 0;
+
+	/* Don't merge packets whose DF bits are different */
+	if (item->is_atomic ^ is_atomic)
+		return 0;
+
+	/* Check if the two packets are neighbors */
+	len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
+		pkt_orig->l3_len - l4_len_orig;
+	if ((sent_seq == item->sent_seq + len) && (is_atomic ||
+				(ip_id == item->ip_id + item->nb_merged)))
+		/* Append the new packet */
+		return 1;
+	else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
+				(ip_id + 1 == item->ip_id)))
+		/* Pre-pend the new packet */
+		return -1;
+	else
+		return 0;
+}
+
+/*
+ * Merge two TCP/IPv4 packets without updating checksums.
+ * If cmp is larger than 0, append the new packet to the
+ * original packet. Otherwise, pre-pend the new packet to
+ * the original packet.
+ */
+static inline int
+merge_two_tcp4_packets(struct gro_tcp4_item *item,
+		struct rte_mbuf *pkt,
+		int cmp,
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint16_t l2_offset)
+{
+	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
+	uint16_t hdr_len;
+
+	if (cmp > 0) {
+		pkt_head = item->firstseg;
+		pkt_tail = pkt;
+	} else {
+		pkt_head = pkt;
+		pkt_tail = item->firstseg;
+	}
+
+	/* Check if the length is greater than the max value */
+	hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
+		pkt_head->l4_len;
+	if (pkt_head->pkt_len - l2_offset - pkt_head->l2_len +
+			pkt_tail->pkt_len - hdr_len > MAX_IPV4_PKT_LENGTH)
+		return 0;
+
+	/* Remove the packet header */
+	rte_pktmbuf_adj(pkt_tail, hdr_len);
+
+	/* Chain two packets together */
+	if (cmp > 0) {
+		item->lastseg->next = pkt;
+		item->lastseg = rte_pktmbuf_lastseg(pkt);
+	} else {
+		lastseg = rte_pktmbuf_lastseg(pkt);
+		lastseg->next = item->firstseg;
+		item->firstseg = pkt;
+		/* Update sent_seq and ip_id */
+		item->sent_seq = sent_seq;
+		item->ip_id = ip_id;
+	}
+	item->nb_merged++;
+
+	/* Update MBUF metadata for the merged packet */
+	pkt_head->nb_segs += pkt_tail->nb_segs;
+	pkt_head->pkt_len += pkt_tail->pkt_len;
+
+	return 1;
+}
 #endif
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index 7853246..d43f8e8 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -51,11 +51,14 @@ static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
 static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
 			gro_tcp4_tbl_pkt_count, NULL};
 
+#define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
+		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
+
 /*
- * GRO context structure, which is used to merge packets. It keeps
- * many reassembly tables of desired GRO types. Applications need to
- * create GRO context objects before using rte_gro_reassemble to
- * perform GRO.
+ * GRO context structure. It keeps the table structures, which are
+ * used to merge packets, for different GRO types. Before using
+ * rte_gro_reassemble(), applications need to create the GRO context
+ * first.
  */
 struct gro_ctx {
 	/* GRO types to perform */
@@ -93,7 +96,7 @@ rte_gro_ctx_create(const struct rte_gro_param *param)
 				param->max_flow_num,
 				param->max_item_per_flow);
 		if (gro_ctx->tbls[i] == NULL) {
-			/* destroy all created tables */
+			/* Destroy all created tables */
 			gro_ctx->gro_types = gro_types;
 			rte_gro_ctx_destroy(gro_ctx);
 			return NULL;
@@ -131,62 +134,55 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
 		const struct rte_gro_param *param)
 {
-	uint16_t i;
-	uint16_t nb_after_gro = nb_pkts;
-	uint32_t item_num;
-
-	/* allocate a reassembly table for TCP/IPv4 GRO */
+	/* Allocate a reassembly table for TCP/IPv4 GRO */
 	struct gro_tcp4_tbl tcp_tbl;
-	struct gro_tcp4_key tcp_keys[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
 	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
 
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
-	uint16_t unprocess_num = 0;
-	int32_t ret;
 	uint64_t current_time;
+	uint32_t item_num;
+	int32_t ret;
+	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
 
 	if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0)
 		return nb_pkts;
 
-	/* get the actual number of packets */
+	/* Get the actual number of packets */
 	item_num = RTE_MIN(nb_pkts, (param->max_flow_num *
-			param->max_item_per_flow));
+				param->max_item_per_flow));
 	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
 
 	for (i = 0; i < item_num; i++)
-		tcp_keys[i].start_index = INVALID_ARRAY_INDEX;
+		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
 
-	tcp_tbl.keys = tcp_keys;
+	tcp_tbl.flows = tcp_flows;
 	tcp_tbl.items = tcp_items;
-	tcp_tbl.key_num = 0;
+	tcp_tbl.flow_num = 0;
 	tcp_tbl.item_num = 0;
-	tcp_tbl.max_key_num = item_num;
+	tcp_tbl.max_flow_num = item_num;
 	tcp_tbl.max_item_num = item_num;
 
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
-					RTE_PTYPE_L4_TCP)) ==
-				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
-			ret = gro_tcp4_reassemble(pkts[i],
-					&tcp_tbl,
+		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl,
 					current_time);
 			if (ret > 0)
-				/* merge successfully */
+				/* Merge successfully */
 				nb_after_gro--;
-			else if (ret < 0) {
-				unprocess_pkts[unprocess_num++] =
-					pkts[i];
-			}
+			else if (ret < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else
 			unprocess_pkts[unprocess_num++] = pkts[i];
 	}
 
-	/* re-arrange GROed packets */
 	if (nb_after_gro < nb_pkts) {
+		/* Flush packets from the tables */
 		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time,
 				pkts, nb_pkts);
+		/* Copy unprocessed packets */
 		if (unprocess_num > 0) {
 			memcpy(&pkts[i], unprocess_pkts,
 					sizeof(struct rte_mbuf *) *
@@ -202,31 +198,28 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
 		void *ctx)
 {
-	uint16_t i, unprocess_num = 0;
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	struct gro_ctx *gro_ctx = ctx;
+	void *tcp_tbl;
 	uint64_t current_time;
+	uint16_t i, unprocess_num = 0;
 
 	if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0)
 		return nb_pkts;
 
+	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
-					RTE_PTYPE_L4_TCP)) ==
-				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
-			if (gro_tcp4_reassemble(pkts[i],
-						gro_ctx->tbls
-						[RTE_GRO_TCP_IPV4_INDEX],
+		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+			if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
 						current_time) < 0)
 				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else
 			unprocess_pkts[unprocess_num++] = pkts[i];
 	}
 	if (unprocess_num > 0) {
-		memcpy(pkts, unprocess_pkts,
-				sizeof(struct rte_mbuf *) *
+		memcpy(pkts, unprocess_pkts, sizeof(struct rte_mbuf *) *
 				unprocess_num);
 	}
 
@@ -252,6 +245,7 @@ rte_gro_timeout_flush(void *ctx,
 				flush_timestamp,
 				out, max_nb_out);
 	}
+
 	return 0;
 }
 
@@ -274,5 +268,6 @@ rte_gro_get_pkt_count(void *ctx)
 			continue;
 		item_num += pkt_count_fn(gro_ctx->tbls[i]);
 	}
+
 	return item_num;
 }
diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
index d57e0c5..c28b3a2 100644
--- a/lib/librte_gro/rte_gro.h
+++ b/lib/librte_gro/rte_gro.h
@@ -59,8 +59,8 @@ extern "C" {
 /**< TCP/IPv4 GRO flag */
 
 /**
- * A structure which is used to create GRO context objects or tell
- * rte_gro_reassemble_burst() what reassembly rules are demanded.
+ * Structure used to create GRO context objects or used to pass
+ * application-determined parameters to rte_gro_reassemble_burst().
  */
 struct rte_gro_param {
 	uint64_t gro_types;
@@ -106,26 +106,23 @@ void rte_gro_ctx_destroy(void *ctx);
 
 /**
  * This is one of the main reassembly APIs, which merges numbers of
- * packets at a time. It assumes that all inputted packets are with
- * correct checksums. That is, applications should guarantee all
- * inputted packets are correct. Besides, it doesn't re-calculate
- * checksums for merged packets. If inputted packets are IP fragmented,
- * this function assumes them are complete (i.e. with L4 header). After
- * finishing processing, it returns all GROed packets to applications
- * immediately.
+ * packets at a time. It doesn't check if input packets have correct
+ * checksums and doesn't re-calculate checksums for merged packets.
+ * It assumes the packets are complete (i.e., MF==0 && frag_off==0),
+ * when IP fragmentation is possible (i.e., DF==0). The GROed packets
+ * are returned as soon as the function finishes.
  *
  * @param pkts
- *  a pointer array which points to the packets to reassemble. Besides,
- *  it keeps mbuf addresses for the GROed packets.
+ *  Pointer array pointing to the packets to reassemble. Besides, it
+ *  keeps MBUF addresses for the GROed packets.
  * @param nb_pkts
- *  the number of packets to reassemble.
+ *  The number of packets to reassemble
  * @param param
- *  applications use it to tell rte_gro_reassemble_burst() what rules
- *  are demanded.
+ *  Application-determined parameters for reassembling packets.
  *
  * @return
- *  the number of packets after been GROed. If no packets are merged,
- *  the returned value is nb_pkts.
+ *  The number of packets after being GROed. If no packets are merged,
+ *  the return value is equal to nb_pkts.
  */
 uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
@@ -135,32 +132,28 @@ uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts,
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
  *
- * Reassembly function, which tries to merge inputted packets with
- * the packets in the reassembly tables of a given GRO context. This
- * function assumes all inputted packets are with correct checksums.
- * And it won't update checksums if two packets are merged. Besides,
- * if inputted packets are IP fragmented, this function assumes they
- * are complete packets (i.e. with L4 header).
+ * Reassembly function, which tries to merge input packets with the
+ * existed packets in the reassembly tables of a given GRO context.
+ * It doesn't check if input packets have correct checksums and doesn't
+ * re-calculate checksums for merged packets. Additionally, it assumes
+ * the packets are complete (i.e., MF==0 && frag_off==0), when IP
+ * fragmentation is possible (i.e., DF==0).
  *
- * If the inputted packets don't have data or are with unsupported GRO
- * types etc., they won't be processed and are returned to applications.
- * Otherwise, the inputted packets are either merged or inserted into
- * the table. If applications want get packets in the table, they need
- * to call flush API.
+ * If the input packets have invalid parameters (e.g. no data payload,
+ * unsupported GRO types), they are returned to applications. Otherwise,
+ * they are either merged or inserted into the table. Applications need
+ * to flush packets from the tables by flush API, if they want to get the
+ * GROed packets.
  *
  * @param pkts
- *  packet to reassemble. Besides, after this function finishes, it
- *  keeps the unprocessed packets (e.g. without data or unsupported
- *  GRO types).
+ *  Packets to reassemble. It's also used to store the unprocessed packets.
  * @param nb_pkts
- *  the number of packets to reassemble.
+ *  The number of packets to reassemble
  * @param ctx
- *  a pointer points to a GRO context object.
+ *  GRO context object pointer
  *
  * @return
- *  return the number of unprocessed packets (e.g. without data or
- *  unsupported GRO types). If all packets are processed (merged or
- *  inserted into the table), return 0.
+ *  - The number of unprocessed packets.
  */
 uint16_t rte_gro_reassemble(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
@@ -170,25 +163,24 @@ uint16_t rte_gro_reassemble(struct rte_mbuf **pkts,
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
  *
- * This function flushes the timeout packets from reassembly tables of
- * desired GRO types. The max number of flushed timeout packets is the
- * element number of the array which is used to keep the flushed packets.
+ * This function flushes the timeout packets from the reassembly tables
+ * of desired GRO types. The max number of flushed packets is the
+ * element number of 'out'.
  *
- * Besides, this function won't re-calculate checksums for merged
- * packets in the tables. That is, the returned packets may be with
- * wrong checksums.
+ * Additionally, the flushed packets may have incorrect checksums, since
+ * this function doesn't re-calculate checksums for merged packets.
  *
  * @param ctx
- *  a pointer points to a GRO context object.
+ *  GRO context object pointer.
  * @param timeout_cycles
- *  max TTL for packets in reassembly tables, measured in nanosecond.
+ *  The max TTL for packets in reassembly tables, measured in nanoseconds.
  * @param gro_types
- *  this function only flushes packets which belong to the GRO types
- *  specified by gro_types.
+ *  This function flushes packets whose GRO types are specified by
+ *  gro_types.
  * @param out
- *  a pointer array that is used to keep flushed timeout packets.
+ *  Pointer array used to keep flushed packets.
  * @param max_nb_out
- *  the element number of out. It's also the max number of timeout
+ *  The element number of 'out'. It's also the max number of timeout
  *  packets that can be flushed finally.
  *
  * @return
@@ -208,10 +200,10 @@ uint16_t rte_gro_timeout_flush(void *ctx,
  * of a given GRO context.
  *
  * @param ctx
- *  pointer points to a GRO context object.
+ *  GRO context object pointer.
  *
  * @return
- *  the number of packets in all reassembly tables.
+ *  The number of packets in the tables.
  */
 uint64_t rte_gro_get_pkt_count(void *ctx);
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 2/2] gro: support VxLAN GRO
  2017-12-14  2:49 ` [PATCH v2 0/2] Support " Jiayu Hu
  2017-12-14  2:49   ` [PATCH v2 1/2] gro: code cleanup Jiayu Hu
@ 2017-12-14  2:49   ` Jiayu Hu
  2017-12-14  2:58     ` Stephen Hemminger
  2017-12-14  3:02     ` Stephen Hemminger
  2017-12-22  7:25   ` [PATCH v3 0/2] Support " Jiayu Hu
  2 siblings, 2 replies; 31+ messages in thread
From: Jiayu Hu @ 2017-12-14  2:49 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, jianfeng.tan, junjie.j.chen, stephen,
	john.mcnamara, matvejchikov, Jiayu Hu

This patch adds a framework that allows GRO on tunneled packets.
Furthermore, it leverages that framework to provide GRO support for
VxLAN-encapsulated packets. Supported VxLAN packets must have an outer
IPv4 header, and contain an inner TCP/IPv4 packet.

VxLAN GRO doesn't check if input packets have correct checksums and
doesn't update checksums for output packets. Additionally, it assumes
the packets are complete (i.e., MF==0 && frag_off==0), when IP
fragmentation is possible (i.e., DF==0).

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
---
 .../prog_guide/generic_receive_offload_lib.rst     |  31 +-
 lib/librte_gro/Makefile                            |   1 +
 lib/librte_gro/gro_vxlan_tcp4.c                    | 512 +++++++++++++++++++++
 lib/librte_gro/gro_vxlan_tcp4.h                    | 181 ++++++++
 lib/librte_gro/rte_gro.c                           | 124 ++++-
 lib/librte_gro/rte_gro.h                           |   3 +
 6 files changed, 827 insertions(+), 25 deletions(-)
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.c
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.h

diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst b/doc/guides/prog_guide/generic_receive_offload_lib.rst
index f07d8f6..13280a5 100644
--- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
+++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
@@ -57,7 +57,9 @@ assumes the packets are complete (i.e., MF==0 && frag_off==0), when IP
 fragmentation is possible (i.e., DF==0). Additionally, it complies RFC
 6864 to process the IPv4 ID field.
 
-Currently, the GRO library provides GRO supports for TCP/IPv4 packets.
+Currently, the GRO library provides GRO support for TCP/IPv4 packets and
+VxLAN packets which contain an outer IPv4 header and an inner TCP/IPv4
+packet.
 
 Two Sets of API
 ---------------
@@ -108,7 +110,8 @@ Reassembly Algorithm
 
 The reassembly algorithm is used for reassembling packets. In the GRO
 library, different GRO types can use different algorithms. In this
-section, we will introduce an algorithm, which is used by TCP/IPv4 GRO.
+section, we will introduce an algorithm, which is used by TCP/IPv4 GRO
+and VxLAN GRO.
 
 Challenges
 ~~~~~~~~~~
@@ -185,6 +188,30 @@ Header fields deciding if two packets are neighbors include:
 - IP ID. The IP ID fields of the packets, whose DF bit is 0, should be
   increased by 1.
 
+VxLAN GRO
+---------
+
+The table structure used by VxLAN GRO, which is in charge of processing
+VxLAN packets with an outer IPv4 header and inner TCP/IPv4 packet, is
+similar to that of TCP/IPv4 GRO. However, the header fields used
+to define a VxLAN flow include:
+
+- outer source and destination: Ethernet and IP address, UDP port
+
+- VxLAN header (VNI and flag)
+
+- inner source and destination: Ethernet and IP address, TCP port
+
+Header fields deciding if packets are neighbors include:
+
+- Outer IP ID. The IP ID fields of the packets, whose DF bit in the outer
+  IPv4 header is 0, should be increased by 1.
+
+- Inner TCP sequence number
+
+- Inner IP ID. The IP ID fields of the packets, whose DF bit in the inner
+  IPv4 header is 0, should be increased by 1.
+
 .. note::
         We comply RFC 6864 to process the IP ID field. Specifically,
         we only check IP ID fields for the packets whose DF bit is 0.
diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile
index eb423cc..0110455 100644
--- a/lib/librte_gro/Makefile
+++ b/lib/librte_gro/Makefile
@@ -45,6 +45,7 @@ LIBABIVER := 1
 # source files
 SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
 SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c
+SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_vxlan_tcp4.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h
diff --git a/lib/librte_gro/gro_vxlan_tcp4.c b/lib/librte_gro/gro_vxlan_tcp4.c
new file mode 100644
index 0000000..3269de6
--- /dev/null
+++ b/lib/librte_gro/gro_vxlan_tcp4.c
@@ -0,0 +1,512 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_udp.h>
+
+#include "gro_vxlan_tcp4.h"
+
+/*
+ * Allocate and initialize a VxLAN reassembly table on the given NUMA
+ * socket. Capacity is max_flow_num * max_item_per_flow, capped at
+ * GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM. Returns NULL when the capacity is
+ * zero or any allocation fails.
+ */
+void *
+gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow)
+{
+	struct gro_vxlan_tcp4_tbl *tbl;
+	size_t size;
+	uint32_t entries_num, i;
+
+	entries_num = max_flow_num * max_item_per_flow;
+	entries_num = RTE_MIN(entries_num, GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM);
+
+	if (entries_num == 0)
+		return NULL;
+
+	tbl = rte_zmalloc_socket(__func__,
+			sizeof(struct gro_vxlan_tcp4_tbl),
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl == NULL)
+		return NULL;
+
+	size = sizeof(struct gro_vxlan_tcp4_item) * entries_num;
+	tbl->items = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->items == NULL) {
+		rte_free(tbl);
+		return NULL;
+	}
+	tbl->max_item_num = entries_num;
+
+	size = sizeof(struct gro_vxlan_tcp4_flow) * entries_num;
+	tbl->flows = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->flows == NULL) {
+		rte_free(tbl->items);
+		rte_free(tbl);
+		return NULL;
+	}
+
+	/* Mark every flow slot empty; INVALID_ARRAY_INDEX means unused. */
+	for (i = 0; i < entries_num; i++)
+		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+	tbl->max_flow_num = entries_num;
+
+	return tbl;
+}
+
+/*
+ * Free a VxLAN reassembly table and its item/flow arrays. A NULL
+ * argument is accepted (rte_free(NULL) is a no-op).
+ */
+void
+gro_vxlan_tcp4_tbl_destroy(void *tbl)
+{
+	struct gro_vxlan_tcp4_tbl *vxlan_tbl = tbl;
+
+	if (vxlan_tbl) {
+		rte_free(vxlan_tbl->items);
+		rte_free(vxlan_tbl->flows);
+	}
+	rte_free(vxlan_tbl);
+}
+
+/*
+ * Linear scan for an unused item slot; a NULL firstseg marks a free
+ * slot. Returns the slot index, or INVALID_ARRAY_INDEX when the item
+ * array is full.
+ */
+static inline uint32_t
+find_an_empty_item(struct gro_vxlan_tcp4_tbl *tbl)
+{
+	uint32_t max_item_num = tbl->max_item_num, i;
+
+	for (i = 0; i < max_item_num; i++)
+		if (tbl->items[i].inner_item.firstseg == NULL)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+/*
+ * Linear scan for an unused flow slot; start_index ==
+ * INVALID_ARRAY_INDEX marks a free slot. Returns the slot index, or
+ * INVALID_ARRAY_INDEX when the flow array is full.
+ */
+static inline uint32_t
+find_an_empty_flow(struct gro_vxlan_tcp4_tbl *tbl)
+{
+	uint32_t max_flow_num = tbl->max_flow_num, i;
+
+	for (i = 0; i < max_flow_num; i++)
+		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+/*
+ * Store a packet as a new item, recording its inner TCP/IPv4 state
+ * plus the outer IPv4 ID and outer DF ("atomic") flag. When prev_idx
+ * is valid, the new item is linked into the flow's item list right
+ * after prev_idx. Returns the new item index, or INVALID_ARRAY_INDEX
+ * when no free slot is available.
+ */
+static inline uint32_t
+insert_new_item(struct gro_vxlan_tcp4_tbl *tbl,
+		struct rte_mbuf *pkt,
+		uint64_t start_time,
+		uint32_t prev_idx,
+		uint32_t sent_seq,
+		uint16_t outer_ip_id,
+		uint16_t ip_id,
+		uint8_t outer_is_atomic,
+		uint8_t is_atomic)
+{
+	uint32_t item_idx;
+
+	item_idx = find_an_empty_item(tbl);
+	if (item_idx == INVALID_ARRAY_INDEX)
+		return INVALID_ARRAY_INDEX;
+
+	tbl->items[item_idx].inner_item.firstseg = pkt;
+	tbl->items[item_idx].inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
+	tbl->items[item_idx].inner_item.start_time = start_time;
+	tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
+	tbl->items[item_idx].inner_item.sent_seq = sent_seq;
+	tbl->items[item_idx].inner_item.ip_id = ip_id;
+	tbl->items[item_idx].inner_item.nb_merged = 1;
+	tbl->items[item_idx].inner_item.is_atomic = is_atomic;
+	tbl->items[item_idx].outer_ip_id = outer_ip_id;
+	tbl->items[item_idx].outer_is_atomic = outer_is_atomic;
+	tbl->item_num++;
+
+	/* If the previous packet exists, chain the new one with it. */
+	if (prev_idx != INVALID_ARRAY_INDEX) {
+		tbl->items[item_idx].inner_item.next_pkt_idx =
+			tbl->items[prev_idx].inner_item.next_pkt_idx;
+		tbl->items[prev_idx].inner_item.next_pkt_idx = item_idx;
+	}
+
+	return item_idx;
+}
+
+/*
+ * Remove an item from the table and unlink it from its flow's item
+ * list (when prev_item_idx is valid). Returns the index of the next
+ * item in the list so callers can keep iterating.
+ */
+static inline uint32_t
+delete_item(struct gro_vxlan_tcp4_tbl *tbl,
+		uint32_t item_idx,
+		uint32_t prev_item_idx)
+{
+	uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
+
+	/* NULL indicates an empty item. */
+	tbl->items[item_idx].inner_item.firstseg = NULL;
+	tbl->item_num--;
+	if (prev_item_idx != INVALID_ARRAY_INDEX)
+		tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
+
+	return next_idx;
+}
+
+/*
+ * Record a new flow keyed by the outer Ethernet/IPv4/UDP tuple, the
+ * VxLAN header (flags and VNI) and the inner TCP/IPv4 tuple, with
+ * item_idx as its first item. Returns the flow index, or
+ * INVALID_ARRAY_INDEX when the flow array is full.
+ */
+static inline uint32_t
+insert_new_flow(struct gro_vxlan_tcp4_tbl *tbl,
+		struct vxlan_tcp4_flow_key *src,
+		uint32_t item_idx)
+{
+	struct vxlan_tcp4_flow_key *dst;
+	uint32_t flow_idx;
+
+	flow_idx = find_an_empty_flow(tbl);
+	if (flow_idx == INVALID_ARRAY_INDEX)
+		return INVALID_ARRAY_INDEX;
+
+	dst = &(tbl->flows[flow_idx].key);
+
+	/* Copy the inner TCP/IPv4 key field by field. */
+	ether_addr_copy(&(src->inner_key.eth_saddr),
+			&(dst->inner_key.eth_saddr));
+	ether_addr_copy(&(src->inner_key.eth_daddr),
+			&(dst->inner_key.eth_daddr));
+	dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
+	dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
+	dst->inner_key.recv_ack = src->inner_key.recv_ack;
+	dst->inner_key.src_port = src->inner_key.src_port;
+	dst->inner_key.dst_port = src->inner_key.dst_port;
+
+	/* Copy the VxLAN header and the outer Ethernet/IPv4/UDP key. */
+	dst->vxlan_hdr = src->vxlan_hdr;
+	ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
+	ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
+	dst->outer_ip_src_addr = src->outer_ip_src_addr;
+	dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
+	dst->outer_src_port = src->outer_src_port;
+	dst->outer_dst_port = src->outer_dst_port;
+
+	tbl->flows[flow_idx].start_index = item_idx;
+	tbl->flow_num++;
+
+	return flow_idx;
+}
+
+/*
+ * Return non-zero when two VxLAN flow keys match exactly: outer MACs,
+ * outer IPv4 addresses, outer UDP ports, VxLAN flags/VNI and the
+ * inner TCP/IPv4 key. Both keys are passed (and compared) by value.
+ */
+static inline int
+is_same_vxlan_tcp4_flow(struct vxlan_tcp4_flow_key k1,
+		struct vxlan_tcp4_flow_key k2)
+{
+	return (is_same_ether_addr(&k1.outer_eth_saddr, &k2.outer_eth_saddr) &&
+			is_same_ether_addr(&k1.outer_eth_daddr,
+				&k2.outer_eth_daddr) &&
+			(k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
+			(k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
+			(k1.outer_src_port == k2.outer_src_port) &&
+			(k1.outer_dst_port == k2.outer_dst_port) &&
+			(k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
+			(k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
+			is_same_tcp4_flow(k1.inner_key, k2.inner_key));
+}
+
+/*
+ * Decide whether a packet is a neighbor of an existing item, checking
+ * the inner TCP/IPv4 headers via check_seq_option() and then the
+ * outer IPv4 ID sequence (only when the outer DF bit is 0). Returns
+ * 1 to append the packet, -1 to prepend it, 0 when it cannot be
+ * merged with this item.
+ */
+static inline int
+check_vxlan_seq_option(struct gro_vxlan_tcp4_item *item,
+		struct tcp_hdr *tcp_hdr,
+		uint32_t sent_seq,
+		uint16_t outer_ip_id,
+		uint16_t ip_id,
+		uint16_t tcp_hl,
+		uint16_t tcp_dl,
+		uint8_t outer_is_atomic,
+		uint8_t is_atomic)
+{
+	struct rte_mbuf *pkt = item->inner_item.firstseg;
+	int cmp;
+	uint16_t l2_offset;
+
+	/* Don't merge packets whose outer DF bits are different. */
+	if (item->outer_is_atomic ^ outer_is_atomic)
+		return 0;
+
+	/* Skip the outer headers when comparing the inner ones. */
+	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
+	cmp = check_seq_option(&item->inner_item, tcp_hdr, sent_seq, ip_id,
+			tcp_hl, tcp_dl, l2_offset, is_atomic);
+	if ((cmp == 1) && (outer_is_atomic || (outer_ip_id ==
+					item->outer_ip_id +
+					item->inner_item.nb_merged)))
+		/* Append the packet. */
+		return 1;
+	else if ((cmp == -1) && (outer_is_atomic || (outer_ip_id + 1 ==
+					item->outer_ip_id)))
+		/* Prepend the packet. */
+		return -1;
+	else
+		return 0;
+}
+
+/*
+ * Try to merge pkt into an existing item; cmp > 0 appends, cmp < 0
+ * prepends. On a successful prepend the stored outer IP ID becomes
+ * the new head packet's ID. Returns 1 on success, 0 when
+ * merge_two_tcp4_packets() refuses (merged length would exceed the
+ * maximum).
+ */
+static inline int
+merge_two_vxlan_tcp4_packets(struct gro_vxlan_tcp4_item *item,
+		struct rte_mbuf *pkt,
+		int cmp,
+		uint32_t sent_seq,
+		uint16_t outer_ip_id,
+		uint16_t ip_id)
+{
+	if (merge_two_tcp4_packets(&item->inner_item, pkt, cmp, sent_seq,
+				ip_id, pkt->outer_l2_len +
+				pkt->outer_l3_len)) {
+		item->outer_ip_id = cmp < 0 ? outer_ip_id : item->outer_ip_id;
+		return 1;
+	} else
+		return 0;
+}
+
+/*
+ * Recompute the length fields of a merged packet: outer IPv4 total
+ * length, outer UDP datagram length and inner IPv4 total length.
+ * Assumes pkt->l2_len covers the UDP + VxLAN + inner Ethernet
+ * headers, matching the pointer arithmetic in
+ * gro_vxlan_tcp4_reassemble(). Checksums are not updated.
+ */
+static inline void
+update_vxlan_header(struct gro_vxlan_tcp4_item *item)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct udp_hdr *udp_hdr;
+	struct rte_mbuf *pkt = item->inner_item.firstseg;
+	uint16_t len;
+
+	/* Update the outer IPv4 header. */
+	len = pkt->pkt_len - pkt->outer_l2_len;
+	ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			pkt->outer_l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
+
+	/* Update the outer UDP header. */
+	len -= pkt->outer_l3_len;
+	udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + pkt->outer_l3_len);
+	udp_hdr->dgram_len = rte_cpu_to_be_16(len);
+
+	/* Update the inner IPv4 header. */
+	len -= pkt->l2_len;
+	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
+}
+
+/*
+ * Merge a VxLAN packet with an outer IPv4 header and inner TCP/IPv4
+ * headers into the reassembly table. Returns a positive value when
+ * the packet is merged into an existing one, 0 when it is stored as
+ * a new item, and -1 when it cannot be processed (inner TCP flags
+ * other than ACK, no payload, or no space in the table).
+ */
+int32_t
+gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
+		struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t start_time)
+{
+	struct ether_hdr *outer_eth_hdr, *eth_hdr;
+	struct ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
+	struct tcp_hdr *tcp_hdr;
+	struct udp_hdr *udp_hdr;
+	struct vxlan_hdr *vxlan_hdr;
+	uint32_t sent_seq;
+	uint16_t tcp_dl, frag_off, outer_ip_id, ip_id;
+	uint8_t outer_is_atomic, is_atomic;
+
+	struct vxlan_tcp4_flow_key key;
+	uint32_t cur_idx, prev_idx, item_idx;
+	uint32_t i, max_flow_num;
+	int cmp;
+	uint16_t hdr_len;
+
+	/*
+	 * Locate all headers. The inner IPv4 header is addressed as
+	 * udp_hdr + pkt->l2_len, so pkt->l2_len is assumed to cover
+	 * the UDP + VxLAN + inner Ethernet headers.
+	 */
+	outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+	outer_ipv4_hdr = (struct ipv4_hdr *)((char *)outer_eth_hdr +
+			pkt->outer_l2_len);
+	udp_hdr = (struct udp_hdr *)((char *)outer_ipv4_hdr +
+			pkt->outer_l3_len);
+	vxlan_hdr = (struct vxlan_hdr *)((char *)udp_hdr +
+			sizeof(struct udp_hdr));
+	eth_hdr = (struct ether_hdr *)((char *)vxlan_hdr +
+			sizeof(struct vxlan_hdr));
+	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
+	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+
+	/*
+	 * Check if the inner TCP header flag sets FIN, SYN, RST,
+	 * PSH, URG, ECE or CWR bit.
+	 */
+	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
+		return -1;
+	hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
+		pkt->l3_len + pkt->l4_len;
+	/*
+	 * If the payload length is less than or equal to 0, return
+	 * immediately.
+	 */
+	tcp_dl = pkt->pkt_len - hdr_len;
+	/*
+	 * NOTE(review): tcp_dl is unsigned, so "tcp_dl <= 0" only
+	 * catches tcp_dl == 0. If pkt_len could ever be smaller than
+	 * hdr_len the subtraction would wrap to a large value —
+	 * confirm callers guarantee pkt_len >= hdr_len.
+	 */
+	if (tcp_dl <= 0)
+		return -1;
+
+	/*
+	 * Save IP ID for the packet whose DF bit is 0. For the packet
+	 * whose DF bit is 1, IP ID is ignored.
+	 */
+	frag_off = rte_be_to_cpu_16(outer_ipv4_hdr->fragment_offset);
+	outer_is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
+	outer_ip_id = outer_is_atomic ? 0 :
+		rte_be_to_cpu_16(outer_ipv4_hdr->packet_id);
+	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+	is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
+	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
+
+	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
+
+	/* Build the lookup key: inner TCP/IPv4 tuple first. */
+	ether_addr_copy(&(eth_hdr->s_addr), &(key.inner_key.eth_saddr));
+	ether_addr_copy(&(eth_hdr->d_addr), &(key.inner_key.eth_daddr));
+	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
+	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
+	key.inner_key.src_port = tcp_hdr->src_port;
+	key.inner_key.dst_port = tcp_hdr->dst_port;
+	key.inner_key.recv_ack = tcp_hdr->recv_ack;
+
+	/* Then the outer Ethernet/IPv4/UDP tuple and VxLAN header. */
+	ether_addr_copy(&(outer_eth_hdr->s_addr), &(key.outer_eth_saddr));
+	ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
+	key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
+	key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
+	key.outer_src_port = udp_hdr->src_port;
+	key.outer_dst_port = udp_hdr->dst_port;
+	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
+	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;
+
+	/* Search for a matched flow. */
+	max_flow_num = tbl->max_flow_num;
+	for (i = 0; i < max_flow_num; i++) {
+		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX &&
+				is_same_vxlan_tcp4_flow(tbl->flows[i].key,
+					key))
+			break;
+	}
+
+	/*
+	 * Can't find a matched flow. Insert a new flow and store the
+	 * packet into the flow.
+	 */
+	if (i == tbl->max_flow_num) {
+		item_idx = insert_new_item(tbl, pkt, start_time,
+				INVALID_ARRAY_INDEX, sent_seq, outer_ip_id,
+				ip_id, outer_is_atomic, is_atomic);
+		if (item_idx == INVALID_ARRAY_INDEX)
+			return -1;
+		if (insert_new_flow(tbl, &key, item_idx) ==
+				INVALID_ARRAY_INDEX) {
+			/*
+			 * Fail to insert a new flow, so
+			 * delete the inserted packet.
+			 */
+			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
+			return -1;
+		}
+		return 0;
+	}
+
+	/* Check all packets in the flow and try to find a neighbor. */
+	cur_idx = tbl->flows[i].start_index;
+	prev_idx = cur_idx;
+	do {
+		cmp = check_vxlan_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
+				sent_seq, outer_ip_id, ip_id, pkt->l4_len,
+				tcp_dl, outer_is_atomic, is_atomic);
+		if (cmp) {
+			if (merge_two_vxlan_tcp4_packets(&(tbl->items[cur_idx]),
+						pkt, cmp, sent_seq,
+						outer_ip_id, ip_id))
+				return 1;
+			/*
+			 * Can't merge two packets, as the packet
+			 * length will be greater than the max value.
+			 * Insert the packet into the flow.
+			 */
+			if (insert_new_item(tbl, pkt, start_time, prev_idx,
+						sent_seq, outer_ip_id,
+						ip_id, outer_is_atomic,
+						is_atomic) ==
+					INVALID_ARRAY_INDEX)
+				return -1;
+			return 0;
+		}
+		prev_idx = cur_idx;
+		cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx;
+	} while (cur_idx != INVALID_ARRAY_INDEX);
+
+	/* Can't find neighbor. Insert the packet into the flow. */
+	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
+				outer_ip_id, ip_id, outer_is_atomic,
+				is_atomic) == INVALID_ARRAY_INDEX)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Flush every packet inserted at or before flush_timestamp into the
+ * out[] array, fixing up header length fields for packets built from
+ * more than one merge. Stops early once out[] is full or the table is
+ * empty. Returns the number of packets flushed.
+ */
+uint16_t
+gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out)
+{
+	uint16_t k = 0;
+	uint32_t i, j;
+	uint32_t max_flow_num = tbl->max_flow_num;
+
+	for (i = 0; i < max_flow_num; i++) {
+		if (unlikely(tbl->flow_num == 0))
+			return k;
+
+		j = tbl->flows[i].start_index;
+		while (j != INVALID_ARRAY_INDEX) {
+			if (tbl->items[j].inner_item.start_time <=
+					flush_timestamp) {
+				out[k++] = tbl->items[j].inner_item.firstseg;
+				/* Only merged packets need new length fields. */
+				if (tbl->items[j].inner_item.nb_merged > 1)
+					update_vxlan_header(&(tbl->items[j]));
+				/*
+				 * Delete the item and get the next packet
+				 * index.
+				 */
+				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+				tbl->flows[i].start_index = j;
+				if (j == INVALID_ARRAY_INDEX)
+					tbl->flow_num--;
+
+				if (unlikely(k == nb_out))
+					return k;
+			} else
+				/*
+				 * The left packets in the flow won't be
+				 * timeout. Go to check other flows.
+				 */
+				break;
+		}
+	}
+	return k;
+}
+
+/* Return the number of packets currently held in the table (0 for NULL). */
+uint32_t
+gro_vxlan_tcp4_tbl_pkt_count(void *tbl)
+{
+	struct gro_vxlan_tcp4_tbl *gro_tbl = tbl;
+
+	if (gro_tbl)
+		return gro_tbl->item_num;
+
+	return 0;
+}
diff --git a/lib/librte_gro/gro_vxlan_tcp4.h b/lib/librte_gro/gro_vxlan_tcp4.h
new file mode 100644
index 0000000..6047190
--- /dev/null
+++ b/lib/librte_gro/gro_vxlan_tcp4.h
@@ -0,0 +1,181 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _GRO_VXLAN_TCP4_H_
+#define _GRO_VXLAN_TCP4_H_
+
+#include "gro_tcp4.h"
+
+#define GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
+
+/* Header fields representing a VxLAN flow */
+struct vxlan_tcp4_flow_key {
+	/* Inner Ethernet/IPv4/TCP flow key */
+	struct tcp4_flow_key inner_key;
+	/* VxLAN header; both flags and VNI must match */
+	struct vxlan_hdr vxlan_hdr;
+
+	struct ether_addr outer_eth_saddr;
+	struct ether_addr outer_eth_daddr;
+
+	/* Outer IPv4 addresses, copied verbatim from the packet header */
+	uint32_t outer_ip_src_addr;
+	uint32_t outer_ip_dst_addr;
+
+	/* Outer UDP ports */
+	uint16_t outer_src_port;
+	uint16_t outer_dst_port;
+
+};
+
+/* A flow: its key plus the head of its packet item list */
+struct gro_vxlan_tcp4_flow {
+	struct vxlan_tcp4_flow_key key;
+	/*
+	 * The index of the first item in the flow. INVALID_ARRAY_INDEX
+	 * indicates an empty flow.
+	 */
+	uint32_t start_index;
+};
+
+/* A stored packet: the inner TCP/IPv4 item plus outer IPv4 ID state */
+struct gro_vxlan_tcp4_item {
+	struct gro_tcp4_item inner_item;
+	/* IP ID in the outer IPv4 header */
+	uint16_t outer_ip_id;
+	/* Indicate if outer IPv4 ID can be ignored (outer DF bit set) */
+	uint8_t outer_is_atomic;
+};
+
+/*
+ * VxLAN (with an outer IPv4 header and inner TCP/IPv4 headers)
+ * reassembly table structure
+ */
+struct gro_vxlan_tcp4_tbl {
+	/* item array; a slot is free when inner_item.firstseg == NULL */
+	struct gro_vxlan_tcp4_item *items;
+	/* flow array; a slot is free when start_index == INVALID_ARRAY_INDEX */
+	struct gro_vxlan_tcp4_flow *flows;
+	/* current item number */
+	uint32_t item_num;
+	/* current flow number */
+	uint32_t flow_num;
+	/* the maximum item number */
+	uint32_t max_item_num;
+	/* the maximum flow number */
+	uint32_t max_flow_num;
+};
+
+/**
+ * This function creates a VxLAN reassembly table for VxLAN packets
+ * which have an outer IPv4 header and inner TCP/IPv4 headers.
+ *
+ * @param socket_id
+ *  Socket index for allocating the reassembly table
+ * @param max_flow_num
+ *  The maximum number of flows in the table
+ * @param max_item_per_flow
+ *  The maximum number of packets per flow
+ *
+ * @return
+ *  - Return the table pointer on success.
+ *  - Return NULL on failure.
+ */
+void *gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow);
+
+/**
+ * This function destroys a VxLAN reassembly table.
+ *
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ */
+void gro_vxlan_tcp4_tbl_destroy(void *tbl);
+
+/**
+ * This function merges a VxLAN packet which has an outer IPv4 header and
+ * inner TCP/IPv4 headers. It doesn't process the packet, which has SYN,
+ * FIN, RST, PSH, CWR, ECE or URG set, or doesn't have payload. It
+ * returns the packet if there is no available space in the table.
+ *
+ * This function doesn't check if the packet has correct checksums and
+ * doesn't re-calculate checksums for the merged packet. Additionally,
+ * it assumes the packets are complete (i.e., MF==0 && frag_off==0), when
+ * IP fragmentation is possible (i.e., DF==0).
+ *
+ * @param pkt
+ *  Packet to reassemble
+ * @param tbl
+ *  VxLAN reassembly table pointer
+ * @param start_time
+ *  The time when the packet is inserted into the table
+ *
+ * @return
+ *  - Return a positive value if the packet is merged.
+ *  - Return zero if the packet isn't merged but stored in the table.
+ *  - Return a negative value for invalid parameters.
+ */
+int32_t gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
+		struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t start_time);
+
+/**
+ * This function flushes timed-out packets in the VxLAN reassembly
+ * table, without updating checksums.
+ *
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table.
+ * @param flush_timestamp
+ *  This function flushes packets which are inserted into the table
+ *  before or at the flush_timestamp.
+ * @param out
+ *  Pointer array which is used to keep flushed packets.
+ * @param nb_out
+ *  The element number in 'out'. It also determines the maximum number of
+ *  packets that can be flushed finally.
+ *
+ * @return
+ *  The number of flushed packets.
+ */
+uint16_t gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out);
+
+/**
+ * This function returns the number of the packets in a VxLAN
+ * reassembly table.
+ *
+ * @param tbl
+ *  Pointer pointing to a VxLAN reassembly table.
+ *
+ * @return
+ *  The number of packets in the table.
+ */
+uint32_t gro_vxlan_tcp4_tbl_pkt_count(void *tbl);
+#endif
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index d43f8e8..b96aa48 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -37,6 +37,7 @@
 
 #include "rte_gro.h"
 #include "gro_tcp4.h"
+#include "gro_vxlan_tcp4.h"
 
 typedef void *(*gro_tbl_create_fn)(uint16_t socket_id,
 		uint16_t max_flow_num,
@@ -45,15 +46,28 @@ typedef void (*gro_tbl_destroy_fn)(void *tbl);
 typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl);
 
 static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = {
-		gro_tcp4_tbl_create, NULL};
+		gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, NULL};
 static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
-			gro_tcp4_tbl_destroy, NULL};
+			gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy,
+			NULL};
 static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
-			gro_tcp4_tbl_pkt_count, NULL};
+			gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count,
+			NULL};
 
 #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
 		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
 
+#define IS_IPV4_VXLAN_TCP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
+		((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \
+		((ptype & RTE_PTYPE_TUNNEL_VXLAN) == \
+		 RTE_PTYPE_TUNNEL_VXLAN) && \
+		 ((ptype & RTE_PTYPE_INNER_L4_TCP) == \
+		  RTE_PTYPE_INNER_L4_TCP) && \
+		  (((ptype & RTE_PTYPE_INNER_L3_MASK) & \
+		    (RTE_PTYPE_INNER_L3_IPV4 | \
+		     RTE_PTYPE_INNER_L3_IPV4_EXT | \
+		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0))
+
 /*
  * GRO context structure. It keeps the table structures, which are
  * used to merge packets, for different GRO types. Before using
@@ -139,13 +153,21 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
 	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
 
+	/* Allocate a reassembly table for VXLAN GRO. */
+	struct gro_vxlan_tcp4_tbl vxlan_tbl;
+	struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {
+		{{0}, 0, 0} };
+
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	uint64_t current_time;
 	uint32_t item_num;
 	int32_t ret;
 	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
+	uint8_t do_tcp4_gro = 0, do_vxlan_tcp4_gro = 0;
 
-	if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0)
+	if ((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
+					RTE_GRO_TCP_IPV4)) == 0)
 		return nb_pkts;
 
 	/* Get the actual number of packets */
@@ -153,20 +175,46 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 				param->max_item_per_flow));
 	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
 
-	for (i = 0; i < item_num; i++)
-		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
+	if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
+		for (i = 0; i < item_num; i++)
+			vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		vxlan_tbl.flows = vxlan_flows;
+		vxlan_tbl.items = vxlan_items;
+		vxlan_tbl.flow_num = 0;
+		vxlan_tbl.item_num = 0;
+		vxlan_tbl.max_flow_num = item_num;
+		vxlan_tbl.max_item_num = item_num;
+		do_vxlan_tcp4_gro = 1;
+	}
 
-	tcp_tbl.flows = tcp_flows;
-	tcp_tbl.items = tcp_items;
-	tcp_tbl.flow_num = 0;
-	tcp_tbl.item_num = 0;
-	tcp_tbl.max_flow_num = item_num;
-	tcp_tbl.max_item_num = item_num;
+	if (param->gro_types & RTE_GRO_TCP_IPV4) {
+		for (i = 0; i < item_num; i++)
+			tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		tcp_tbl.flows = tcp_flows;
+		tcp_tbl.items = tcp_items;
+		tcp_tbl.flow_num = 0;
+		tcp_tbl.item_num = 0;
+		tcp_tbl.max_flow_num = item_num;
+		tcp_tbl.max_item_num = item_num;
+		do_tcp4_gro = 1;
+	}
 
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+		if (do_vxlan_tcp4_gro && IS_IPV4_VXLAN_TCP4_PKT(
+					pkts[i]->packet_type)) {
+			ret = gro_vxlan_tcp4_reassemble(pkts[i], &vxlan_tbl,
+					current_time);
+			if (ret > 0)
+				/* Merge successfully */
+				nb_after_gro--;
+			else if (ret < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (do_tcp4_gro && IS_IPV4_TCP_PKT(
+					pkts[i]->packet_type)) {
 			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl,
 					current_time);
 			if (ret > 0)
@@ -179,9 +227,16 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 	}
 
 	if (nb_after_gro < nb_pkts) {
+		i = 0;
 		/* Flush packets from the tables */
-		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time,
-				pkts, nb_pkts);
+		if (do_vxlan_tcp4_gro) {
+			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
+					current_time, pkts, nb_pkts);
+		}
+		if (do_tcp4_gro) {
+			i += gro_tcp4_tbl_timeout_flush(&tcp_tbl,
+					current_time, &pkts[i], nb_pkts - i);
+		}
 		/* Copy unprocessed packets */
 		if (unprocess_num > 0) {
 			memcpy(&pkts[i], unprocess_pkts,
@@ -200,18 +255,33 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
 {
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	struct gro_ctx *gro_ctx = ctx;
-	void *tcp_tbl;
+	void *tcp_tbl, *vxlan_tbl;
 	uint64_t current_time;
 	uint16_t i, unprocess_num = 0;
+	uint8_t do_tcp4_gro = 0, do_vxlan_tcp4_gro = 0;
 
-	if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0)
+	if ((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
+					RTE_GRO_TCP_IPV4)) == 0)
 		return nb_pkts;
+	if (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
+		do_vxlan_tcp4_gro = 1;
+		vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
+	}
+	if (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) {
+		do_tcp4_gro = 1;
+		tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
+	}
 
-	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+		if (do_vxlan_tcp4_gro && IS_IPV4_VXLAN_TCP4_PKT(
+					pkts[i]->packet_type)) {
+			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
+						current_time) < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (do_tcp4_gro && IS_IPV4_TCP_PKT(
+					pkts[i]->packet_type)) {
 			if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
 						current_time) < 0)
 				unprocess_pkts[unprocess_num++] = pkts[i];
@@ -235,18 +305,26 @@ rte_gro_timeout_flush(void *ctx,
 {
 	struct gro_ctx *gro_ctx = ctx;
 	uint64_t flush_timestamp;
+	uint16_t num = 0;
 
 	gro_types = gro_types & gro_ctx->gro_types;
 	flush_timestamp = rte_rdtsc() - timeout_cycles;
 
-	if (gro_types & RTE_GRO_TCP_IPV4) {
-		return gro_tcp4_tbl_timeout_flush(
+	if (gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
+		num = gro_vxlan_tcp4_tbl_timeout_flush(gro_ctx->tbls[
+				RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX],
+				flush_timestamp, out, max_nb_out);
+	}
+
+	max_nb_out -= num;
+	if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
+		num += gro_tcp4_tbl_timeout_flush(
 				gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX],
 				flush_timestamp,
-				out, max_nb_out);
+				&out[num], max_nb_out);
 	}
 
-	return 0;
+	return num;
 }
 
 uint64_t
diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
index c28b3a2..9d676ce 100644
--- a/lib/librte_gro/rte_gro.h
+++ b/lib/librte_gro/rte_gro.h
@@ -57,6 +57,9 @@ extern "C" {
 #define RTE_GRO_TCP_IPV4_INDEX 0
 #define RTE_GRO_TCP_IPV4 (1ULL << RTE_GRO_TCP_IPV4_INDEX)
 /**< TCP/IPv4 GRO flag */
+#define RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX 1
+#define RTE_GRO_IPV4_VXLAN_TCP_IPV4 (1ULL << RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX)
+/**< VxLAN GRO flag. */
 
 /**
  * Structure used to create GRO context objects or used to pass
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v2 2/2] gro: support VxLAN GRO
  2017-12-14  2:49   ` [PATCH v2 2/2] gro: support VxLAN GRO Jiayu Hu
@ 2017-12-14  2:58     ` Stephen Hemminger
  2017-12-14  3:02     ` Stephen Hemminger
  1 sibling, 0 replies; 31+ messages in thread
From: Stephen Hemminger @ 2017-12-14  2:58 UTC (permalink / raw)
  To: Jiayu Hu
  Cc: dev, konstantin.ananyev, jianfeng.tan, junjie.j.chen,
	john.mcnamara, matvejchikov

On Thu, 14 Dec 2017 10:49:39 +0800
Jiayu Hu <jiayu.hu@intel.com> wrote:

> +
> +static inline int
> +is_same_vxlan_tcp4_flow(struct vxlan_tcp4_flow_key k1,
> +		struct vxlan_tcp4_flow_key k2)
> +{
> +	return (is_same_ether_addr(&k1.outer_eth_saddr, &k2.outer_eth_saddr) &&
> +			is_same_ether_addr(&k1.outer_eth_daddr,
> +				&k2.outer_eth_daddr) &&
> +			(k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
> +			(k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
> +			(k1.outer_src_port == k2.outer_src_port) &&
> +			(k1.outer_dst_port == k2.outer_dst_port) &&
> +			(k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
> +			(k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
> +			is_same_tcp4_flow(k1.inner_key, k2.inner_key));

Maybe this could be optimized with memcmp because many of the fields are contiguous.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2 2/2] gro: support VxLAN GRO
  2017-12-14  2:49   ` [PATCH v2 2/2] gro: support VxLAN GRO Jiayu Hu
  2017-12-14  2:58     ` Stephen Hemminger
@ 2017-12-14  3:02     ` Stephen Hemminger
  2017-12-14  4:37       ` Hu, Jiayu
  1 sibling, 1 reply; 31+ messages in thread
From: Stephen Hemminger @ 2017-12-14  3:02 UTC (permalink / raw)
  To: Jiayu Hu
  Cc: dev, konstantin.ananyev, jianfeng.tan, junjie.j.chen,
	john.mcnamara, matvejchikov

On Thu, 14 Dec 2017 10:49:39 +0800
Jiayu Hu <jiayu.hu@intel.com> wrote:

> +	/* Don't merge packets whose outer DF bits are different. */
> +	if (item->outer_is_atomic ^ outer_is_atomic)

unlikely() here?

> +		return 0;
> +
> +	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
> +	cmp = check_seq_option(&item->inner_item, tcp_hdr, sent_seq, ip_id,
> +			tcp_hl, tcp_dl, l2_offset, is_atomic);
> +	if ((cmp == 1) && (outer_is_atomic || (outer_ip_id ==
> +					item->outer_ip_id +
> +					item->inner_item.nb_merged)))

More readable if you break the line at the ||

> +		/* Append the packet. */
> +		return 1;
> +	else if ((cmp == -1) && (outer_is_atomic || (outer_ip_id + 1 ==
> +					item->outer_ip_id)))

else unecessary after return. Similar line break for readabilty.

> +		/* Prepend the packet. */
> +		return -1;
> +	else
> +		return 0;
> +}
> +

maybe?
	if (cmp == 1) {
		if (outer_is_atomic ||
		    outer_ip_id == item->outer_ip_id + item->inner_item.nb_merged)
			return 1;
	} else if (cmp == -1) {
		if (uter_is_atomic || outer_ip_id + 1 == item->outer_ip_id)
			return -1;
	}
	return 0;

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2 2/2] gro: support VxLAN GRO
  2017-12-14  3:02     ` Stephen Hemminger
@ 2017-12-14  4:37       ` Hu, Jiayu
  0 siblings, 0 replies; 31+ messages in thread
From: Hu, Jiayu @ 2017-12-14  4:37 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, Ananyev, Konstantin, Tan, Jianfeng, Chen, Junjie J,
	Mcnamara, John, matvejchikov

Hi Stephen,

> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Thursday, December 14, 2017 11:03 AM
> To: Hu, Jiayu <jiayu.hu@intel.com>
> Cc: dev@dpdk.org; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> Tan, Jianfeng <jianfeng.tan@intel.com>; Chen, Junjie J
> <junjie.j.chen@intel.com>; Mcnamara, John <john.mcnamara@intel.com>;
> matvejchikov@gmail.com
> Subject: Re: [PATCH v2 2/2] gro: support VxLAN GRO
> 
> On Thu, 14 Dec 2017 10:49:39 +0800
> Jiayu Hu <jiayu.hu@intel.com> wrote:
> 
> > +	/* Don't merge packets whose outer DF bits are different. */
> > +	if (item->outer_is_atomic ^ outer_is_atomic)
> 
> unlikely() here?

Agree. Add it in the next version.

> 
> > +		return 0;
> > +
> > +	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
> > +	cmp = check_seq_option(&item->inner_item, tcp_hdr, sent_seq,
> ip_id,
> > +			tcp_hl, tcp_dl, l2_offset, is_atomic);
> > +	if ((cmp == 1) && (outer_is_atomic || (outer_ip_id ==
> > +					item->outer_ip_id +
> > +					item->inner_item.nb_merged)))
> 
> More readable if you break the line at the ||

Exactly, I will change it in the next version.

> 
> > +		/* Append the packet. */
> > +		return 1;
> > +	else if ((cmp == -1) && (outer_is_atomic || (outer_ip_id + 1 ==
> > +					item->outer_ip_id)))
> 
> else unecessary after return. Similar line break for readabilty.

Correct, I will change it.

> 
> > +		/* Prepend the packet. */
> > +		return -1;
> > +	else
> > +		return 0;
> > +}
> > +
> 
> maybe?
> 	if (cmp == 1) {
> 		if (outer_is_atomic ||
> 		    outer_ip_id == item->outer_ip_id + item-
> >inner_item.nb_merged)
> 			return 1;
> 	} else if (cmp == -1) {
> 		if (uter_is_atomic || outer_ip_id + 1 == item->outer_ip_id)
> 			return -1;
> 	}
> 	return 0;
> 

The reason to replace "if ((cmp == 1) && (outer_is_atomic ...))" with two if statements is
for readability?

But will two if statements make the code less efficient?

Thanks,
Jiayu

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 0/2] Support VxLAN GRO
  2017-12-14  2:49 ` [PATCH v2 0/2] Support " Jiayu Hu
  2017-12-14  2:49   ` [PATCH v2 1/2] gro: code cleanup Jiayu Hu
  2017-12-14  2:49   ` [PATCH v2 2/2] gro: support VxLAN GRO Jiayu Hu
@ 2017-12-22  7:25   ` Jiayu Hu
  2017-12-22  7:25     ` [PATCH v3 1/2] gro: code cleanup Jiayu Hu
                       ` (3 more replies)
  2 siblings, 4 replies; 31+ messages in thread
From: Jiayu Hu @ 2017-12-22  7:25 UTC (permalink / raw)
  To: dev
  Cc: jianfeng.tan, junjie.j.chen, konstantin.ananyev, stephen,
	ferruh.yigit, lei.a.yao, Jiayu Hu

VxLAN is one of the most widely used tunneled protocols. Providing GRO
support for VxLAN-encapsulated packets can benefit many per-packet based
applications, like Open vSwitch.

This patchset is to support VxLAN GRO. The first patch cleans up current
gro codes for the sake of supporting tunneled GRO. The second patch
supports GRO on the VxLAN packets which have an outer IPv4 header and an
inner TCP/IPv4 packet.

Change log
===========
v3:
- remove needless check
- add "likely()" and "unlikely()" to optimize branch prediction
- fix a bug in merge_two_tcp4_packets(): for VxLAN packets, check if
  the outer IPv4 packet length is less than or equal to UINT16_MAX,
  rather than the inner IPv4 packet length.
- fix a bug in rte_gro.h: change RTE_GRO_TYPE_SUPPORT_NUM to 2
- Avoid inserting timestamp in rte_gro_reassemble_burst(), since all
  packets in the tables will be flushed.
- fix typos
v2:
- comply RFC 6848 to process IP ID fields. Specifically, we require the
  IP ID fields of neighbor packets whose DF bit is 0 to be increased by
  1. We don't check IP ID for the packets whose DF bit is 1.
  Additionally, packets whose DF bits are different cannot be merged.
- update the programmer guide and function comments

Jiayu Hu (2):
  gro: code cleanup
  gro: support VxLAN GRO

 .../prog_guide/generic_receive_offload_lib.rst     | 269 ++++++-----
 doc/guides/prog_guide/img/gro-key-algorithm.png    | Bin 0 -> 28231 bytes
 lib/librte_gro/Makefile                            |   1 +
 lib/librte_gro/gro_tcp4.c                          | 330 +++++--------
 lib/librte_gro/gro_tcp4.h                          | 253 +++++++---
 lib/librte_gro/gro_vxlan_tcp4.c                    | 515 +++++++++++++++++++++
 lib/librte_gro/gro_vxlan_tcp4.h                    | 184 ++++++++
 lib/librte_gro/rte_gro.c                           | 199 +++++---
 lib/librte_gro/rte_gro.h                           |  97 ++--
 9 files changed, 1337 insertions(+), 511 deletions(-)
 create mode 100644 doc/guides/prog_guide/img/gro-key-algorithm.png
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.c
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.h

-- 
2.7.4

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 1/2] gro: code cleanup
  2017-12-22  7:25   ` [PATCH v3 0/2] Support " Jiayu Hu
@ 2017-12-22  7:25     ` Jiayu Hu
  2017-12-29  3:53       ` Chen, Junjie J
  2018-01-02 11:26       ` Bruce Richardson
  2017-12-22  7:25     ` [PATCH v3 2/2] gro: support VxLAN GRO Jiayu Hu
                       ` (2 subsequent siblings)
  3 siblings, 2 replies; 31+ messages in thread
From: Jiayu Hu @ 2017-12-22  7:25 UTC (permalink / raw)
  To: dev
  Cc: jianfeng.tan, junjie.j.chen, konstantin.ananyev, stephen,
	ferruh.yigit, lei.a.yao, Jiayu Hu

- Remove needless check and variants
- For better understanding, update the programmer guide and rename
  internal functions and variants
- For supporting tunneled gro, move common internal functions from
  gro_tcp4.c to gro_tcp4.h
- Comply RFC 6864 to process the IPv4 ID field

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
---
 .../prog_guide/generic_receive_offload_lib.rst     | 246 ++++++++-------
 doc/guides/prog_guide/img/gro-key-algorithm.png    | Bin 0 -> 28231 bytes
 lib/librte_gro/gro_tcp4.c                          | 330 +++++++--------------
 lib/librte_gro/gro_tcp4.h                          | 253 +++++++++++-----
 lib/librte_gro/rte_gro.c                           |  98 +++---
 lib/librte_gro/rte_gro.h                           |  92 +++---
 6 files changed, 518 insertions(+), 501 deletions(-)
 create mode 100644 doc/guides/prog_guide/img/gro-key-algorithm.png

diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst b/doc/guides/prog_guide/generic_receive_offload_lib.rst
index 22e50ec..c2d7a41 100644
--- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
+++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
@@ -32,128 +32,162 @@ Generic Receive Offload Library
 ===============================
 
 Generic Receive Offload (GRO) is a widely used SW-based offloading
-technique to reduce per-packet processing overhead. It gains performance
-by reassembling small packets into large ones. To enable more flexibility
-to applications, DPDK implements GRO as a standalone library. Applications
-explicitly use the GRO library to merge small packets into large ones.
-
-The GRO library assumes all input packets have correct checksums. In
-addition, the GRO library doesn't re-calculate checksums for merged
-packets. If input packets are IP fragmented, the GRO library assumes
-they are complete packets (i.e. with L4 headers).
-
-Currently, the GRO library implements TCP/IPv4 packet reassembly.
-
-Reassembly Modes
-----------------
-
-The GRO library provides two reassembly modes: lightweight and
-heavyweight mode. If applications want to merge packets in a simple way,
-they can use the lightweight mode API. If applications want more
-fine-grained controls, they can choose the heavyweight mode API.
-
-Lightweight Mode
-~~~~~~~~~~~~~~~~
-
-The ``rte_gro_reassemble_burst()`` function is used for reassembly in
-lightweight mode. It tries to merge N input packets at a time, where
-N should be less than or equal to ``RTE_GRO_MAX_BURST_ITEM_NUM``.
-
-In each invocation, ``rte_gro_reassemble_burst()`` allocates temporary
-reassembly tables for the desired GRO types. Note that the reassembly
-table is a table structure used to reassemble packets and different GRO
-types (e.g. TCP/IPv4 GRO and TCP/IPv6 GRO) have different reassembly table
-structures. The ``rte_gro_reassemble_burst()`` function uses the reassembly
-tables to merge the N input packets.
-
-For applications, performing GRO in lightweight mode is simple. They
-just need to invoke ``rte_gro_reassemble_burst()``. Applications can get
-GROed packets as soon as ``rte_gro_reassemble_burst()`` returns.
-
-Heavyweight Mode
-~~~~~~~~~~~~~~~~
-
-The ``rte_gro_reassemble()`` function is used for reassembly in heavyweight
-mode. Compared with the lightweight mode, performing GRO in heavyweight mode
-is relatively complicated.
-
-Before performing GRO, applications need to create a GRO context object
-by calling ``rte_gro_ctx_create()``. A GRO context object holds the
-reassembly tables of desired GRO types. Note that all update/lookup
-operations on the context object are not thread safe. So if different
-processes or threads want to access the same context object simultaneously,
-some external syncing mechanisms must be used.
-
-Once the GRO context is created, applications can then use the
-``rte_gro_reassemble()`` function to merge packets. In each invocation,
-``rte_gro_reassemble()`` tries to merge input packets with the packets
-in the reassembly tables. If an input packet is an unsupported GRO type,
-or other errors happen (e.g. SYN bit is set), ``rte_gro_reassemble()``
-returns the packet to applications. Otherwise, the input packet is either
-merged or inserted into a reassembly table.
-
-When applications want to get GRO processed packets, they need to use
-``rte_gro_timeout_flush()`` to flush them from the tables manually.
+technique to reduce per-packet processing overheads. By reassembling
+small packets into larger ones, GRO enables applications to process
+fewer large packets directly, thus reducing the number of packets to
+be processed. To benefit DPDK-based applications, like Open vSwitch,
+DPDK also provides own GRO implementation. In DPDK, GRO is implemented
+as a standalone library. Applications explicitly use the GRO library to
+reassemble packets.
+
+Overview
+--------
+
+In the GRO library, there are many GRO types which are defined by packet
+types. One GRO type is in charge of process one kind of packets. For
+example, TCP/IPv4 GRO processes TCP/IPv4 packets.
+
+Each GRO type has a reassembly function, which defines own algorithm and
+table structure to reassemble packets. We assign input packets to the
+corresponding GRO functions by MBUF->packet_type.
+
+The GRO library doesn't check if input packets have correct checksums and
+doesn't re-calculate checksums for merged packets. The GRO library
+assumes the packets are complete (i.e., MF==0 && frag_off==0), when IP
+fragmentation is possible (i.e., DF==0). Additionally, it complies RFC
+6864 to process the IPv4 ID field.
 
-TCP/IPv4 GRO
-------------
+Currently, the GRO library provides GRO supports for TCP/IPv4 packets.
+
+Two Sets of API
+---------------
+
+For different usage scenarios, the GRO library provides two sets of API.
+The one is called the lightweight mode API, which enables applications to
+merge a small number of packets rapidly; the other is called the
+heavyweight mode API, which provides fine-grained controls to
+applications and supports to merge a large number of packets.
+
+Lightweight Mode API
+~~~~~~~~~~~~~~~~~~~~
+
+The lightweight mode only has one function ``rte_gro_reassemble_burst()``,
+which process N packets at a time. Using the lightweight mode API to
+merge packets is very simple. Calling ``rte_gro_reassemble_burst()`` is
+enough. The GROed packets are returned to applications as soon as it
+finishes.
+
+In ``rte_gro_reassemble_burst()``, table structures of different GRO
+types are allocated in the stack. This design simplifies applications'
+operations. However, limited by the stack size, the maximum number of
+packets that ``rte_gro_reassemble_burst()`` can process in an invocation
+should be less than or equal to ``RTE_GRO_MAX_BURST_ITEM_NUM``.
+
+Heavyweight Mode API
+~~~~~~~~~~~~~~~~~~~~
+
+Compared with the lightweight mode, using the heavyweight mode API is
+relatively complex. Firstly, applications need to create a GRO context
+by ``rte_gro_ctx_create()``. ``rte_gro_ctx_create()`` allocates tables
+structures in the heap and stores their pointers in the GRO context.
+Secondly, applications use ``rte_gro_reassemble()`` to merge packets.
+If input packets have invalid parameters, ``rte_gro_reassemble()``
+returns them to applications. For example, packets of unsupported GRO
+types or TCP SYN packets are returned. Otherwise, the input packets are
+either merged with the existed packets in the tables or inserted into the
+tables. Finally, applications use ``rte_gro_timeout_flush()`` to flush
+packets from the tables, when they want to get the GROed packets.
+
+Note that all update/lookup operations on the GRO context are not thread
+safe. So if different processes or threads want to access the same
+context object simultaneously, some external syncing mechanisms must be
+used.
+
+Reassembly Algorithm
+--------------------
+
+The reassembly algorithm is used for reassembling packets. In the GRO
+library, different GRO types can use different algorithms. In this
+section, we will introduce an algorithm, which is used by TCP/IPv4 GRO.
 
-TCP/IPv4 GRO supports merging small TCP/IPv4 packets into large ones,
-using a table structure called the TCP/IPv4 reassembly table.
+Challenges
+~~~~~~~~~~
 
-TCP/IPv4 Reassembly Table
-~~~~~~~~~~~~~~~~~~~~~~~~~
+The reassembly algorithm determines the efficiency of GRO. There are two
+challenges in the algorithm design:
 
-A TCP/IPv4 reassembly table includes a "key" array and an "item" array.
-The key array keeps the criteria to merge packets and the item array
-keeps the packet information.
+- a high cost algorithm/implementation would cause packet dropping in a
+  high speed network.
 
-Each key in the key array points to an item group, which consists of
-packets which have the same criteria values but can't be merged. A key
-in the key array includes two parts:
+- packet reordering makes it hard to merge packets. For example, Linux
+  GRO fails to merge packets when encounters packet reordering.
 
-* ``criteria``: the criteria to merge packets. If two packets can be
-  merged, they must have the same criteria values.
+The above two challenges require our algorithm is:
 
-* ``start_index``: the item array index of the first packet in the item
-  group.
+- lightweight enough to scale fast networking speed
 
-Each element in the item array keeps the information of a packet. An item
-in the item array mainly includes three parts:
+- capable of handling packet reordering
 
-* ``firstseg``: the mbuf address of the first segment of the packet.
+In DPDK GRO, we use a key-based algorithm to address the two challenges.
 
-* ``lastseg``: the mbuf address of the last segment of the packet.
+Key-based Reassembly Algorithm
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+:numref:`figure_gro-key-algorithm` illustrates the procedure of the
+key-based algorithm. Packets are classified into "flows" by some header
+fields (we call them as "key"). To process an input packet, the algorithm
+searches for a matched "flow" (i.e., the same value of key) for the
+packet first, then checks all packets in the "flow" and tries to find a
+"neighbor" for it. If find a "neighbor", merge the two packets together.
+If can't find a "neighbor", store the packet into its "flow". If can't
+find a matched "flow", insert a new "flow" and store the packet into the
+"flow".
+
+.. note::
+        Packets in the same "flow" that can't merge are always caused
+        by packet reordering.
+
+The key-based algorithm has two characters:
+
+- classifying packets into "flows" to accelerate packet aggregation is
+  simple (address challenge 1).
+
+- storing out-of-order packets makes it possible to merge later (address
+  challenge 2).
+
+.. _figure_gro-key-algorithm:
+
+.. figure:: img/gro-key-algorithm.*
+   :align: center
+
+   Key-based Reassembly Algorithm
+
+TCP/IPv4 GRO
+------------
 
-* ``next_pkt_index``: the item array index of the next packet in the same
-  item group. TCP/IPv4 GRO uses ``next_pkt_index`` to chain the packets
-  that have the same criteria value but can't be merged together.
+The table structure used by TCP/IPv4 GRO contains two arrays: flow array
+and item array. The flow array keeps flow information, and the item array
+keeps packet information.
 
-Procedure to Reassemble a Packet
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Header fields used to define a TCP/IPv4 flow include:
 
-To reassemble an incoming packet needs three steps:
+- source and destination: Ethernet and IP address, TCP port
 
-#. Check if the packet should be processed. Packets with one of the
-   following properties aren't processed and are returned immediately:
+- TCP acknowledge number
 
-   * FIN, SYN, RST, URG, PSH, ECE or CWR bit is set.
+TCP/IPv4 packets whose FIN, SYN, RST, URG, PSH, ECE or CWR bit is set
+won't be processed.
 
-   * L4 payload length is 0.
+Header fields deciding if two packets are neighbors include:
 
-#.  Traverse the key array to find a key which has the same criteria
-    value with the incoming packet. If found, go to the next step.
-    Otherwise, insert a new key and a new item for the packet.
+- TCP sequence number
 
-#. Locate the first packet in the item group via ``start_index``. Then
-   traverse all packets in the item group via ``next_pkt_index``. If a
-   packet is found which can be merged with the incoming one, merge them
-   together. If one isn't found, insert the packet into this item group.
-   Note that to merge two packets is to link them together via mbuf's
-   ``next`` field.
+- IPv4 ID. The IPv4 ID fields of the packets, whose DF bit is 0, should
+  be increased by 1.
 
-When packets are flushed from the reassembly table, TCP/IPv4 GRO updates
-packet header fields for the merged packets. Note that before reassembling
-the packet, TCP/IPv4 GRO doesn't check if the checksums of packets are
-correct. Also, TCP/IPv4 GRO doesn't re-calculate checksums for merged
-packets.
+.. note::
+        We comply RFC 6864 to process the IPv4 ID field. Specifically,
+        we check IPv4 ID fields for the packets whose DF bit is 0 and
+        ignore IPv4 ID fields for the packets whose DF bit is 1.
+        Additionally, packets which have different value of DF bit can't
+        be merged.
diff --git a/doc/guides/prog_guide/img/gro-key-algorithm.png b/doc/guides/prog_guide/img/gro-key-algorithm.png
new file mode 100644
index 0000000000000000000000000000000000000000..89cf427b3c7c406d7ddfb485d3cc5122b2206c1f
GIT binary patch
literal 28231
zcmd?Qc|4n2`~RD^yTjJ%q@_x!c9)i-F;k)~rGufZA&SsSwWd%*43R{42Q_QADn%)3
zN=mjCF-8XxLy0M-ASDP%QA2`6_}yvu^L?J*bAIQ%&iVWN(N`L|?{%;1TG#bi*IMg+
zM_;ot7Z*D$1_FV^FJCgf0RnC2f<T*aJGKFTi4CaH06sPa-Y~xaDruLU2EP1u@4V%C
z5U4b9=i2S9!1v$pU%D9x0__hH{o6EhS3VR3LJVCtJ#QQ8GE?VQsLBc(IpS6qKP21t
zSMeX8B`5j~w2tXDgHMi@75Z84FTcVh#b-=iBwv<Mvh&7YK1|<XK6k#@G?}d5bN_<H
z-yaon{D1tZ74P*}_ThfhYZrE^UTTO$@4c`oVsF{8huHb;7zY9&4|#-9jWpK$(3@7i
zoc4;b)tKk9Xt=nRes8vF^3{NMM5~y><GmMlC}?@!X72+&AMZ`8Ji`@zUf5yjoQnGS
z<6}iN3!|SuUXYN=m;3o+cJxl0MA4m(I~tx8ng6`QW9)O!@n6mP|KBucu%yaZP{r5G
zR(mf_kLU5Xidwjqa)0Lz<c^>1WuMwXd~ogQuRf$b{D1ht<A9Oi`v>pEu+@bL@2t7{
zjdfZvaRk+t%3i*jqE_sJo^M2ELh2S;R5%O1^+KMv`%ow|9v?ne7lP!nX~;}xzpE_-
zVOf3ajVdqdBGtIQnS1w?YikeQf>>l{W=CiV|6WSb(H}X@Z4A>a2z&G=U~V?^V7^>@
zjz@|6ix1Lm{V#}rB+16#f3j=V#@eWSz0RF&XH{G3k*x#0;pqO>y(Kwc<`G-*qV6Aq
z>b=i9NwhZ5FZa>KY!c0$Th>8o?oIAixuaZmow!$PzqyGY)3?ZQ=3eeBU)d(BwG*pB
zf7w-@wOEL_IilNwxi(x{Td23YdQ%kh&AJD7WV%@6kBNOheGmJr`sVr_8}&OLH7=#s
zU1X+fZ>T=<{xZUiiCGxXi}hhYeR;g<bn<iY3SMG9HYyvcj|ux@_|D0!UD&Hvr2gH+
z0ZR<`%H@}?uFa7Z<#ji-W(RuwY^+jjBQ1q{P1MVM9h9rS_F6vc`t4r>=WzhI0-l4w
z=H7Wzj)+otS*x6BS=Ty^ayw(HY~n7iy+luVGzcwc+(<hodJKEN@22=;=>sqIi|;Ux
z*daJK@FVQV+fqflax0M1sAgShdd_<l{Uf3%E4*5{RVyZ;t;d_LGf_4kV9HRgd;Bxl
zIy`|eF?-%t?qVa21H07E*DMb7)MRv{gm=@^)76Sjiv~-f=fu$F!xwcvxJ5mOEXlW^
z8qr^kUm#yqOM4Cn6<S{LE3WlZp(*{0;_Amv=W<V`@v~hgPL{d8uq^GWwihN-@vLL%
zd<QEAu4)0-W(S#(7q6z5ybq+Srbi6F32Nrz<oR`fwft{GFxTgD2j%X|jHy^KJwBpQ
zH%JMy&0N2kWJ$=}+z<hwxeo~$cVX=g8|W*-9}s2D*1N+3GVkhOJ;CMb^yYt??ZbAr
z&z(ke5@U?s?vlAHba{pHyre(Ym3KAWXyjy`yN&vE<i`nnq&b{%5%1Ev`k!W?F6Z0V
zo5S%mWbu_fLM9EjOWLzeY3|^Hv)_s2-1Hr<0#lCLmRKO6;V%dp+`o8%bVvM*R{<|k
zS8~vN)oE_RJWO7c$k}S6k{u@xyOsRZhK}2S^K*aYsX`Fyb5HK|U!$?t!00x+^W;C-
z|2p#w*W<uH0$^qRe-;r{s}KfenVlIvJAf92(%v)@aF>5q&Bjj`c&L3Ic+>d%FG;BS
z{oW%<rC$;>shA<|nDLK1q~F)^@c;Ex58wa4-4LWVJyeZeU6_;r#wlrk`?fY0_1Yhw
z+202~^!;6wNC#Y7W$F9410r%z$22f(lG!zLBthCMs&j3qf6`8cGD47*XqcwP0R@kJ
zW<U1fl`hqA@vEZL1rOKhjGy;412Qj}>s6j<htdiW2`;CqB#UaivBn+=pMwnpipoI%
zs_R{_3A!!-MA#9VEzCl(i!2lT*d>6>zf^j-_rla2(}s(&*UKHL%0_85K5TVZb(s!J
zW1djE&{AGT_p8SshCY<I)K!mIuet)d&DEJHm{BemG+%%Kp<!j8iYhzVNJH6AckTxy
zZYlhwJu^8!>!tlzx#c#}Ukfe$95aQhf`6NG!@2lgvYhI99d2<l^73?|5wCx;+e_$Z
z1;sY2thzK3%fv;IrKE0Q<d{(=>Xl=R54$EV*kZW_!yQ4taA~|w3e`ONv&Enww|p0=
zmrvzT9*BJ3nwbikxbD#3yVpUWt7BhZY2pJ}?ZxoXEqe_APKYPe@dg!dB&esOgg%$2
zb;e8<ocxXPgOPv>ei#%MZSec*N!0-pE$qp3KbP$&5GYhrcQ~k<-DxflqkJtbFD|;5
z2h6YeMUP(bl`=!e#5d{|hZ*xc8WsI*JG6coFMm@wz>4tXmMa1O%etqw1$0`hk+iW+
z&vKxAj+nPre+SQHzLh*j`td~FCud4;u(#)`O;~{oX6*MfAXynDp=qx{R8e0{(pla}
zNPQr;(R?E;%zq2$K`ZCbDuy+JRP}8sOvr;-)YS{~%M<OVtvPhR(9OF*HsjB%?-X}h
z3f0#>tG|=kcu7Qa*iZ=XPnzq-d_imMArPqcxk}c`H-lj1NbHn}{GcgKSQcB9k}x06
z4ih!-?QK?*V{RIX&`~e^>I8M=f5o_)Zvwq(%UYYH5=e_j{G2FV+vU;sC14?aCHXF0
zka^<{F(IqJce(Fs@q6Au>j#@|wa6|+ztc`D>Jy7OTx6%X%Lw#wwy8h7aeba5&oCXB
zh*A0oIwQ4v-n92{V-H7o)Of$$$6|v`AkdfV-=G$>+(D=-1}T9^QOX2u`8ae)05ld;
z)F-c6bQ|?Yi<lVbqh9Ssjo(d5?^AhfC~(~g(RI%H4H5&_0hc^^5F)z?L}nJL0K;XE
zv12OKgGUcFO56gao`oAfAEx&R68~OGsk3{u33MxD={H+2Z@M6B#B|s_3h3A0zmd_$
zYKTRBu5&mr-?0=|3IhFoHf!xiBQmZINKBpUA=08@I{ha&t7#U&<}%**9KuN5qd9;;
z{jZHH*8(|*!bd+pvFv$WFB+1oZts-dhXbMJ?W=9Nv6Aa`5(N6*zF03PLdMYq6Ib_%
z9CkU5YZ~llL!fN++Og5R2XqVVvcAA^oemXp`}}fd*F>Fn*bg@nbsoPj$LkNEjYQ`6
z8$s)9tH=m$vL6j7Tfat^W;VI8H-QplDFc|G4dJq@3X4s{<E32D{zzuLZi%SCvP}3g
zy)e-yIAgo6+)8l4{4Ki&^gtinD{<2T7^de;y&C-5(*lup@IUMlg40pU#KSv*E5<5`
ziD$THf3WJg^Rz9-+%I_f)j#V4i#NuM=1?rQQpA)1S}*i7uI1y^jhtu0cMK&M^Wz1r
zODE<9^}G*wnGs$ucbSwq4(^YvUl~&;uwn*_s%$3)o<?kR$~#fF3*U9!M(L5iD;IyA
z`>;X!+_MD~#QgbCr=yxL<Iy{%b)>)IGD<)7L?nfx6Oe{J)r|^qy7^daDCs*6Vae_P
zk6X5M#^B<r`M4~{q7=_K{HK;&JJd$V8GlBiYp99-Q;^Mur9%;=hq{lF1RKX3*BG!`
zoa=&x440dA1=po9F|&2cBi@8LTNF@Sjy!X%_f9XnV>7qnoYyAqMx8%KJvN5Cj=i|3
z>8irLIdTFe7yhd9zD{zOs^0;+55X33)^g}2=5dkyIjtpB-QB30WmP?pr#ie;RMh?3
zujLIg-Js<b<rQ}ME|Jd0Ts6{Ciwfc0;`EU<pYO@}Yhy(PnS(yC;>BsRb99ji+X;pl
zUZ?Z7<;z*gq&u!k`6VSjH}BkR!_!(g4cUJDO8TRszRzhD6htaNekcm}XW+RcSdt57
zs5gUi_nZp(sY87HEw;kvCrGSb`z#J&QCAUG4!NobGacw?9fEcdXQ%GoO6RE%3dwF>
zx7ed)rI!<3#`}y{B{&Gj;>55gM%HGYfoHfQa>Uzg-GJk`ZS9-37ll4Ot61%M?Z|xc
zkzpgj9^Yh&gvPcO!^@iOQk}y^a`lbXK@051j7hQy5|6>|R|v{cxwMkNpf8a28S?|B
zM+(F=6kW7>b}wHP-j{bySoR;l*uu~Brn(Q!RjSIf4BPodsJfPC{sLuN`*Nxp<(5qF
zYX*&npunv{zX#JQ@?98lHqChM%MB1QnNiK{bs~DzFT?<TGDDV755C5;y&);1LD7oC
z6h*QSU0Vw~8SI@-m{~>bTRwZY2Mg2GlJu`3Sk5v$$H2qpseC!KhU{m(rUQd-$~u03
z3k_QeH=Oqgb!<RA-ev0w!~E%x<dTmY?CW>tgp}7Qn@@L++f`=N5(W#GWF5w$*-I-`
zd!$2Hy70K0JBh}=v5LJaDhIu6v~}!Ua^J&8FbSWomCcnbI(<JY7x)=7Y)4PyzYNgF
z*5zB*Vl^4wcN*ry`YLxk{lu~{)R{k=mI(bSPslqqtK>EaW>!f{y+h9wm24FJNePml
zAuj8Vo7swI7-%f8)8}j{e;k&Rktry0u{2~{>qnPw&a-~+GZd#(^6V59>G@<B;Ym|4
znF~HiSZI_vk~|&UfNj)xJohrWWW6YhWYTMmN#Nf3Fe%YJJ^P_`Z*jGzPP~`-kZT<H
zE-dujSVg~_(00`yZA%ak4E-5Vi)Dj(3q1X8CWqkIL~XgibC?4)UNSZe6xXa}Vrhct
z$R)n5%{d%fbp16atY)%o@;!BHHba}Sh~3wykR{KTrhX86tyyCj7UYDqA{*ExbiwSO
zz?XD9vQ})Z3f0PCm}{B6mBFK-x64$?XTf)do-8btgL4q=N;pG<ZC7kwB?RuHO$&+9
zVI$^x?Wnae9m<f{1Xb{R-}CD?)1YDKF$rN)HpLv%Gap0$?u2`9wS|cJ50_<r%lrY)
z{(byS;qNXJ`&y(PS>lgCnsMKXG|)GYTX7E1gF|-2t``NI-=dU~ADwGJ(3(f=5}<eO
zT!&_Zqj2>(b|GZS(G<<zROG0C6y1>Vc+h^U_GqammjUn5bqzM!hAI3Co?Lcdg0~y)
zu3=xx$4*@_%3p88%m+6nc%U`X7-6pa;J4o%{7_bhm+O?$T<fkx_&eRDFLtp4cXH2T
zJmbBk!oIq#HJ6cL{wj$k7Y_>VD8F_o-c2DvDW!fJm}szWE=0w8HeAhwDM7zE3Qt`&
z;?3)8`3fL|esg|J@r8%=kcOX}{`7?gy~jISepEKvFMh-IWAQ_l4sz8>f4gS+d8KR*
zX?&FR>Ft}CH9lM=U%$}@GSDp97d3Hr0Rty&nHuC&?kHT6*!W-&sqj5LW!5PfLIzL4
z->n+;hJM|Y_@%_)MbYwEF3c5q>UHl~ZkkNym$ZAyod#O8Dmh~KItLvcyTyPt#Phio
zG~y9%5sPVK#kK@sX%f_Xjc4Lx#Uqe%RmB&OD+X_Z8_=-QdpmRxUa>7veKOor`Qbxj
z>zGQ6Ni(x>9ToTV8Ra~s#Z!Nqz<?7(ljz12gZ+KT7liRhQ?zA#;Q4Wl3h69ELLvse
zJDyHT<k1B(B9XE>V*LZQ#dc@b0TXD*ktoNR`(gT^#<Farz6TjWI=V}{L?>bQX0otJ
zDPU)x%v>eRtPd7y7t%0e^PoxU1XyIh!I%z}DEm$Wv-sYd6z9K--ygi@ICI3?aPk3P
zDVUaRQP;CPql?YLrt{85XiJ*Rhh;`b^8P4n=T^eXCQIUD7RWQ1i(xfmuPPcVCEKp`
zg%Qv?cUFgxPEO$Nvi1puQ1Cg#l*XOTm1t$_okaC986$3M>Cm`$=GZN(M0g$p*?nMf
z%0b^qE;i$(;Dp<NoP4L0#PW#>1^HGzzl#>F%-q`9NBj~9c|A7tU4aJV>E{`SxnM>p
zUbW9|OM#Wjo7q_F80F#|YFTA%=;lKR#uJB^TSj%HOeUzzURt}@>u*)E#5`@u9NgKl
zV7=6PFDiDL2u=(|R<=>9@H3xR9pN3?W+fq6@D6htQO!{({V&rMEbHnET=4U><sJ?_
zU@7pQ>Ek41j|Tng1lZ&Co{^EhOs|Nn6vwunIj}1H!v|`TIWc=~lBB|0x~(Y{?-=Xx
z4Ywp2W;KpQIa?a)=ojM?JNsD%OI24E51G4FXIVs*q<;WMA)16=XO?&+rLj{hWVDTJ
zmdPk@E?3A~kNi2+eU!-ap19>?sE;p;fpm{8DCg$S)a-2yIQp)|FMDnE?X1_mCyz`|
zQxMZ)g?7VasYmwJ*Z68anh?3zN=t#a6O{3Iv7G<ETPgP+R*pwlod}$(Xc_lT9`U4M
zRjcB>PDhR{9u4eTe2avHhBMqQdq)sryzJg{(7bh%ukZgZilGCWGAg{F(ZJBA_@~#`
zhuR?v8HMBqYn8|RnHj8Cm1_0P=!qJFl}#MC&6X^<d;A^7?G;>t(?aPA{1h54msO%K
z9WOXQVO^^f3OwrT>J@ZzyuQia-`lU)zf_8NR=y9h^-2o#wW(Dro%kuSE^?b!)-CW+
zvBZgv8jtPKp}Mmg)q^)^s=$=H<76F25fKOG<l?@3dgqt%n!wJTI()ofCU6)3K7;p~
z0gtE1UfXE6F}`jO2fsjP>te`3K92JZbbRFz&l&X`{Tn29Mj*eUAy^#Y4gEgtHdQ4z
zeK<wZH>+r&(m9(^XEawMUW$GWO=fzlEF1<xm4$GdFR4^}o_JYSesso!+`BX(lBWZ+
zUw}34^3mf{zyGm#eZn&vgs)kb^+32q4xMb^FE+bZZ!WRUB;6W%GWQoPK_TQL<dfUE
z1Ei+i7)F~gM(Xx*fhbI!JZc5JgTNdsKkGE4v-EiqZ6|-R?@&CDn}@uAO-MqCtIq{a
z@r^njzM&I#i#}6r-&1W_s6JlYVv)LCb;a4lCd?8K5zXb6e+p{MRI{C}5QrbCkD}kh
z8Ty~*)(}}3qQXmczJ=GjtB4<NFZ80*Z}gp507o;A*J7Ib(@3=x2#{!)exPR9r1x!y
zbgu&0UZPb(m7(voVee?V>htU(q$<w(z4EteHCR92MWL%XC#k^T)KEeRPb_(qM}FWj
z)JqC*cGby(ymEpo;HojZX!VtKZtuXH&&<6<@w&hDUA0L$esMftHi^@&ryNey{|0u9
z84^{2Ky?#>pz38!h9&#Hm|awxJmP3AWkT<e3U^$Qm3zPU)=-~v@e`3Tv-pm=5jg@P
zXKt|LiByXQyZhOai3H~>MI#~cM=)#)lUGu;pNXXvz~zCY!hSz)Av_m!!TWRQ*p$_c
zBztm0SeY0O?IOKj)rNV{COgSg>2Yh<<MNw982*6R3(TgzPnRYN<5tGcA&Y!dvJJt*
zg^1Y{EbeQ62<?dF8!TX^ZI&RZwUbJ!&^t*aUl^~pE1|qs=`bZ9`k+?pH<VxD`7{0O
zm@0g-2LpcQx0zhwa69bR<k@3fdZ4PF=3o)ecAgu|^d~8lUS+*nAP17?Z#Au}Mpk@6
za$kQqn6SlUEIw;njB?!*dS5v~puAK=RPk-Z)cBrzK^&BuM`|2%9SurmBx5hRy<T-7
z&+i*4S&<`BN@E+mA9;kmh0FcRc1aJ7$=`?Fn%Kpg-)_0NlLp<&j-vM*ypOn1b1l7Q
zGtO#|w5~F&M8pzWt{<WOF5_A5-gMDqy43geROxrgs;^-3yC)Q|Z})t6MG5J5mUsNz
zGw>km&@OkzKT~GkuO!pP1=e|olUKE<b(So5_efme<#`8IcKb5q!*u!#dImRm4g3eM
z><7uq8#>hNpV!j;85uOgA&Cj)y2r#mJS&Yu*wKA-%KiF6?fOze(YPhd*t|}I!D49~
z^SGNMsaKgme-~B=nee{gZ6&w3A}h8Gm%q&pIj4kE^vfI)i!{jA9>58ewK|bqrjPP2
z%eP*~433Ds@HK~m_8BM~co6^_bVU}a(y-DKb^>(3Bo8Ntn5*%&UpI{8zq3oVcanVp
zM6GiIw(qiB>s*d<;5%?@CBxr|;jh!qg4~Uadp<C6$ma`WG5p<p>K3`0ye45Wu(J3k
z2R$>x)7$dekINj%JF6%d(%`?Fxze|Kpe>P?5*=dHlGJvc7AclojK;8hc5(}7RzNJ3
z3l1;OeVNg;lUuf%T29Rmm4x4vZ=uKJ4rVS+8qUI1gx4B^C*aZ)^M*16MTy$o1Wu%8
z-*@Hh%a=oyUZ!p;c>;~SvkM;t>fcQn_@_Jlzlje6QRPus^VLShiryd6i86qeXS=!D
z5U>-pA8~ORe2t*BV}dkPwhq})^@VxZ;`%;DLsZK*a`IeKZDzGqX?&3sJMcU7N#gAh
z-N1Luz>`BMz@qQzdA$zaR!(T?yx4xw_ou)r{in8S`#g;FrRurr^u(0MP+W>%D9puD
zYan4h<qGibp}qu&ZQ?GNOLTJdBGP?o<>C;;X2QSfLOnF>F<FJ!Y0VLkn?pBQxZA?#
zwrY18=sf+1j6t7fu0*9udLA)J*6opz<%ovb9T@7{dkw<S0qnQE&63u~if5<pATu$K
z3_xg!e~jM@x|I;Oj|JwQIV2rYY`T(4{MfN2E|ywc#F<EO9a-yD{b$XuVU|JZ`O-U8
znrqKlglil19^pHPgCd!WK5T=K*&+6E`PC-&6DgN|pP~t1{@4UMV@{4*Ohthn9KRQL
zD`J;HIPZJ4d><?XR#3&N-VC}5kJC#m+78kSAn)K&j;5o!HI}UiWKn<gW@nE7+<@pl
z_?-akG3t<EXkdTJ5e966`YrL};#NzV8fw&^`^?tW{l|U;2xM*r{o6QVeXV@yRBn3Y
zz09z?!%BYm|4W<Y)xs5KVn_}REj@|34pXmN>S`YfXieX^eM4*UWw&UTC-rzjxL}9P
z9?<>qxP9Lf&!e_dFr5;S#%o`Uhu+p+Z?}ox3i>GPKSVeGw}pQdrM!w5i&oNc^74mm
zdXNVed29<vRv-SG0DcYImVcr|1@yPpPp_QpG)`SGPC<Puz*c_MJRp2}lbY0>p%Sqq
zvs|Bjgzr4Txd8vUSK`;tgSw(Zg;e5_rmqw`d`-R9c-4eazrGz-rE0UPUdu0ldl9hY
znOTY&Tuc~>PC53U17>o0&98Kf8o^<@lN7!-_O!-%*ZYh_2$PKG6_Zqg;0HLGI4PSw
z^l$U9pHm75G}&~>U(ROFiB!IBx5nI{{i_85cI~qaJC<uT+{5Bk%ct_c{OjucD9O!1
z?c)W(t3|F2thB<h_Kf?+cr8jxQA1PJ{Xym|p9NVy4%s70zAK@^$+k*){id=w-`bNq
z2|qaZd|O|rIaJ>)8pLCL&gP%lMNRrAAE(fLoUmCNGnNgOujICtT2l(_$pX!uYHZT~
zuV!~AJbcJ%_H0k~c}vuOGi$zEgS0aFO-jAxSpGzX#l*uCi`nUQhQWZc_{+pSWi|hk
zfB~|F&!*=F)hQWOqgZ>-gk+oO=YgYTlwmFvGi#2P6HS1>J$t@@mzVR)rVhFkFO1U^
zq)XPs;vtkipRp$=`VKBj7>q8h^iQMSy*pUo@YM2ttV3qUSq@718NC>_G=7vtXqxF-
z_kwaMMSSPv&d<W;^JMtJN3QRba+ii)+J9RrrNVO{<%9C_^F(qRrB3-v#MxU*1D-8e
z0`(qpx7hmCj}*640TKAp)cvc@U16LVNC8sKp5+<$iKefsG(=TyEB$LF;C%+KvSz)C
zDvY8&0u;wM;LcyUm+>7Jvo-Dw>P~iF{9^=kC-+dBQ|+4_ep1E8AZ^mQQ-^G)6;nKO
z2QJLSBfd^Oyg73(0Hu8wzK_#<a(Gj#?VmPtbY6UPf(@xC=~Hku`E-7C=$enlF^u3#
zGn4txR2+@Yq=ZZ%N7!p^v8syXZ&flf%O8Mp-HmPivRf{s&v>~?em?APV4u+3!sYUu
ziPCws<A=3Imzi8mU#iN*`G?19VNyB$ip4RUx5V4M_tL*+=ac3X6>_RSL?5g&HYp@~
zpT_ayx{my9u{)!Bg|ei!(B|lJ<g4z5)&9wO+4o(0lwuPpzlQS!U(#NAVO6?lS3(P&
z=jjq>tZW1R%w?}pS7GM9=@9R5*DU4gl0BtazIgcCK^#fns)_*H2{8*>=)RSlD|FI@
zn%s9a9EnM<mLz-Uv4`%y3owQ|!K_P&IdlExWYxrQvzcosZ!0L*%TP<rWaFG+w^yt4
zedFbQ`Rh^5=N%;sG9MU#12Wm;ElQVlJ{mOhdGo<pQ;L}x#c!rQ!rjvN5?it!zTBK6
z$j;)+0*Aw;aYKp|@$M&$5|XCH9>e!i{wB169VWn?bMtO*{CmN%0+o-bh<%)PcZ%?F
zOE5}DfrfsVM4`Zc1;pXb7r$d>HJ<77%mQSuuf><<To8;e>XGI1C_0l4ITe$TJ3WyI
zrYA|w)XjL<egf*`D5UX9{M-7T#~e~2e!_IlPZ?ly2(K?7ih1!<2D2z9xba)+6)36J
zPj<Ny7T6r=VvP50c&0BQ1v(C65;^m)#AaDQ(Jp^VY`2_Kr-3K(MgvgD>L(W<-Mj_Q
zBwFd19GMw{M6vQ0y&tKml743#5&OjUm&Sz#GC^=$>5Zy^ej+*O^w=SC?29Jmp+|P4
zX|Hlk8G)YSe9oKI(V10;@g=>Nt7E$ho+kdv(o%7Q5cG(5CPk({=~x!$LTL%oCFE7w
zA@c?8oSpIK<BILlEnEscVP};cG3+k_sHZP}?DgQW1B@a0^3KvT<oB0kRVLywwnII&
z_l-lq2?==rl13eN0eh*`4yFp1hZEiIe=T6xrO?@YoIiD<0kU&Of?2n~3PL0@_Yrc7
zIVR38Zj2c2=6}~%K!V5kH49(GxloZbJpU_x_}YQLaSLwOVr@U28{ZjAgsRkCQ1ah>
zSW%`qbuua3X{_<)rXISJPzu}>ZO@<_OSrw;Vz-0hwst*5*d|r^SUp9t%-!!kW8&cl
z=bs#(?Ka4~zp#>e`r@n~uzAp#5v>2bJGD=rm+JHO7;OIdVWn&aa-D8iG^bOonD)19
zL|Hv=eqE7#Y~p&xjbW=khe^Y#IEBy+Wz`BhWO+-7PpiKX`E$v<J!H9Y{{G4N!v+4c
zhqoM2ny?8Egivpm>n<P9LC+>Ng#nPki#}Zi{~F40U%nnh)kNLUuV!IKo{DR&??vZ`
zKJELyh4pSgA(8mvPzN!a^IOw-QY(nXhdb&_KWCe^ufiRNNaDM-s^Th*m`88(9}bfm
zz+WjRi0&!tQ6;sz(=OCgWX^6CWXQ!oeTY82(@IJ{<(JBzG>~_;daGh4_ZK1_16}K1
z_H;=wR#u;rVLtJoDSVKU+F2bJxO2`aud$+H@TkwrKv`hr&v`w6;WF|F=W}4O2KyjH
zZJ?u~0@`00Yp6zl(fyfOLIUUo-|i0oYoy+CGB>Mt6?OPOyN}#Fel|RJ#%|p?!Ybs=
zyQcF-uFgLV0N*E8+#72=9|Su}KALbFqE@9^vt%ALF{3$ODt;w#rdVy^K5C`%U&FKd
zE#O%3p|Jf^8ThgL?vHE6%$<+Sv8_2G6YPX=W7XNu0(&4H`ObMVtXBUHW`A|ByQ~tv
zOfu8k#Imf-4-!0>zq*fz93>Ua`PWpGe_gm{n3=ZSWLI5(u&mY`j<>Uqp<U04<f{O4
z=lZ4SZ9a@7n}*nPrgZ#SukAaNx}T^WDt)cEoq_g`_hhFNm$$}Amd2HQoCCSx&S@z6
z?+UyX@KIy_!hu9P=s9oL-^cxZj*&ic^S=0hW4`~nmfF=wJ{5SFz%cD8kxg_T50IUW
zk@si#xM|vr$VT8C4sjTfC8;!#876J77boWL572MKU*74;tQas~#WSnhR_wSHzK1Fr
znVylSmkQe}sn>A&-}%cklovljf8~g{2d5n>^^iny4r@G-6-sMPTO41CvLe;;4|E?1
z#g(CLIy927KF-fu3peA*CuRDT8L2?<{CR5o*F!xE^IqlMi4K<9W>u&6`JuH4)m@`{
zKT2=@EK@4Zvl7J1+Bdu|^0lrRwvZvjVijzIP}$@_(Zb}4O=Bur{LmF&&fZ$T<jD#i
zw|ljk>?MN(dyaY!=?G3n(0UPM0*<J?n?4rs_1Q&Tn)I^t6{HV^{ja|7OR_9fB*mwz
ziOnaz2>NW7ua=dH)k+h;?NZmGbk9Pfs>I_RZ8mlCqfK&NfLogH?jZlg_?Ry4P5P6T
z7G1ENv<AMt0Nf;XEqkUkqcj?xy2S>f1}-cDs;*PZ=DLv8mGaHLax(kcOp-^c|9ss2
zg>v_ou<u;Gd_hMCNO<yx49R8|kvbz9jQ)e&6f)P=)7M)rD-IZoA9`d2oasH(kxuUQ
zs;)RSLP5Y*44x%sRm_BcHOzE86a&d^@iyE|Ll3^}ozKu@6GR&n$IX<mS|ONm2`j}j
z=_WE?O0-7LdAPlTd^&h7Xn(AO{caMZ)6zmh>@m15QThI)LF5^;Z`1$Mt;7Kxcn))4
zB21CdF<!g6EgOGY?+DuGrKsABEy@mrZBsFR(i1j`jgj3_Jw_o}F9xg?@u=WJ<Oc%w
zyNsr@IWQ8F^UZp$aw@Jej&QAQCGf)MJpX3+;GoMsKe0fDVF>T$n`mYUlT(3&`9;d%
z70MD=8P_8kMYE2+f&amihV802s7Js#suIciu!ObT_N~krvHkwx`39p$eppy&RzGNv
zc_fwxjU2Hhq?x|2YGZ<RM48Y6cinp4$Y@$dBAFg?KA!T<(K9;Y8eTuCdvs}j{#Qih
zo5pPdW2ZCXRYs4cua~Tp&UM$yMmUz9aScCEfQG-Rjx<!2d)P|o8tR4QPNBoz^;pFx
z4i5A|j>vQMjjLvlyD)KwWaj;$=>gvpD0v*m0&D~4CIqWyj$Gn4${lCn$-{`b!&l?$
z*=>lH%8_1wq;NH~UHaF2@>@qV;lYtdwH?tkv9|+gXuj*6<L`uJMnIL>*_@Ape*`9Q
zrF*+VtKLQNvw01-PV`77Ki_ie$zXo$tA{Z>nl+|3c>C|UR6A+-KFXQx`9m*z`-`Yo
zV&P*D@wy*{yXNTZ4~Y{MQkr_w=(S&CcIr9YHC9&M6uP!o15+BOcEH$eP4@6k_gdMj
zPdn~;I017>8v@aIiHU~6^GJmmp<D7e31gWQr!Y7EUUUX5a;%8GRDKgK&)HjSuyD-6
zP-oB_xth9i54kp04x6yehtXdvF-*(I!!zFTNflMFD&gU$Bvd?ykic@DPX!c0bi}M_
z_IK7_lR+@)h2Ox3eI>+iwe~j^4xB6RAXTAd6eZ%JW{Rq6-^x5xWyL?a0|~%^<h$&u
z`u&En)7_e#>8NrA^+NZc+G?`XQnG^9Na+Zy^?JB6d{}!{?&^$_>Teg}j2t_{^x4e7
z2KWb{6a|)u?T0>LucXnD7q?x+Cv{G(Oy6~%YTRq1URK{SFzJ~op5*!JLaxD`z3}wM
zY}<NAg4?df2|G{Nq=rVrZ@AZ9esU@^#~%$CNMh~;&Y*Aw116lpmX5i>NmM}HLFEGO
zZ_bo5S|-UKTFnY{Z#Pn3%;>y_m`m3zspo{YizjwM1_jFglV4vUkCS-g$K8(u*nu&y
z0JixH*rwe2u@v0kyk!YkI5L2%W{0Yf$JYgZ5jj4hGyZlc0aBSzs>?txDze*MNtJfY
zP4|6nv2X8^UFLO;<gbr4#&XQoMhMuqzpl4|LaGx=S=20s>F_ECfQ&d0PGtUJi3jk_
zHhTmM3n}t2szPZZPlqeNvBF^_^9hiZ<(A0J0HA?pX9=4O$Rhypm0ox@VtShyX1$w&
zEtUP}W{qBnfjUd!2P&k-x2dL{Ipc03;wMNfBV7XgyM(o2X>|3Xl=Xw#SAhPxl9pZx
zdILQeBKCpb=Ms)Uc3bN1d$0*)Mkb6*?fjjz&L#Xe*>E)x!spkVV}#7Y+=0!6TjA^L
ztIH*Dj)>Zcu4vu+KcvI~8^Xaq>bAe#23u{|-nwlI=;M7z#Iy+~74=12_nixdOS3G4
zMgF8OZhuvJpPP!B2FMdvUAC&<YA;PhvZRP_2R*>9e_yOGGg#~en3f(YmiHn3WX<XM
z-wuIZeoVE#W3I{VdbEeM-dDp|O(5R>z6lh3F46^TTe<|77xXJut63Xv+V-GIlLt^I
zmUTf?qvdxpg$KhogMr>0_*Qp;;DGD9_VV=qpedb}b>j!a=hx>*?n2TvngW0o3143^
zet6I^+qF8Ih&Ea+30YH(GXi~mXkzr-D5@x>?DLp;!C2iKmc>qzCspzvwHgyq>mkPF
z>vK2l04QfN&Mj-bGm4br;0mAgGun&#t)?ers-{6iy6k+4J{Eokpbv#_M34+7d*R|v
z5NITd#61`BM|qiF)qt6(`=<i;Ti@B^#a0!5>h88Su3Nt$+$p8@u07MJrh1*vAs}~w
zZk;;GJGD(&QdyB59-Xq3n8*8xtcYa*@gM;fQOF&KL_mzi^Q6#Yu_EqMOJC%~gye42
zukQe(D1wf28>eS$=3F)!^+hOQhciGW#wN)pidf1WZ5Q=q%JhEGh+4X%J{@Dfn8l8v
z#~;qeO8nw+10~wGsapLa727QTKi^=^^2x;T21NXEwGNDb3RWHDDO?3$3&#k^TU+Wx
zYl?1`l|7p!f{a(o97Rm>KL~U>YQ9mbk522A@vHT(OB_U>HnXE#LqsD&1XkWYxjhMR
zX_cQKCO9al-OTKV2)f%)TU1sBHecSfHaS5FF0kQ<4Pm-9<HcZsjNF8FDu49G*#!4@
zKXFiUN2k8bD_XY}w!q!6d@vI_LE9}YI4vzyo+~{>Q?ntIb-x_+KMj8(x>_qTp?#pq
zN3|2+c^@aUQZ4J=>IkA^^Qgc4l0A>qYA5GwNpzo<xh3ooBYw?sfpu1plCcwgSBQ(8
z0*C+>jNl9!tE4F1sjT}9DKlI?m4+0SAap*rCw1}+&90*mm*L}URn)&+Mz1?^@0{xj
z&ZG1_%CW`z)Btp=h_=phNpY1nK!nNu?EYj<ZlI2a)4*@{zIX+yFi$?h1*Koi8^#J|
zyt~uf;kY%dtE<4XrX_mXqz9Tgcm{Z4QEQLWpxokRoJrUR^p|X%u(%$3R$uVdFzw)E
zw9Vc{CuM?iEBx5>GCMURvZvSUm!I-+m(x+t#R+c@dgL`GdlcJyJGoyuF+scXeqk4Q
z6f@C$&d+8<=dc$W(=9<!xPs_|tZ%uK=-uwuU9xVQ?~24{7G1(L|Kmn?%fX(I;x%O>
z(z=4lAn}Gz(%FW`drncw5tqW~9#KUEd#Xy&9(Jr9_bT&b_w0JZ!O+WsxM7OP6-?AW
zO&~>0NH?d>CWjL)5#Q3dq=3BaiOVJ}bbAwvy0}V+AUXY|PfwkJ-xm4hZg#D`Fwf_k
zli5aU9*<o53#YACM6mHtzjqGp&(`eiB6|BL5O{@rNQCdH<wji{m(gYMILrF480Ue$
z8lKyg8xfIl;y;tewrq!uF|UQ60cD@4!!koy?qkgGC2X-de2(!n?hE)bLQ;h5UI^?w
zW>NQ+HbFW#fnXHPsZb-sewoMMzD6}ORdNYX|1pN*C5_cVYvd%R;3!u$;o#3F^pqd6
z@q09^QKj)o<8M1*GQXu;tZLU>$3jIXFmtZb{T%UJIf_>v3`l~-b?ltWG<<pHkGGJC
ze=K`131^=mG_-!_?>X!4P!3bH!&8Ct(|Fj(HF0h3UxRNd3N3Y5Uid}t@DueU8>9GZ
z`d1`omtTxm{Cqi~?P9D5d47I9xzkQAo9a^RJJa`<%k@(K;4qCLomTowT~m50>N4Uw
zmS~Nz%c>z$$#ss6DP<AvD+NarxBN80`M<Q||BT)OF`PXOFwFnW-HKEz4N$F%msIAH
zv!(t)ZbizaB?57Oak8T4OdbUQ)i|7pd=&xMTH682I_ChCb2Cr$!o@G0`^CCu3$@2L
zB9A`aYf$?WqxLewFYSm2s-g=!jsP!H0ATfh{ROx`SW+Z@2_~ZSPJ5;1N~+Zv>=M1l
zP|Yc+>CI}KAu>oKy?=H6B-U2_YUjx`;WDEhwZ6*I)hC@|MHi|8mWUu)KxHj-xG8Ed
z$6*ww7>|b%d~euu816kxovV|>o+8W_MUPb{g%;O($=i#FV?B`wXTNGzblx^RX~-_6
z$T<~UoZfbhjyo83Inu>_(hmEp?^ZoMDpSTw9VQ$<xvH58y1nku{V#ksfSmlJJwyM4
z=&-;*67u8Y7Eb@lpf>?=Y+{}5cr(SP0r9Q;Fe91Kb%kWVo-5+TgJcaV|C_+Y{uhCZ
zakpV;3=TwjPq<{^?Sv<byHs4A+bAM{+JEXz{(7`hro(Ha@Zcjq8bx&qK5O;ko)YXF
z)c8IB19hW|?QIQt&o`zzZv#0Th{0?KiN>V~Pd3J~TpTE7l*_6Q(4#wD`%OEC@7OSI
z=!{`ji(5WI=|GsguHAi?T|V~qocf4hHl&2KGG(l;?h69l4?A+aer~9yq|vetSBbCC
z=Drj4WeK_0W@BglkDZ*O|F1yYnX=-dtS@J|B0d}io^C%o=Ug|3nRBXde;d3Bl%d5>
zzb>399D6?Y*I4&uk06`5gM4CE7>9i|tBO$Tu?ci8-FUSv%N4^qJ5rI5w+FKFO|g1+
z9{|cN5E*6moH}YLMiB3^*b_2`7YBhTEvt(xXrl4@iekVKGl2Nr3e|STTO0xoZ-$?j
zF2@Kfq#go*u~yz((T3l~OntHXUpS}Q#Tf>PUnF0L;hs%gTVh+w3mR!xH@J*srWokH
z$AO3?Vs4B%->GYE@O0OX#^Qo3EJ~u8O!EL}z5Z{;F*FrjJ0+m9(?cCJ2irv$3fOpp
zZw{h97*K?`@U=Z<8ms(0A5^=shx1(`E}@5!eZ?Fa!v`~)vcU*g1A!8@3Wr~eC6B!v
z>jO?1sw_=qtaD$XFo?V@r{_Ahfa1Og>IE;6KA4r!r@EcN0}JJ5qN;XBpA0B<R}3_9
ztu<mz+-Bn?au<Dq+oBeKmn}3}Lbr;8K86}1CvrLcIOE#G%(LGMP8WB0pI&n8VyR;a
zWz|6s;O4IP(35^xx<O`qm78%)(J~W?=?f51xBW+q-y8LWr`kLl$zE2`1^kz4puux2
zg*pqq07plbZ|T-R{fWtXzqxq*&>Js3QB|*GjP^eQp9465%N|(fnt3T|BLDDl(1W6E
zsQ-b!_Sa{LsG0IUolf77csI7X3tIRvw<ZDCDFTrr?ys_X!Mx8;cBU8BoL)_*t)#1h
zQf1Z)9>LMW6DRXWR8I26DsZ1f*HSlT3wpu!j58IctKn0tF99#e@SSWMb%h^r7=Wl=
zhTg<-*^k^GiRyR}aJ<no2h@DP(qZD#M)Y+|-*N{Oz6Ep!xz20TEnyA?QjLhw=o(L;
z{Em#mWdn~ePWW&&Y2#|uda13p8VF>=a>e|ZD`sY_n<0Vi!%ZN?2h8tXL9EvyC4%CO
zuMY7$K_7cM8E=)_hy5u>(wm6sqUD*yNB>|?6hkmVoL$a>@M0$_K_H9Q8>?Az4wNd)
zhOZcKBzI<$?7gS#Mk!ZkASPfOjIT)NM1>mg5<@|6mWYyVM(P#$mJ)ocr}L5Nu6)8q
ziL4ah(nS%3PpQ(p*Y0zS;VVLfEkKlkY|%Rz>|Q57n5fL_OU)@_@qzQnTl7P&zuWA&
z|9EJbDb>sBNnHELxGAKi$f+%UeBZD0jR6#Kxv6Z}6W|c@P<sWK)b)pDokfBLRopL}
zQ|%(`VTsV@lvNg4UPu=9k)5Xkll`RRR*K|FIwf-(vrhIA>rV3Oi*1{@>N0oRa>0~9
zDXf5jT@j~15x50JYsuo|vqglf8-0w27{pa(>$|o+g@48z#+6!dOGKUDbr0yGXF$G-
zVt2k~?u>>h4I4OiO;IJ~?A-xM<Yct{C#Ft%O}9=kP+>MyG|te=xQvJ;N<~YAV`V%B
z#T&Rsn27hN>0avYx>twG?F%fcCP0JBEayf!A1a^#{7h!9jfv{l`my6c`Ksb;8*2gE
zd2BYRV-3IXb*t#Orz+$tYgkr)#y><zZ^yj$o(t<c+sEpcm6FAje9vuug^~vM^^Gk@
z%-n0mur6q6xV@9;p71Jn$wF?9&hXn^n5h58W8_Y5gkvGbK+=oqK)KZy7Ff8dv#k{2
zFVI2IR@lc%A1Cu)7Nvv{qx<ChukcSLm26C=#n-J**{>#-FjS>c6U%txKwT~@)?!Qq
zDo<d+)C%%lugWaDLC^Hoya3TTB=NOi#4iO?@IX+JsCbVJOS%8Cry=N;5pU<>A=Bf>
z$p&E9tgQ9RIKSO+*CS^V8v7I50QX;8M4a|{x4^3^o(x@k3ZSj(Wsa_wCxyV|M@OTE
zzYDg#`O6n{=X>(K_I#JVS867HwU_osPX{IvXYVCXriS}~-DiIu&mmY46|OL?tSC`s
z!Qdlme87E5`_G7lr=$(>KOMIDHmRJg{<A~;(Q@}3spyB!Xp*UMouvd6LAw(d7B~3r
zy^mHraE3K&bJ}U@Lrda}jWO}nQfcj__JX7<Qbi6a{Jc`CaKA5}@=@TKm95gjuICZh
zLDGX2@zTo<ls7pHe1LPoBHsG2tDjBHnDe_I9LR`y;5C3BP|UC{BWt%$=*g94_Djj{
z%%z%4T6)*(ZNs_3yOHa^&)|c18*PWnmA7JIDVD6L!TrlNE;P};HGo6*<uPcpdL#S+
zDL=9qHzs?K+`~DT_R==eeq8m{YRTGParG^)KNL|3&0)K}OBxp6$9(ax*GpkV32x@;
zPkMdD1?lZ7Fq$oim<^0@LjMdGzQ?%(dQT5QVWG%PJnTVbV)Z$1r+1o3-RQrj3WyWc
za3c+K5~%DNN-Mt9r;|p?Rzj2$qQZhY+1}nR$pKD*G9Ui*x*aIF93Q%SYB}3IrbX7E
zV?4k)rQ@OjHo<n&6~!9P%8s_gT2~7y<xF~X6!~}p^gEDgchiNAie@XxIg-k{j46Ff
zVH8}Rx;@@i+l=mf#@T#mQol=k#B1@gba<b>Ej^X5w)ncWQ*7-Sp`phM1FbTdz(U~x
zX?iZHdV7L)d#smDnhp7-hZ$THozlWnsjhmb|I1R8Ol1`GlH-yz$_DnPW|zei>%V*+
zTWIapP_)aG)G;i6uX5+Hv#Y!s(V)}K=|glExD%q2)Y(s|P#zYDgD)uexqBs-;kqZ-
z_jKIhqr5!|X*an3Q)|s+NPZ*M@9e#?)CPE`d4?|iyHc4Sgk(bf1nh;IH@v5ubDvzz
zrIuI+o`z_5uPr9Upug{aLf5bzh`Xx#Z*&{S4|y(t#W#EAtbVJAR#-ebNwxKd&YR9_
zjdZX>5Usi&6*CEGUs_mIvQ+O}{kpcKF$FyxXyH%>6>Me9lmM4Q!y?N*!4JYqczynz
ztBy08za_?E{o<1chgc@Zs<JN3Bv7<_>7wJ+$xCv}Q7ub{Dkhgo;4`%c?VOZx{+*=3
z$-t9|1S}3wJg+BdC@Js&gw9swUU74Z<+I&oO0N|OEip<jm7X+lJv16I7&lWxuu*dy
zn+EwTprE)l?AU3q8mlxz>A-;tN`H5~P|x)E?9O{`y-R{Vc(77SryUyqh$!Bu<mdWY
z&2<%TC>UFaPk2f;6+uPtN#TJdw=^=1a@}}wEt=UqSn_dtCagV_)PP|Ej=dfEHQlQ|
zbiUKNZ7x-gb#0`6&M<}*FjPXqs}a2|Y_XZfnGuKY5p{EQPSstJkxBe=%`P9c%f>h;
zkA#bms$$SmGZfQeebYNLrqi<YBmSTaqg`yL4C4e*p`;=s@ZE9F+exGap<^up?6Cue
zNMyZQELua?#h=tc#^ZV1@`8(UjBLH#yQJ}?;23mUlU_;92yPenEmcirPKILSo7lI>
zRYpS)hfMC<^%_pA3~k4cl+4@JIF+s)x_@@L6PT$w{%n@O8)WAUbj*R<3Qx1HzZ#is
zK5iR4MDZ9qoA{$)>5o==yj5H(sm-+2optWD-<^PARcBv_!Z1bQe5ZTpJ7JYJRuyeA
zw)M@EEj`a_Yu>z%e~7Z^M#oP3$BnJ|Y;Dqu7+YJ?8D+9cwdS{dE;K3~(NhfD$X-;O
zgG&Ftk6Ny0<uB-1Y-0Hj>D*bLiD{uOQdkktI?{tBSQF(V^NekBk=&Oa!E1gm6n5om
zXn~6!&s2+o`I6oocHBR6V(Xu33t<`5zDse^H0`pTlt+m9b-OeQmGzvgR&gD8OJ}hG
z1DET|c92-^^S|cfZyXY>kg&z~DTHMZWJDZ=lI#ud8`I><Pqa(K`&op9`A0|yAMv7R
z_LZJ~9$!Z>lIzx!55Q{nI@GLV5=06x2ey&v*)G4mybU?&y#mhWMPsyX+NcM;LFc?e
zUkH1Mc%+0H>P;IzHn~X3yrVcL5pSj9P~PNTW0mVaeZ_AyU@>~KVz;)M!M4L8!dVR`
zv%jBMy6m<;IO_+baQ1u4gO6n|xuI&W4{b}4Ca$jNpNuZ~c;Hc(tc3i|B-Tamk#XS*
z_1Sv?5Whs$w+hzv@|nGbYlac%c@HjhB~|u4f(*FqsRq}CBs5#K^DX%mXR=%7(VA(C
zu$|gqzl2krdcVKsfOoT((ZjJUJIJ2lm$LP6>6YB!hN?`?^~~(P2T{v}E;kEr*mk&c
zi>Fs2^D5NmI%GMIQ)#QWdj~89XLtb_sy3DOsvBczbGF(ohCj+TwOTH}D4dNw+k#(B
zq9{^SCpa<`FlCU-sxtmfKCx1@J&a4#CP5%d`v@VQ09H0hhpV`qrgwdem%Z`;N>|3A
z{r`%twcd4rZuQ+6c%6r|B$kp+w|Q*}YgjAR5L{bF43^a5h*#)R$p{?QmeM60kpx^6
zw#!g42Tv$QS6R~f?We=)YZE;uV(B;o%Vfe}@D5w7oQ-^6rRs+n!{O~f2D)pi#ouP1
zh_J4!fHG|S7O*u<!V>bhLw|2S1+AaI3*0p*DJ3yxiG}RJZ%sxsJJRZ_GSxKuL`R^I
z2uGooi$lvy+4T~OAhPcE;`FDI0B*{TVkRfniTit7nGUHx?>3Z#=^nx~RBAVL#Qx)e
z$I5CHeMVaA=hfh%<v;Gr^Eyvk)cr7(N_#Uhi=cnI49umLyi}@yd|7b;Qd&pf_R6_}
z6StHHtsF8O9qui8Bhz`YnexW&uKez!{!egEwp{uZEVMn_U!`Uyc3yM7J2c5GcweXL
z(xBOSs{!1M2&j6lY&6&BxV3q;c35gx=}Z{#654Od#?&nK*z<X?{vCQ8<#wU6CIqOe
z%G~ClK>kUGMHFDX>k$+0@nC($oPC;Ug5>DfFgJ_qafm$kAhpKY5~ZxCG<{+l$LP62
zq#6^0*UB=+SO++<UQs(0%x>!G*n6s06n|MfDtA^s2|>1A#aDPaLD%FTQfv@M+@=~+
z;WK>ZA@RJJRyo(OM1U1Q*Yt$%FGRS>TekOb07zycdg4(seSh0x#deKZdfI9IXZRz7
zBVN|!t|~u>KoYfIaz#C9ZDZPQEs|9G@(B-k<nW%E0*T5iiG|;@!$Quxr{m;zYv>!L
z4P;Y3hSpaSC|$O?h8kOc$qvra4mCnRZc1QXt)2QFAh)_r!xukzRuXWFyeLYTRUu6k
z(urTX#zix^&s0VnE30tqhD9Ryc$PP85tkftNg8|?VM0rRhz^w(3u5rkd&4H*6Y~xy
zNt%qEYhbz_=_6sya42GxTJY%nfiwap(nW8h!WJ-`D80ufK9)lCa}L(utrR_|$m@J-
zfzjITf6Qy*+TBl{o!7=`yN-OV>?(Zp?(7xm<l4=zmT8YmS6@y?DMilz#v2EhI?u^J
zsu#dDbF;7oVC#?n6C}=AbmeMa$O|gv6SAU3_Mx?lG)Wm&Xe39$Y=P|*<xAyjeWQ|8
zVgg~ZjL(`O3jmgD&7;qqj7e6{+<q(}I`K*LL>dq<N@J@zS@S~&{ACx`F@T7+3CJ^c
z;gky#tW_rSFwuR8XHwA?D*IA#x#n;Z{15UUNd#4JF~@m24O(1l(h>#smRZo(_zeIy
z#El{{)Dw~cVxSA^dgqdCROlL>nMP+^v?H_*Oz4>!W|CcSv8fR<xKiIERE41UtFB@2
zct-X$dIKcwMN!J{0WjG3mUMA*sev-oX7NUe{v=9+h9${!>cU;MG-B$c!Ewy=L7+4O
zkM*|(HTrjLy((C_APEhqY59KKMHab>7MUt(`9ZHI`Y@-EzmIjvqHjJ%u{JgoebFM8
z;y0y#Yb<reA=S&p77z_c5PYBD*g~lnI7pUk_Ag_DYWEE5F#R3UhUUC15|Y=(Pf-(7
zU&75)RFequ3C*kM;>o*)^2qjyC9uDd-C)NeWpyERm%jOOBXV5kgCvAkS`1OFPH3!!
ztqi6Rz3C*%S7uQ~btVLEJn2A8VC}^PhoS{7fso|ud09GM>3}F|aC4|TD0YzCbNUVa
z&?H9zYQNoh^7ZXX)^bU$+Bi)D`_2nF`peef*Uv=)NjCsTmTA|gbwcmTclJX=eU=2-
zRy_``v_w7Qfa<|4)+IB{kF5g@OX#&BhFJ1Jzd!!NXa{V*#!^1D-<yFT-~L&zKuY|b
z%qW5mgr?lnyk?OwmOVd(n?#dV+ZT`LPo(Z+K51vM(Ongxa%M>>)IKlY$wN8Val;GT
zvUXN>Wr8wdYLy8bi(8GY1<DrIUR0Dzk!?jEX{D=-S>Cn^40ojX?C>8N)5hTw;R#4i
z=<e*8R<G`73mNJZKKwjF;%9j`_0eNt^?x;Y=J8Oj|NozJI&DYVRCc0IDnew<GN)`O
zS+Y*qMuu!5*_knIl>KzdQbdR_7*lp;<b)|ph%qvZkZl-~Wyp--cir?k=kxtNe!u^|
zkMHCACv)BReO=dmUDy4--tXu8b^YtWx!=3L+j_vCa;vjs-n266_z<6!`WYeSNBsmc
zzh!F6q$FDmDZeA2vvms-#jFE9m!AEU>NIkz&MdyD<X0DwcSV^}Zp+#0-W%uxVAn$1
z8<bOPX2E`UK0jGO1?!^QeU2nOfork3qmj9dGd=&1-J~>6IAc>|6JCL&u69K&cTBz*
z=sy?o+{l_K&R2EJ7l3s>!tB-m?3=EzBV&&y8YXXl8m-B$$y^C}_I}coBz0M5{#0IM
z(s*3xdV#o7iK;q^8#OQzT9kHZM)+33jgbXhdG=(|;yA%lOO@p1j^wHshMj(}f6P<P
zY<ETTSTZW+rerT&0fi?#oF^EEXksuHIyPC0)#S>VD+uaKC30<$_JhmKZb=uBVIL9V
zS*YKjp8GRj0_JrnbgVf%^~G*-Ow#n$BKA!G4b>oM?{Z$-m1b?nlYWELmgfFyH`nhu
zVYA1mXAiNEd4()v?}}~>q^hm*AI4V5CldT`+F!-zB_%He3NwCXiDHXPM3Izh0=Ph4
zu?gXu=r$XB)gANI-2NtB!jZlVbFS-RMq8`;F-kSAgKpG9WW2iqmOJ(vW7c2eWFH0@
zykB!S>*-esV95gvu(Pzr3z6;1)h;<O#%=dX;_b%yQB<wUmOZSjvx;sjkH6X8qb#ka
z>-d!_9+eOm(;}byeZSyOcGm1Y%>V$HLm@<?7Asn0$t}OK15!h$P-3QjaU`2rDYy4~
zaPZ`?geF9wU3GE2AGO*Dg-oCdp@u@V>k3+$14wx7Nh@I+j%RLV3;<<!rtQbbNZ%^H
z!x#LH9OQYc0ety(xZYYAzA-(BYkv|GRu}(pPmYPL3Z3Ifv&;X`(!3;mUD#B(NzC4r
zVQrpzQT%Qj?o(~Mo9n~fOIHeqO0Dwy$T@al5_ieXSpYC{k}57}y<9hT!L+pMLRf|Q
z?3*sqx%?8{m4rG!p<a?#CR3|VFVQ{D%b_<(YFsm`wcwMvXoV@nMZCR#xh(MV%!ROE
z;=@M&QB>V#55dHqwd(&Yc2~(o8}#Je(4aG=OH&~}wCl~g^6w=riY-C@)5w`rux;D@
zHoLL+-9;tCcnCl@xvli`I$>($fus{e(6g6%%>63Mo7vmV1Wa@?4xyStAr#{mp<`bp
z<6Id-S&NgMc%WnvW7?WNj;5r+#6+NCi9wq@{F$Fxc6}%3crZ-c2DOL)CeXWA_~7wh
z1m7(m-*0kX8rBxtaAZj=dVkjX(kp6oZBi0;Z48%${(6iExeaHi9-Qj2L@itQ&P-wp
zQ0BN<0)Glga?m&+98kiYWP)eH+P9Nyp|=D}gO%LI(tN5MFQso&UaKX1lk5qPP?dqf
z0viA0p;+LiKZ)p;lR$rKR4-{oIMm!4H^r?ThN7)Hi|uFcg-++Cmd;CVjA85hBm;ny
z0zPa1SHBkj@c+0~c4{eh_GUawl#%5&g%e*4=H<q(4KX=n@pZf3dA;!7uHa73Zq82e
zS1}u54_bz4=nVaKzQwoX;Ju&t?|_dpp88N0q-*_Cx$M+J-~wA5w8ex(sJzW#`&A|%
zfuk5BLf;6_@3}JYKyHPCSs9|LEFK9E0jPnH5<u^XCO|&PKv&ei<o)C_P00PpKbMde
zlJUh(p(9ZpuY8qW{p7qm{=k?LyQ{6&ggA*(8S{0Y)l*^D9{3hK!W<O>d%VZdYtJ4z
z>3s6m_ZMy7)=A!6*?)mKD{Ni&vf@wq8-clnR4&8r0JgJ^UPGS<>0OMN6>bm_XrsFO
z;1^a#lxs~erJWuTr}eD!oiKkE_I2D|EF5OAfb2`^@E?dQR9Kj{YtwUE_UExvksH*=
z<$+*f0Om}T#Os1b-ub^C=_(3h6{8vE*4P)a<?_!)WBW*sp~HGl!`Z4L%-9>1DEJ$H
zYQi5qz~Fi{e)zlBjX*1lq5E%_=C!xJ#a~>%RlJ=)o2x?ph%zLoDa7wWKCq(-H#?L4
znw^tJRFFn4>nO6%NAe}wFVrLe);WWLnJxJI>p7CmZy~oYmkB5r?@d$^;@G=Mn^_6}
zTCP-3WX8;0D!pAN=Nr;;N5MDleCf=@ae)M4OEc|_f2aB`0>1kj0=28HanX5mnYi=#
z+TMJ+xyfRMzJD!_sYxf-lM<WO$ceNJ?(F(ChWS28h{S*36#Xn*n4VDz^Li&)#g*=M
zxAw`g7krGlDXK5`Hb&^{ATm36`tFyw`wOdv&EIXr#$#y2B#vNJT=k!l{P4!$&!G~h
zP3J;XWo{~K*jA2~eY&+rN6l`+kycx($VeY8bEzyS-Kn%e7*m_n^-lkCE+G875VU+O
z<veyESN)1EfgR{>Ennz=|EsXe_SQ6fRO?6e?Wh;GbV35kR)R;4rt*hI#>@4RJ(|XH
z?UL@5`6tQE8>;Uxj$`wS%cUu1@?ClittA<I5|c|b@a1B9N&G8PRc~fQCcKe2KYEq1
z*7fo<TQ&mI*qbFGf+YaHs;h0kRn~@v%Bb^zGFeYptH0V&dMC5UsbneCKJ|ldvM91~
zK6t1&)PBuzY?_*+U$1%FZM3FrbA5aJ;Y*sW<F@;o%^Z9%Xom4+cTh>T!@%ASF7_C?
zNPULZp-k$X&TPD?lG+5QKt091H*C5@5B7?gi0eDhE59BsSM*JNf4;7$N)G!n*}Sor
zW>9~zi-A^|H;$7*s$U@Ny(01NW{Dk1!WhJpIkQ(@X}l5=LK^g1wPw%T$4RzIj%Bsl
zniMbWYxdV9`vS5qkIXF95?Blyu13BT;bY^xwKEW0+v&fke||5z9)&;U-)+jCP}(mt
zti9vr>(IolcX0H@nEmr_nDeF{@NxhBi^e?9o%^dUfwU0jUrc&Po>LRz=BU9Xx2jC%
zZw&rjom)oNFMH$2B<W|C7~A5<pW}C^^|)QAz~fKITQ6UfziG{m3mszAu1~Yug~sHx
zulvK{-Aoa=$q&@jhY1a(nl|UeBr^q}Rhk&@!-%^3{O7>-kWV+f8E>ZP%g@6{oHP8L
zlei*I#Ggi1|L*cDSD`LH)af$?q|@GUgi$^-Hl{3`_q`*UF1pQjiVsifh>qxucq7}i
zt4gtSuu=7n;^eJ={)QVSz&S(~XNqvJWPT|PK|Q5mhBg5S5fb5=_X}4li`pN?gH#`C
zQFCcEzstiy!8!c5X7W#0S6#{u+q?}rqwatFc;$!kyK1NT{@r-9Xi_K>^|HUu?&1pW
zsEx1!Ty!vA(7?=#{h3NaW6n_6o%s&5Ak0J01LKQqX~n_N;vzyX`!`P~n3uRfUYsm{
zqJX&h);N?h!R+ynJ%*P#>4zq3&uJa;O{MMcsn?;F98EV3i?jVLiFa^5y7?!XNBk(M
z%t8Ca=DtQiLs(4Ce#a+rL9)Wy%QwpJ6AMd3@BYrt{GR=-0`K*vpcEU+9mo%EVIF4O
z<4X8l*S570UT;xB#>tMA&J->Litp3V4Q&5=aNaJsYu6%p|3-T*!7uwoPyzXYZ>o?N
zqj^7S_^^riP18LS(uo(T#`{1Y#B42;W;@8OwhoI*t%TFL+M0bFka)-*;*)e#3O*gI
z#HOn)<-e2BK~4sWcLm8Lc{<!#A+3sxBa+QVTXweSkxe$o-soG~?cK5kZs_yx?~pz6
z2F8W9t1zh{kNS6l0r*0kW?5kmGLyP*%XN9Vx&3~WZF;`q;8?Q6`O7=TGztHni&qM>
zY&c!0m!J|Y8uk1&8G2=Lg(2M0N(DO{Ba(7b_d}4A;<B|IhcvBOh<M_x9XJRz`-i!4
zsF^*!B<#X?kT%P>;==H@!bcsGU-uY!Ppkh~Qm}3zmP9^_Uwbz{R<ng=3OL8#h8y@}
zG%<TU8FiVzWU1Aqh5ON#+3D5Ne&s*oRZ8BsmCvly|I9j3M~9O#xAj~gTR@H7GF;3p
z9FS&NdO)h`_kEu?UVd)wrMXV}l^5O|PKK9~(!Yl%v~|dRQ{#GF@}@}U-${H9cxVvZ
z-hIaKZB|Y-DMR;x3j42duH*a><+AW0eVHodM%0_Q!X!hY9Y%dFZXsw0=0O?4k4(8I
zlkwx#^^b)_g(StbprHn0mjBe<hmqUJjtx-BSEd%xuGetx-hp~e&`C*9dC)(|5kLNZ
zwyjvs5qSSboU#Sw7bR!33JG`S=amMHavh`~($4oiz&x+09fHAvC&v}x0N|t|u{;@u
zf)T09X6)w^@3Kva4_ioaQuvDaW}vzC-h(UiGHRJ+Z-b~^3?aT^)%H%viYH1+PGVGI
z3!!AlS}822UonM$I|$}?-;Hr8K5{ul202yaf9LVYzf6eTvX!l|(O0e`<+{Sj^#hZa
zXHkr(JvA+->BR*|J0dGw^+YZS7cbi}b^QVSkLPNc$G9Gctg!Q=%|Ut)M`5{DR{$RP
ziK7%9s>@<(+a4(fQb6E#(}n}J*!WIHZtyX&?8XMfR%xq^StAg+CUNIzAi(@yu-3}~
zWFFo{4e0_AFgT=H7hTWQqC0Jsj-UrDo@>#aZxR4;Zn4!12q<>!aI=_Zl$-W9Pg)}S
zP7Var$L4#JlK?0B^sKur5~#PSTZCxVjzo+0@vzek>vgx#Fj)RaY9{)tUB)oDJbb#_
z2)Pj)&C*&aeGLW5b+8+25mW;E&Q6JbeVW^JCC%4<WusGf<I?zdC=@-3U9p<csA8o`
z#RLB1Zs1}E9xhE<h}oBtumg5`1ik+0L7$Qhj)Es~7%~G2m)51Dhr_<UYE$|dumGC@
zd;hsat+{@E(vZrt!c+<D+I&zheIdoqk@vxG9sT(x3^rvX*W*s-_{^knK6ge23%I4e
zf~4$pcEA_!NS-F|hB-6V(jzulK^Z-QowK<BI9>X350A}S2qBL7{a{nqwT9;_gHH4M
z$-0#>rER@XxV|R8zC=t83WEb7xpm2CcJo;-Kt~jZAfvp+vpEyt4}(SLB7Ey1UR6>0
zO6{5xPf(IKIiB_t%;Ydu_%HdY!PhaNbu&i+M-ofy!d|e-#eIaq-p<?^!XA<<zmSxT
zb&RaA!d=Dk$XN<xpj{UG9ljS?bIVO&KA74F=(FEu;h90dt$%%>Sh{iaH|#VQP{{IH
zyI^PB4{=H1>_#>=mI6;%Sn1;9S<L=9oQ;2(o(}Ct$diw-t4VE2+hJ)U;pqnc%)y#~
z+|#3Cy}g$>Ezy8#{XP)F57^)I?iGO<Z@E;CygDYFGyMty5{`$|L-{uLZHLJuWqxF+
z1~qS-$U@%|m|pH_G$rQY>#Z;MHE1^69Qlh=r-5%+=cqgOgRz+ClI8NkCEF!u-D>vW
z2BEi-GdQVRCDCveyGT2;3_Ekxb?PwrP^T_$+T2fiaQ&Rprtl?GPN$<Q>ayH&!vbO7
z6G0O|ix}VsL!%;x@n!9svX{@EEUE7V#EYN-PcVRVcqc4TyyPP{$%X8cUA3u^DRY8&
zuL!QNxZM)j^bOCdLGkQ9ZmgN_D~5RS@u<VFFI^|sr}(ZdHJ{;-E!QQ&v#VA6w3e;{
z#l_!;z+Pg81vA=Zir`f4!n8eG*KORag#NG|9$%vU6>%8F=$0OTyhlo9-!xo_!RytU
zt48@OuzfDf-v!i;<;nGvdrwK6QebkkTps+2xOe1KRww|^06e(Abcej<58n1-wMhBz
zKLFhmBVJw70D^<?wv79@^KplM0J;bNe;4?Sdr1JHUP^*Gy;@Cg1#lGMv`untc{EP3
z6fSAZNi?TKSPrwpKGg<Jf2zfr>#%#TxA_jZWkATA#y#A+LP;l7t7rJiTdK<Em6!iC
z)y|I;1E#kvgY}6M4`F<(US+sPp2K~o6ANq7IRqfUCc_T##27;LM632wGAavonVeTS
zjxAf7rBW(B{I^s?08*kV;9B0{x$YRd!vg<j6NX)6Mx8lsT(?4xz>hZoZ80i758!cf
zT2CnsnN9+q+>@2c>(f}PlUYa>t3mEEw|6*YDNqj^@dE98ujMyxfw_fa3x`3$&F~xy
z#Zv2$H}Icqu*e9M@PMM^w&ZShcyi0}zGK#gLB+`v>viS>8!yL$r9qeP>NtPT(NrbQ
zLf%TT=ak+_J+J8y;fCst*QczX@#EuPw)O-Q+mMuDOGFruAk%9NXrM?YyX!Gjgsqa3
zkLMPx8OP;Q)X#~eR}V2`U(a`p74Xmj*S7iA5L@vWRbK+p*XqekpKfUzx2e`e^t4Y8
zN%ma}lIs;=le*w_+I}q9(o$<vG`LGF5YXd#?kJ&gzpj|q*AivaTXztO8Z41v1HM*N
zL?(Brb>_8;qjFVIgK}fao{~M~<N{^3GAT#I=%QP-PYMmk!7ZQDsiX#lQ1Z0HOoHy^
z@a%ZjPR!h~Yo{7CO!X9N#J5ISSEC80ZOYB+)V9Io;n#Oc;=Ovi7F<;xJegb$etJ2N
zYk%io;|VCsixZk68#fpgj#;@er7uPARSzpLgN87u?AmfKzv+N1%@BBfEPfaz{OWjx
z9$+k?zTA11Eq^bqZQfh8<giN{?yr(tCE4TS!R24k48$c($Lo%?=r25N-<0;(wja4)
z(*?X!ml+`yHRgnbS`Z_pL@ZjI+uJ&s{*YAp{0We%+YZeN55~E)Lcdd~J*mT9lq}WY
zFujFSIwF*{u!pzm7ON|ttkCj^91N{Cx);^Vxiti8^A2-<bL7_UsC$+YL!<U45no3Z
zBy&p9#z}@YI64NO=Q{4+BPXjDAMBk6#H1J@5Pk`+$;UO8=DQtPQDsN&r03(0KmA&?
zUYtGd!!^Sobn}dIiz-|H6*6>X%5kyOf3a=~qkf_!gi+;*pP3VlBhU+zg0fP2_il<T
ztL%JUTbSxv=62T}dx)_zjpN9%lP7H^q{eNS9hTuLMapH4ICc?o+Uu1~LOQ*W#mTjV
zC;w4X84TTxvn)zUCYQ5VV%D~E?xo!Fm%+QB9?sUBI%z`uO7VGWBzoy7Dpfr&k1m3%
zw~fesl9^CsTVT^*Qj<_NFv1jh<M9IL-xH}k&?<@y=nk$xdMda3uZ4K}i3toHEl5aV
zSW5OzFl<EeO}IVy7BY~LDR5ZvyvW@DV~lhI*w^r~+vl5~54Bzi1}v~i!5+`k=2<o^
zT?^4)t-{@`-ER3j8QLK{@A0gpulmDLIa98UTf8SD54~qCHi19lIK^#@#j`e~nCID`
zf<E2S+U($oqYfQS$M0n!-dQyKkSl(CQ`Jbe0Y8TbkqxsU$?YrpXQ9({S=|@Ot~EyU
z3}oo|c58g~3-uYGe*w{^%OYxQe;V}7{g429MoL8<<j*?TC3LdJxjHMQD-+%6K`TQc
zNYQug&2fV2HBg!R`9Lq(F24LAQaF6R{cPurThIX8WeU`o%TL<i*u|e97TSXWQOJdM
zkXkPEyA@D(mpAi=#D1aD9p^!5jZNX*zwu+~yuWE=K|mw_*FQHK{$CX2Ph4jp!>98o
z?r`pB3Vij%hHDT|Fl9H4H_QRbnAa!`duw@l)@qOSX={^uNEjE={LsGy)h$ANuy=;&
z#yjs4bPWj9`rYC(s}`#yK-+>pHJb1?)`vGDf<SJJL){e&*M6ggLC1byr?9jadT?;=
znqmoL0BnlWA~q(UwgJul3j}ke%Y@5H76c2WPxA|#6fE#ArhCMKwz7d4vaYzXpopj%
z3Z`*J19??EByNM{KIed8elTi{6NSMXoOpORXCLlC<eX%QZz#>4N?1&>_q~c4mqUKm
zw*i$du+L2o7v`k<)_P(rIGHPh^_;fhI%}u5FxZ!l`fo@HJKo!sHD=Hroq&mU&I|%{
zShjA07lYlJfSB5tTgh}Ju8kYD#T|jc$}MK=xsqsB#o-92W-!ZpUjfr>IGg&bPP7Do
zDi7%qT{^LF@B!+5)onr~5zY%r67@Re)L}4gF2a8;k@whKw<e|%bXQk85X@bn+A@+{
zaB%`K*o{s+dKU1~;XvZmwA=ucpceym+jYVdg>xKZHPZMFn6vg$i3SVJt-_;DV^!in
z!}6dKR>cY$oYHYd&Mz=n-Qrov?~Ot6n5Awbbqhf-e8O`LmDn8h$=WYdSq0XDAj_=(
zxgutQ1k`BZdC-`Yg_u)d)04hTH5{|uEFjKXx!T+h{Ixzg3ZTf6At=%bHZGvZUsjHP
zz#ti2c1kP(qP}Gq(09zQ_hkM$7(4dJo9mn;xT6zP2!03!og`rG`9$u%q8cS?7Yw$%
z6SAQSE^Y42ZbKGD@A1tI-dpI50>(iQ<#7Jl8Cnd30GA0c(9KC&yX7R_v`iegkWPq0
z&sNxEPB+#2FQ;y9(_sHIx94n|pUGN_@g7?M>0Np4s5$ODA9aDTBHMXz^^xRvt?_zJ
z?r=4h$b(o+`jZ{B9+64C?6G`KpcpQ&pgV0hG%63~um7f>tFS&A*WfTApkB``p)P!I
zwh2qwmA~%#w%2@H!c$0_U-lYf-~EI?yIV2IEC{k3{=?jQ_5W3d+*Iz=_(6`dB?Uvz
zyHf}kKEv+$4J89+Qnqm#XWLP$Ii(nD{Ff$#U%V|VK=;_OrN{6ko?p1zWYq@qLED<z
zjZr*l>03qmE}x{;UbQ^pq$A7W6%I|w<=ptoWDsA`(3KFsY(u2z%DXrosM1UiPe>hL
zpo^-!_ZU!|KvIz--q_~r@>HRT*osC<*s#g4#zbuqPZ9vJPOP)#kyWaGRPAML+(c??
zTL57ujjZ%RBME3VSl5We`Al{?%wK^PyyP?NoiD=k48J07+O5jIJHj8vCtP&ncFv<%
zZrgWFa}~eC-OjOBNDr}x9I{_mg>fz?OUYUdtICv!iUV)9ksa5zTIaI~qRGE=sp67b
z#7FCZmn@B43D>+6n|v|_a%zRA(&H#ftg~z{gz7%kjmoF+faWpit7!wWkYU0KNf%YZ
zI64z?Ehk0X)2X&}h1Uc>;ZvfjC9}4MMSQ99(L$~kPwRDM-<-OUGvr$`TYH18^Nd!{
z@U<$&1ZB9SKSA*32|`}8LlU{c<P+Ov5kWP?WT~XFG+N%m7$e9gT35?aeXl<@A*OXD
zbRsN`eANe`nRD*CEOJ%VOjbRSuuz9D;SKen-+UR%uPIgIUMa%(n<*YnNci`7xNZ99
zAJdO&<|jRFk1l&KTvJ@a!VKW){SHPdXoCC&Ls^lR=r=DgLxz-VXNBiUt%5Ub<JUdx
zi26bc`wX@FH`Gj3y1lSwf@+D%QS|&s95F{Z{XjAft=InbkL6vI>G{}2XOFtk4rA?>
z0V7)U@+Gp$vRvYu$?QaXn-uOdKZ&!(R+)^nDie&iR()cNA!L3*Z|!JlZJUt6*p0`+
z3ALv6@HmgO96}!ur6UjoltbterWm)f3>iCobd&P^Dx=4Ry4Eu0w1v>G+VVM{<NL1^
zWyzyJtQK=C!p#9i|1}x6FlKt5uj<5byiLCff*ET_j3WPQg1t6C*;Ci<F=ao-s1for
zwhMJwQ1W{kLLCl5mAk5_K_?Xyfvte>Q>ml}Kr1&?SjH#LaCnEgC;bS2QhQ$2zGkkG
zUZ!cTX{x8NYP$RwlOh`_sTYr%KFh{Tn1@+M!&#r`)mXkNcaPyfZCr3gd|(q<x-V}*
z8y_t7@sWa_V(`<rFo&h9v@tPt-JQMpvX~E{+R#S>%&|u?hMx`PM0sliu?S_{Va$jR
z%~|gdzr7lQP$HdgV7Kb>Sx?vt9o5!J8_FcTeq!YIeNlm#z^xJ*kjPcW1vhKU7u7~r
zzKNO<#N^XgE2Wa+biM012dgFnvn%<-WXpchmH~^x<>My2_e}`PQR7<<#}j2_Nv{ud
zm<&>Bm5tG-`jR`6`Ex!8=jo>CJQ<8uaEPSq_Ug+ewSIG#yqD&_C%TdxPqf5$qKbR#
zW))CbG!maz9mDf!cy`Sc{JqU{y_V~99;cyQf%R@HxUzqRjtKrnyeYcs+_Tx%rI_rN
z?*6ih<)CK|ePW>ZNES`QBsfLQbkIUn*}i^jD7w<k2(y{z;hjkej|UCJCUsE_9sIxZ
zRbV_KbksOk^AvX{-1{+*jaqhG)bEiN_{x0r-RQn%Qq`pB&rfrzi%P=-&r0Q{A!w)w
z*&ZRk3;L(^;A8wz^CaUWTC)1GQ3KA5LWmoQzkW97bvcn?t`EI6Z*&DiZt*$!RIZx$
zu+#*Fd0+tFdQ3$kk}2EA3igZ;YcR>!Nnn*C-2$14y}IJB`tjNy9b4!@<I2j$J?oT&
zoPkp1==PXwa`<i!vA^3d!S~42T<txjwX{-GN2I*fq3R)*{WSdPCDGRGCf=AAMv_)q
znFyTm{5VcAFSn+TWxS&Fo?H1ae}LQk>L)P!HHls!DznOe%?lM3hl=5pM?0?4hk1W~
zu&s@3!QlMol$T2{jlD!GIkK18Hnb-xro@4g*BFzf9v@pBCDy@i4aoKepNmL+R}bwY
z%Br9m>J}v~hKolMvg3R9qp~{8Pve!ZTwqr2#+4D2!?5Y)3bnRn_SV;}fTPbqckf}0
zT=%1X3Gp5W+n47i^6`$!-Hv$19?iWr4R&L*%YnjN1v@gC*`BZ4333dShArnqf6VA(
z2ODZ%D?e5?G!yh!rRbw<pGpf$3BLiDZTRS~?ykV)@0GsC9c}p{+!}e~altZT$V+b!
z*-#hnm8HAajR8Ii3DXM94rcdcXnf@<DzlZ0aGMcgvxm4w_XZJI?+k-fz_lL|>Fje*
z$uzIgkwtpy%E%(z#=}!%$wFH*`GJ$`AUfV&ChWtKu+MF!Z<-IcmTE<zl6+48ZsFxM
zh2Y5N_x+^8&ZdQH>_+c}3=*&!@saTt_MFG-eUoiUheG9F7We<SAss$<D7>?VBMQ<}
z-hKEIaNx)PRh9Es_ZdYwedv}`FYc<^yeQz{){jt>`hSf90vyTEF&^yZ3H-+Qp7CbL
zvf8#~`uy-Qllsvh)o1gUe><7oKW2Q^cLaSil>j`j$m*NdFpaAqLC!Ml{LlaRjW=xt
z;B%GabxS%TZ$?f&%2H7L9ZD}UcU+wUdI4dR&(jNl@wtm=0gA~XN{baWR@UFGq;A=k
zSWKt*j1c$RyM2ED7>`I0&Sr0)qz!Y_Z8v6;*7d?Hz$2O6g8LyCFe#A$V)koEroEeH
zZvilB1HrZeuXt`BZxF}@nLs|<tW|Y5DY0bo<8zw6J=BN}k&a$CZ>eX7Z!;T*5w)>!
zIm{p2b_5N!2DVTVe+g;^yWY7GCg20D&g$MOR*DcEftg>u!&;@y;GJ;_-~_R*B&WJn
z?&NS?Vqy3}Xx9<sX>GN!(58s@tr<^Y8%)h%zY*(^`(S0;l$T<&WAh++21Z`0(}4a}
zXkzvi<U4U%5v14aJ5SXL@rGDBZ8+eiwtoS6S+Nte1`K=lO{Z^;99U6%`=;`p!slux
zOH2B}fg5ZblDoR`z$OBAdvEzUJUo_H+M%^%mrd>h*04y?J+KMJkA4@A$mZKRYXmx;
zV`d}qieoPfmePb=r{}TZDW6ulxZZM6^z6xGQBWV+lyg2k9B35UvLc4LrH;ig*b76X
z{fDYSFsBC$cB->eoIZqs{Q(uX0`LrZMGtUh$^~c#ws9&2dm1F2dq_S{+jRDUz7RLu
s@&`)&Hy$?zZu>uSyNlixoQ<DYCk$5m-3A|n!{1;R4b1dQFF4=*KeaXjr~m)}

literal 0
HcmV?d00001

diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c
index 61a0423..53747d4 100644
--- a/lib/librte_gro/gro_tcp4.c
+++ b/lib/librte_gro/gro_tcp4.c
@@ -34,8 +34,6 @@
 #include <rte_mbuf.h>
 #include <rte_cycles.h>
 #include <rte_ethdev.h>
-#include <rte_ip.h>
-#include <rte_tcp.h>
 
 #include "gro_tcp4.h"
 
@@ -72,20 +70,20 @@ gro_tcp4_tbl_create(uint16_t socket_id,
 	}
 	tbl->max_item_num = entries_num;
 
-	size = sizeof(struct gro_tcp4_key) * entries_num;
-	tbl->keys = rte_zmalloc_socket(__func__,
+	size = sizeof(struct gro_tcp4_flow) * entries_num;
+	tbl->flows = rte_zmalloc_socket(__func__,
 			size,
 			RTE_CACHE_LINE_SIZE,
 			socket_id);
-	if (tbl->keys == NULL) {
+	if (tbl->flows == NULL) {
 		rte_free(tbl->items);
 		rte_free(tbl);
 		return NULL;
 	}
-	/* INVALID_ARRAY_INDEX indicates empty key */
+	/* INVALID_ARRAY_INDEX indicates an empty flow */
 	for (i = 0; i < entries_num; i++)
-		tbl->keys[i].start_index = INVALID_ARRAY_INDEX;
-	tbl->max_key_num = entries_num;
+		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+	tbl->max_flow_num = entries_num;
 
 	return tbl;
 }
@@ -97,116 +95,15 @@ gro_tcp4_tbl_destroy(void *tbl)
 
 	if (tcp_tbl) {
 		rte_free(tcp_tbl->items);
-		rte_free(tcp_tbl->keys);
+		rte_free(tcp_tbl->flows);
 	}
 	rte_free(tcp_tbl);
 }
 
-/*
- * merge two TCP/IPv4 packets without updating checksums.
- * If cmp is larger than 0, append the new packet to the
- * original packet. Otherwise, pre-pend the new packet to
- * the original packet.
- */
-static inline int
-merge_two_tcp4_packets(struct gro_tcp4_item *item_src,
-		struct rte_mbuf *pkt,
-		uint16_t ip_id,
-		uint32_t sent_seq,
-		int cmp)
-{
-	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
-	uint16_t tcp_datalen;
-
-	if (cmp > 0) {
-		pkt_head = item_src->firstseg;
-		pkt_tail = pkt;
-	} else {
-		pkt_head = pkt;
-		pkt_tail = item_src->firstseg;
-	}
-
-	/* check if the packet length will be beyond the max value */
-	tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len -
-		pkt_tail->l3_len - pkt_tail->l4_len;
-	if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen >
-			TCP4_MAX_L3_LENGTH)
-		return 0;
-
-	/* remove packet header for the tail packet */
-	rte_pktmbuf_adj(pkt_tail,
-			pkt_tail->l2_len +
-			pkt_tail->l3_len +
-			pkt_tail->l4_len);
-
-	/* chain two packets together */
-	if (cmp > 0) {
-		item_src->lastseg->next = pkt;
-		item_src->lastseg = rte_pktmbuf_lastseg(pkt);
-		/* update IP ID to the larger value */
-		item_src->ip_id = ip_id;
-	} else {
-		lastseg = rte_pktmbuf_lastseg(pkt);
-		lastseg->next = item_src->firstseg;
-		item_src->firstseg = pkt;
-		/* update sent_seq to the smaller value */
-		item_src->sent_seq = sent_seq;
-	}
-	item_src->nb_merged++;
-
-	/* update mbuf metadata for the merged packet */
-	pkt_head->nb_segs += pkt_tail->nb_segs;
-	pkt_head->pkt_len += pkt_tail->pkt_len;
-
-	return 1;
-}
-
-static inline int
-check_seq_option(struct gro_tcp4_item *item,
-		struct tcp_hdr *tcp_hdr,
-		uint16_t tcp_hl,
-		uint16_t tcp_dl,
-		uint16_t ip_id,
-		uint32_t sent_seq)
-{
-	struct rte_mbuf *pkt0 = item->firstseg;
-	struct ipv4_hdr *ipv4_hdr0;
-	struct tcp_hdr *tcp_hdr0;
-	uint16_t tcp_hl0, tcp_dl0;
-	uint16_t len;
-
-	ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) +
-			pkt0->l2_len);
-	tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len);
-	tcp_hl0 = pkt0->l4_len;
-
-	/* check if TCP option fields equal. If not, return 0. */
-	len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr);
-	if ((tcp_hl != tcp_hl0) ||
-			((len > 0) && (memcmp(tcp_hdr + 1,
-					tcp_hdr0 + 1,
-					len) != 0)))
-		return 0;
-
-	/* check if the two packets are neighbors */
-	tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0;
-	if ((sent_seq == (item->sent_seq + tcp_dl0)) &&
-			(ip_id == (item->ip_id + 1)))
-		/* append the new packet */
-		return 1;
-	else if (((sent_seq + tcp_dl) == item->sent_seq) &&
-			((ip_id + item->nb_merged) == item->ip_id))
-		/* pre-pend the new packet */
-		return -1;
-	else
-		return 0;
-}
-
 static inline uint32_t
 find_an_empty_item(struct gro_tcp4_tbl *tbl)
 {
-	uint32_t i;
-	uint32_t max_item_num = tbl->max_item_num;
+	uint32_t max_item_num = tbl->max_item_num, i;
 
 	for (i = 0; i < max_item_num; i++)
 		if (tbl->items[i].firstseg == NULL)
@@ -215,13 +112,12 @@ find_an_empty_item(struct gro_tcp4_tbl *tbl)
 }
 
 static inline uint32_t
-find_an_empty_key(struct gro_tcp4_tbl *tbl)
+find_an_empty_flow(struct gro_tcp4_tbl *tbl)
 {
-	uint32_t i;
-	uint32_t max_key_num = tbl->max_key_num;
+	uint32_t max_flow_num = tbl->max_flow_num, i;
 
-	for (i = 0; i < max_key_num; i++)
-		if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX)
+	for (i = 0; i < max_flow_num; i++)
+		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
 			return i;
 	return INVALID_ARRAY_INDEX;
 }
@@ -229,10 +125,11 @@ find_an_empty_key(struct gro_tcp4_tbl *tbl)
 static inline uint32_t
 insert_new_item(struct gro_tcp4_tbl *tbl,
 		struct rte_mbuf *pkt,
-		uint16_t ip_id,
-		uint32_t sent_seq,
+		uint64_t start_time,
 		uint32_t prev_idx,
-		uint64_t start_time)
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint8_t is_atomic)
 {
 	uint32_t item_idx;
 
@@ -247,9 +144,10 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
 	tbl->items[item_idx].sent_seq = sent_seq;
 	tbl->items[item_idx].ip_id = ip_id;
 	tbl->items[item_idx].nb_merged = 1;
+	tbl->items[item_idx].is_atomic = is_atomic;
 	tbl->item_num++;
 
-	/* if the previous packet exists, chain the new one with it */
+	/* If the previous packet exists, chain them together. */
 	if (prev_idx != INVALID_ARRAY_INDEX) {
 		tbl->items[item_idx].next_pkt_idx =
 			tbl->items[prev_idx].next_pkt_idx;
@@ -260,12 +158,13 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
 }
 
 static inline uint32_t
-delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
+delete_item(struct gro_tcp4_tbl *tbl,
+		uint32_t item_idx,
 		uint32_t prev_item_idx)
 {
 	uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
 
-	/* set NULL to firstseg to indicate it's an empty item */
+	/* NULL indicates an empty item. */
 	tbl->items[item_idx].firstseg = NULL;
 	tbl->item_num--;
 	if (prev_item_idx != INVALID_ARRAY_INDEX)
@@ -275,53 +174,33 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
 }
 
 static inline uint32_t
-insert_new_key(struct gro_tcp4_tbl *tbl,
-		struct tcp4_key *key_src,
+insert_new_flow(struct gro_tcp4_tbl *tbl,
+		struct tcp4_flow_key *src,
 		uint32_t item_idx)
 {
-	struct tcp4_key *key_dst;
-	uint32_t key_idx;
+	struct tcp4_flow_key *dst;
+	uint32_t flow_idx;
 
-	key_idx = find_an_empty_key(tbl);
-	if (key_idx == INVALID_ARRAY_INDEX)
+	flow_idx = find_an_empty_flow(tbl);
+	if (flow_idx == INVALID_ARRAY_INDEX)
 		return INVALID_ARRAY_INDEX;
 
-	key_dst = &(tbl->keys[key_idx].key);
+	dst = &(tbl->flows[flow_idx].key);
 
-	ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr));
-	ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr));
-	key_dst->ip_src_addr = key_src->ip_src_addr;
-	key_dst->ip_dst_addr = key_src->ip_dst_addr;
-	key_dst->recv_ack = key_src->recv_ack;
-	key_dst->src_port = key_src->src_port;
-	key_dst->dst_port = key_src->dst_port;
+	ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr));
+	ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr));
+	dst->ip_src_addr = src->ip_src_addr;
+	dst->ip_dst_addr = src->ip_dst_addr;
+	dst->recv_ack = src->recv_ack;
+	dst->src_port = src->src_port;
+	dst->dst_port = src->dst_port;
 
-	/* non-INVALID_ARRAY_INDEX value indicates this key is valid */
-	tbl->keys[key_idx].start_index = item_idx;
-	tbl->key_num++;
+	tbl->flows[flow_idx].start_index = item_idx;
+	tbl->flow_num++;
 
-	return key_idx;
+	return flow_idx;
 }
 
-static inline int
-is_same_key(struct tcp4_key k1, struct tcp4_key k2)
-{
-	if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0)
-		return 0;
-
-	if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0)
-		return 0;
-
-	return ((k1.ip_src_addr == k2.ip_src_addr) &&
-			(k1.ip_dst_addr == k2.ip_dst_addr) &&
-			(k1.recv_ack == k2.recv_ack) &&
-			(k1.src_port == k2.src_port) &&
-			(k1.dst_port == k2.dst_port));
-}
-
-/*
- * update packet length for the flushed packet.
- */
 static inline void
 update_header(struct gro_tcp4_item *item)
 {
@@ -343,84 +222,99 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	struct ipv4_hdr *ipv4_hdr;
 	struct tcp_hdr *tcp_hdr;
 	uint32_t sent_seq;
-	uint16_t tcp_dl, ip_id;
+	uint16_t tcp_dl, ip_id, frag_off, hdr_len;
+	uint8_t is_atomic;
 
-	struct tcp4_key key;
+	struct tcp4_flow_key key;
 	uint32_t cur_idx, prev_idx, item_idx;
-	uint32_t i, max_key_num;
+	uint32_t i, max_flow_num;
 	int cmp;
 
 	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
 	ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
 	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+	hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
 
 	/*
-	 * if FIN, SYN, RST, PSH, URG, ECE or
-	 * CWR is set, return immediately.
+	 * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
+	 * or CWR set.
 	 */
 	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
 		return -1;
-	/* if payload length is 0, return immediately */
-	tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len -
-		pkt->l4_len;
-	if (tcp_dl == 0)
+	/*
+	 * Don't process the packet whose payload length is less than or
+	 * equal to 0.
+	 */
+	tcp_dl = pkt->pkt_len - hdr_len;
+	if (tcp_dl <= 0)
 		return -1;
 
-	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+	/*
+	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
+	 * whose DF bit is 1, IPv4 ID is ignored.
+	 */
+	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+	is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
+	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
 	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
 
 	ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr));
 	ether_addr_copy(&(eth_hdr->d_addr), &(key.eth_daddr));
 	key.ip_src_addr = ipv4_hdr->src_addr;
 	key.ip_dst_addr = ipv4_hdr->dst_addr;
+	key.recv_ack = tcp_hdr->recv_ack;
 	key.src_port = tcp_hdr->src_port;
 	key.dst_port = tcp_hdr->dst_port;
-	key.recv_ack = tcp_hdr->recv_ack;
 
-	/* search for a key */
-	max_key_num = tbl->max_key_num;
-	for (i = 0; i < max_key_num; i++) {
-		if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) &&
-				is_same_key(tbl->keys[i].key, key))
+	/* Search for a matched flow. */
+	max_flow_num = tbl->max_flow_num;
+	for (i = 0; i < max_flow_num; i++) {
+		if ((tbl->flows[i].start_index != INVALID_ARRAY_INDEX) &&
+				is_same_tcp4_flow(tbl->flows[i].key, key))
 			break;
 	}
 
-	/* can't find a key, so insert a new key and a new item. */
-	if (i == tbl->max_key_num) {
-		item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq,
-				INVALID_ARRAY_INDEX, start_time);
+	/*
+	 * Fail to find a matched flow. Insert a new flow and store the
+	 * packet into the flow.
+	 */
+	if (i == tbl->max_flow_num) {
+		item_idx = insert_new_item(tbl, pkt, start_time,
+				INVALID_ARRAY_INDEX, sent_seq, ip_id,
+				is_atomic);
 		if (item_idx == INVALID_ARRAY_INDEX)
 			return -1;
-		if (insert_new_key(tbl, &key, item_idx) ==
+		if (insert_new_flow(tbl, &key, item_idx) ==
 				INVALID_ARRAY_INDEX) {
-			/*
-			 * fail to insert a new key, so
-			 * delete the inserted item
-			 */
+			/* Fail to insert a new flow. */
 			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
 			return -1;
 		}
 		return 0;
 	}
 
-	/* traverse all packets in the item group to find one to merge */
-	cur_idx = tbl->keys[i].start_index;
+	/*
+	 * Check all packets in the flow and try to find a neighbor for
+	 * the input packet.
+	 */
+	cur_idx = tbl->flows[i].start_index;
 	prev_idx = cur_idx;
 	do {
 		cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
-				pkt->l4_len, tcp_dl, ip_id, sent_seq);
+				sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
+				is_atomic);
 		if (cmp) {
 			if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
-						pkt, ip_id,
-						sent_seq, cmp))
+						pkt, cmp, sent_seq, ip_id, 0))
 				return 1;
 			/*
-			 * fail to merge two packets since the packet
-			 * length will be greater than the max value.
-			 * So insert the packet into the item group.
+			 * Fail to merge the two packets, as the packet
+			 * length is greater than the max value. Store
+			 * the packet into the flow.
 			 */
-			if (insert_new_item(tbl, pkt, ip_id, sent_seq,
-						prev_idx, start_time) ==
+			if (insert_new_item(tbl, pkt, start_time, prev_idx,
+						sent_seq, ip_id,
+						is_atomic) ==
 					INVALID_ARRAY_INDEX)
 				return -1;
 			return 0;
@@ -429,12 +323,9 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		cur_idx = tbl->items[cur_idx].next_pkt_idx;
 	} while (cur_idx != INVALID_ARRAY_INDEX);
 
-	/*
-	 * can't find a packet in the item group to merge,
-	 * so insert the packet into the item group.
-	 */
-	if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx,
-				start_time) == INVALID_ARRAY_INDEX)
+	/* Fail to find a neighbor, so store the packet into the flow. */
+	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
+				ip_id, is_atomic) == INVALID_ARRAY_INDEX)
 		return -1;
 
 	return 0;
@@ -446,46 +337,35 @@ gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
 		struct rte_mbuf **out,
 		uint16_t nb_out)
 {
-	uint16_t k = 0;
+	uint32_t max_flow_num = tbl->max_flow_num;
 	uint32_t i, j;
-	uint32_t max_key_num = tbl->max_key_num;
+	uint16_t k = 0;
 
-	for (i = 0; i < max_key_num; i++) {
-		/* all keys have been checked, return immediately */
-		if (tbl->key_num == 0)
+	for (i = 0; i < max_flow_num; i++) {
+		if (unlikely(tbl->flow_num == 0))
 			return k;
 
-		j = tbl->keys[i].start_index;
+		j = tbl->flows[i].start_index;
 		while (j != INVALID_ARRAY_INDEX) {
 			if (tbl->items[j].start_time <= flush_timestamp) {
 				out[k++] = tbl->items[j].firstseg;
 				if (tbl->items[j].nb_merged > 1)
 					update_header(&(tbl->items[j]));
 				/*
-				 * delete the item and get
-				 * the next packet index
+				 * Delete the packet and get the next
+				 * packet in the flow.
 				 */
-				j = delete_item(tbl, j,
-						INVALID_ARRAY_INDEX);
+				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+				tbl->flows[i].start_index = j;
+				if (j == INVALID_ARRAY_INDEX)
+					tbl->flow_num--;
 
-				/*
-				 * delete the key as all of
-				 * packets are flushed
-				 */
-				if (j == INVALID_ARRAY_INDEX) {
-					tbl->keys[i].start_index =
-						INVALID_ARRAY_INDEX;
-					tbl->key_num--;
-				} else
-					/* update start_index of the key */
-					tbl->keys[i].start_index = j;
-
-				if (k == nb_out)
+				if (unlikely(k == nb_out))
 					return k;
 			} else
 				/*
-				 * left packets of this key won't be
-				 * timeout, so go to check other keys.
+				 * The left packets in this flow won't be
+				 * timeout. Go to check other flows.
 				 */
 				break;
 		}
diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h
index 0a81716..66d6ce9 100644
--- a/lib/librte_gro/gro_tcp4.h
+++ b/lib/librte_gro/gro_tcp4.h
@@ -33,17 +33,20 @@
 #ifndef _GRO_TCP4_H_
 #define _GRO_TCP4_H_
 
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
 #define INVALID_ARRAY_INDEX 0xffffffffUL
 #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
 
 /*
- * the max L3 length of a TCP/IPv4 packet. The L3 length
- * is the sum of ipv4 header, tcp header and L4 payload.
+ * The max length of an IPv4 packet, which includes the length of the L3
+ * header, the L4 header and the data payload.
  */
-#define TCP4_MAX_L3_LENGTH UINT16_MAX
+#define MAX_IPV4_PKT_LENGTH UINT16_MAX
 
-/* criteria of mergeing packets */
-struct tcp4_key {
+/* Header fields representing a TCP/IPv4 flow */
+struct tcp4_flow_key {
 	struct ether_addr eth_saddr;
 	struct ether_addr eth_daddr;
 	uint32_t ip_src_addr;
@@ -54,77 +57,76 @@ struct tcp4_key {
 	uint16_t dst_port;
 };
 
-struct gro_tcp4_key {
-	struct tcp4_key key;
+struct gro_tcp4_flow {
+	struct tcp4_flow_key key;
 	/*
-	 * the index of the first packet in the item group.
-	 * If the value is INVALID_ARRAY_INDEX, it means
-	 * the key is empty.
+	 * The index of the first packet in the flow.
+	 * INVALID_ARRAY_INDEX indicates an empty flow.
 	 */
 	uint32_t start_index;
 };
 
 struct gro_tcp4_item {
 	/*
-	 * first segment of the packet. If the value
+	 * The first MBUF segment of the packet. If the value
 	 * is NULL, it means the item is empty.
 	 */
 	struct rte_mbuf *firstseg;
-	/* last segment of the packet */
+	/* The last MBUF segment of the packet */
 	struct rte_mbuf *lastseg;
 	/*
-	 * the time when the first packet is inserted
-	 * into the table. If a packet in the table is
-	 * merged with an incoming packet, this value
-	 * won't be updated. We set this value only
-	 * when the first packet is inserted into the
-	 * table.
+	 * The time when the first packet is inserted into the table.
+	 * This value won't be updated, even if the packet is merged
+	 * with other packets.
 	 */
 	uint64_t start_time;
 	/*
-	 * we use next_pkt_idx to chain the packets that
-	 * have same key value but can't be merged together.
+	 * next_pkt_idx is used to chain the packets that
+	 * are in the same flow but can't be merged together
+	 * (e.g. caused by packet reordering).
 	 */
 	uint32_t next_pkt_idx;
-	/* the sequence number of the packet */
+	/* TCP sequence number of the packet */
 	uint32_t sent_seq;
-	/* the IP ID of the packet */
+	/* IPv4 ID of the packet */
 	uint16_t ip_id;
-	/* the number of merged packets */
+	/* The number of merged packets */
 	uint16_t nb_merged;
+	/* Indicate if IPv4 ID can be ignored */
+	uint8_t is_atomic;
 };
 
 /*
- * TCP/IPv4 reassembly table structure.
+ * TCP/IPv4 reassembly table structure
  */
 struct gro_tcp4_tbl {
 	/* item array */
 	struct gro_tcp4_item *items;
-	/* key array */
-	struct gro_tcp4_key *keys;
+	/* flow array */
+	struct gro_tcp4_flow *flows;
 	/* current item number */
 	uint32_t item_num;
-	/* current key num */
-	uint32_t key_num;
+	/* current flow num */
+	uint32_t flow_num;
 	/* item array size */
 	uint32_t max_item_num;
-	/* key array size */
-	uint32_t max_key_num;
+	/* flow array size */
+	uint32_t max_flow_num;
 };
 
 /**
  * This function creates a TCP/IPv4 reassembly table.
  *
  * @param socket_id
- *  socket index for allocating TCP/IPv4 reassemble table
+ *  Socket index for allocating the TCP/IPv4 reassemble table
  * @param max_flow_num
- *  the maximum number of flows in the TCP/IPv4 GRO table
+ *  The maximum number of flows in the TCP/IPv4 GRO table
  * @param max_item_per_flow
- *  the maximum packet number per flow.
+ *  The maximum number of packets per flow
  *
  * @return
- *  if create successfully, return a pointer which points to the
- *  created TCP/IPv4 GRO table. Otherwise, return NULL.
+ *  - Return the table pointer on success.
+ *  - Return NULL on failure.
  */
 void *gro_tcp4_tbl_create(uint16_t socket_id,
 		uint16_t max_flow_num,
@@ -134,62 +136,56 @@ void *gro_tcp4_tbl_create(uint16_t socket_id,
  * This function destroys a TCP/IPv4 reassembly table.
  *
  * @param tbl
- *  a pointer points to the TCP/IPv4 reassembly table.
+ *  Pointer pointing to the TCP/IPv4 reassembly table.
  */
 void gro_tcp4_tbl_destroy(void *tbl);
 
 /**
- * This function searches for a packet in the TCP/IPv4 reassembly table
- * to merge with the inputted one. To merge two packets is to chain them
- * together and update packet headers. Packets, whose SYN, FIN, RST, PSH
- * CWR, ECE or URG bit is set, are returned immediately. Packets which
- * only have packet headers (i.e. without data) are also returned
- * immediately. Otherwise, the packet is either merged, or inserted into
- * the table. Besides, if there is no available space to insert the
- * packet, this function returns immediately too.
+ * This function merges a TCP/IPv4 packet. It doesn't process a packet
+ * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or which doesn't
+ * have payload.
  *
- * This function assumes the inputted packet is with correct IPv4 and
- * TCP checksums. And if two packets are merged, it won't re-calculate
- * IPv4 and TCP checksums. Besides, if the inputted packet is IP
- * fragmented, it assumes the packet is complete (with TCP header).
+ * This function doesn't check if the packet has correct checksums and
+ * doesn't re-calculate checksums for the merged packet. Additionally,
+ * it assumes the packets are complete (i.e., MF==0 && frag_off==0),
+ * when IP fragmentation is possible (i.e., DF==0). It returns the
+ * packet, if the packet has invalid parameters (e.g. SYN bit is set)
+ * or there is no available space in the table.
  *
  * @param pkt
- *  packet to reassemble.
+ *  Packet to reassemble
  * @param tbl
- *  a pointer that points to a TCP/IPv4 reassembly table.
+ *  Pointer pointing to the TCP/IPv4 reassembly table
  * @start_time
- *  the start time that the packet is inserted into the table
+ *  The time when the packet is inserted into the table
  *
  * @return
- *  if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE
- *  or URG bit is set, or there is no available space in the table to
- *  insert a new item or a new key, return a negative value. If the
- *  packet is merged successfully, return an positive value. If the
- *  packet is inserted into the table, return 0.
+ *  - Return a positive value if the packet is merged.
+ *  - Return zero if the packet isn't merged but stored in the table.
+ *  - Return a negative value for invalid parameters or no available
+ *    space in the table.
  */
 int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		struct gro_tcp4_tbl *tbl,
 		uint64_t start_time);
 
 /**
- * This function flushes timeout packets in a TCP/IPv4 reassembly table
- * to applications, and without updating checksums for merged packets.
- * The max number of flushed timeout packets is the element number of
- * the array which is used to keep flushed packets.
+ * This function flushes timeout packets in a TCP/IPv4 reassembly table,
+ * without updating checksums.
  *
  * @param tbl
- *  a pointer that points to a TCP GRO table.
+ *  TCP/IPv4 reassembly table pointer
  * @param flush_timestamp
- *  this function flushes packets which are inserted into the table
- *  before or at the flush_timestamp.
+ *  Flush packets which are inserted into the table before or at the
+ *  flush_timestamp.
  * @param out
- *  pointer array which is used to keep flushed packets.
+ *  Pointer array used to keep flushed packets
  * @param nb_out
- *  the element number of out. It's also the max number of timeout
+ *  The element number in 'out'. It also determines the maximum number of
  *  packets that can be flushed finally.
  *
  * @return
- *  the number of packets that are returned.
+ *  The number of flushed packets
  */
 uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
 		uint64_t flush_timestamp,
@@ -201,10 +197,131 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
  * reassembly table.
  *
  * @param tbl
- *  pointer points to a TCP/IPv4 reassembly table.
+ *  TCP/IPv4 reassembly table pointer
  *
  * @return
- *  the number of packets in the table
+ *  The number of packets in the table
  */
 uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
+
+/*
+ * Check if two TCP/IPv4 packets belong to the same flow.
+ */
+static inline int
+is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
+{
+	return (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) &&
+			is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) &&
+			(k1.ip_src_addr == k2.ip_src_addr) &&
+			(k1.ip_dst_addr == k2.ip_dst_addr) &&
+			(k1.recv_ack == k2.recv_ack) &&
+			(k1.src_port == k2.src_port) &&
+			(k1.dst_port == k2.dst_port));
+}
+
+/*
+ * Check if two TCP/IPv4 packets are neighbors.
+ */
+static inline int
+check_seq_option(struct gro_tcp4_item *item,
+		struct tcp_hdr *tcph,
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint16_t tcp_hl,
+		uint16_t tcp_dl,
+		uint16_t l2_offset,
+		uint8_t is_atomic)
+{
+	struct rte_mbuf *pkt_orig = item->firstseg;
+	struct ipv4_hdr *iph_orig;
+	struct tcp_hdr *tcph_orig;
+	uint16_t len, l4_len_orig;
+
+	iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
+			l2_offset + pkt_orig->l2_len);
+	tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
+	l4_len_orig = pkt_orig->l4_len;
+
+	/* Check if TCP option fields equal */
+	len = RTE_MAX(tcp_hl, l4_len_orig) - sizeof(struct tcp_hdr);
+	if ((tcp_hl != l4_len_orig) || ((len > 0) &&
+				(memcmp(tcph + 1, tcph_orig + 1,
+					len) != 0)))
+		return 0;
+
+	/* Don't merge packets whose DF bits are different */
+	if (unlikely(item->is_atomic ^ is_atomic))
+		return 0;
+
+	/* Check if the two packets are neighbors */
+	len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
+		pkt_orig->l3_len - l4_len_orig;
+	if ((sent_seq == item->sent_seq + len) && (is_atomic ||
+				(ip_id == item->ip_id + item->nb_merged)))
+		/* Append the new packet */
+		return 1;
+	else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
+				(ip_id + 1 == item->ip_id)))
+		/* Pre-pend the new packet */
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Merge two TCP/IPv4 packets without updating checksums.
+ * If cmp is larger than 0, append the new packet to the
+ * original packet. Otherwise, pre-pend the new packet to
+ * the original packet.
+ */
+static inline int
+merge_two_tcp4_packets(struct gro_tcp4_item *item,
+		struct rte_mbuf *pkt,
+		int cmp,
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint16_t l2_offset)
+{
+	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
+	uint16_t hdr_len, l2_len;
+
+	if (cmp > 0) {
+		pkt_head = item->firstseg;
+		pkt_tail = pkt;
+	} else {
+		pkt_head = pkt;
+		pkt_tail = item->firstseg;
+	}
+
+	/* Check if the IPv4 packet length is greater than the max value */
+	hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
+		pkt_head->l4_len;
+	l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
+	if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len - hdr_len >
+			MAX_IPV4_PKT_LENGTH))
+		return 0;
+
+	/* Remove the packet header */
+	rte_pktmbuf_adj(pkt_tail, hdr_len);
+
+	/* Chain two packets together */
+	if (cmp > 0) {
+		item->lastseg->next = pkt;
+		item->lastseg = rte_pktmbuf_lastseg(pkt);
+	} else {
+		lastseg = rte_pktmbuf_lastseg(pkt);
+		lastseg->next = item->firstseg;
+		item->firstseg = pkt;
+		/* Update sent_seq and ip_id */
+		item->sent_seq = sent_seq;
+		item->ip_id = ip_id;
+	}
+	item->nb_merged++;
+
+	/* Update MBUF metadata for the merged packet */
+	pkt_head->nb_segs += pkt_tail->nb_segs;
+	pkt_head->pkt_len += pkt_tail->pkt_len;
+
+	return 1;
+}
 #endif
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index 7853246..b3931a8 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -51,11 +51,14 @@ static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
 static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
 			gro_tcp4_tbl_pkt_count, NULL};
 
+#define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
+		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
+
 /*
- * GRO context structure, which is used to merge packets. It keeps
- * many reassembly tables of desired GRO types. Applications need to
- * create GRO context objects before using rte_gro_reassemble to
- * perform GRO.
+ * GRO context structure. It keeps the table structures, which are
+ * used to merge packets, for different GRO types. Before using
+ * rte_gro_reassemble(), applications need to create the GRO context
+ * first.
  */
 struct gro_ctx {
 	/* GRO types to perform */
@@ -93,7 +96,7 @@ rte_gro_ctx_create(const struct rte_gro_param *param)
 				param->max_flow_num,
 				param->max_item_per_flow);
 		if (gro_ctx->tbls[i] == NULL) {
-			/* destroy all created tables */
+			/* Destroy all created tables */
 			gro_ctx->gro_types = gro_types;
 			rte_gro_ctx_destroy(gro_ctx);
 			return NULL;
@@ -113,8 +116,6 @@ rte_gro_ctx_destroy(void *ctx)
 	uint64_t gro_type_flag;
 	uint8_t i;
 
-	if (gro_ctx == NULL)
-		return;
 	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
 		gro_type_flag = 1ULL << i;
 		if ((gro_ctx->gro_types & gro_type_flag) == 0)
@@ -131,62 +132,54 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
 		const struct rte_gro_param *param)
 {
-	uint16_t i;
-	uint16_t nb_after_gro = nb_pkts;
-	uint32_t item_num;
-
-	/* allocate a reassembly table for TCP/IPv4 GRO */
+	/* Allocate a reassembly table for TCP/IPv4 GRO */
 	struct gro_tcp4_tbl tcp_tbl;
-	struct gro_tcp4_key tcp_keys[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
 	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
 
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
-	uint16_t unprocess_num = 0;
+	uint32_t item_num;
 	int32_t ret;
-	uint64_t current_time;
+	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
 
-	if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0)
+	if (unlikely((param->gro_types & RTE_GRO_TCP_IPV4) == 0))
 		return nb_pkts;
 
-	/* get the actual number of packets */
+	/* Get the maximum number of packets */
 	item_num = RTE_MIN(nb_pkts, (param->max_flow_num *
-			param->max_item_per_flow));
+				param->max_item_per_flow));
 	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
 
 	for (i = 0; i < item_num; i++)
-		tcp_keys[i].start_index = INVALID_ARRAY_INDEX;
+		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
 
-	tcp_tbl.keys = tcp_keys;
+	tcp_tbl.flows = tcp_flows;
 	tcp_tbl.items = tcp_items;
-	tcp_tbl.key_num = 0;
+	tcp_tbl.flow_num = 0;
 	tcp_tbl.item_num = 0;
-	tcp_tbl.max_key_num = item_num;
+	tcp_tbl.max_flow_num = item_num;
 	tcp_tbl.max_item_num = item_num;
 
-	current_time = rte_rdtsc();
-
 	for (i = 0; i < nb_pkts; i++) {
-		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
-					RTE_PTYPE_L4_TCP)) ==
-				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
-			ret = gro_tcp4_reassemble(pkts[i],
-					&tcp_tbl,
-					current_time);
+		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+			/*
+			 * The timestamp is ignored, since all packets
+			 * will be flushed from the tables.
+			 */
+			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0);
 			if (ret > 0)
-				/* merge successfully */
+				/* Merge successfully */
 				nb_after_gro--;
-			else if (ret < 0) {
-				unprocess_pkts[unprocess_num++] =
-					pkts[i];
-			}
+			else if (ret < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else
 			unprocess_pkts[unprocess_num++] = pkts[i];
 	}
 
-	/* re-arrange GROed packets */
 	if (nb_after_gro < nb_pkts) {
-		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time,
-				pkts, nb_pkts);
+		/* Flush all packets from the tables */
+		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, pkts, nb_pkts);
+		/* Copy unprocessed packets */
 		if (unprocess_num > 0) {
 			memcpy(&pkts[i], unprocess_pkts,
 					sizeof(struct rte_mbuf *) *
@@ -202,31 +195,28 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
 		void *ctx)
 {
-	uint16_t i, unprocess_num = 0;
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	struct gro_ctx *gro_ctx = ctx;
+	void *tcp_tbl;
 	uint64_t current_time;
+	uint16_t i, unprocess_num = 0;
 
-	if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0)
+	if (unlikely((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0))
 		return nb_pkts;
 
+	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
-					RTE_PTYPE_L4_TCP)) ==
-				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
-			if (gro_tcp4_reassemble(pkts[i],
-						gro_ctx->tbls
-						[RTE_GRO_TCP_IPV4_INDEX],
+		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+			if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
 						current_time) < 0)
 				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else
 			unprocess_pkts[unprocess_num++] = pkts[i];
 	}
 	if (unprocess_num > 0) {
-		memcpy(pkts, unprocess_pkts,
-				sizeof(struct rte_mbuf *) *
+		memcpy(pkts, unprocess_pkts, sizeof(struct rte_mbuf *) *
 				unprocess_num);
 	}
 
@@ -252,6 +242,7 @@ rte_gro_timeout_flush(void *ctx,
 				flush_timestamp,
 				out, max_nb_out);
 	}
+
 	return 0;
 }
 
@@ -262,7 +253,7 @@ rte_gro_get_pkt_count(void *ctx)
 	gro_tbl_pkt_count_fn pkt_count_fn;
 	uint64_t item_num = 0;
 	uint64_t gro_type_flag;
-	uint8_t i;
+	uint8_t gro_type_num = RTE_GRO_TYPE_SUPPORT_NUM, i;
 
 	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
 		gro_type_flag = 1ULL << i;
@@ -270,9 +261,12 @@ rte_gro_get_pkt_count(void *ctx)
 			continue;
 
 		pkt_count_fn = tbl_pkt_count_fn[i];
-		if (pkt_count_fn == NULL)
-			continue;
-		item_num += pkt_count_fn(gro_ctx->tbls[i]);
+		if (pkt_count_fn) {
+			item_num += pkt_count_fn(gro_ctx->tbls[i]);
+			if (--gro_type_num == 0)
+				break;
+		}
 	}
+
 	return item_num;
 }
diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
index d57e0c5..36a1e60 100644
--- a/lib/librte_gro/rte_gro.h
+++ b/lib/librte_gro/rte_gro.h
@@ -59,8 +59,8 @@ extern "C" {
 /**< TCP/IPv4 GRO flag */
 
 /**
- * A structure which is used to create GRO context objects or tell
- * rte_gro_reassemble_burst() what reassembly rules are demanded.
+ * Structure used to create GRO context objects or used to pass
+ * application-determined parameters to rte_gro_reassemble_burst().
  */
 struct rte_gro_param {
 	uint64_t gro_types;
@@ -106,26 +106,23 @@ void rte_gro_ctx_destroy(void *ctx);
 
 /**
  * This is one of the main reassembly APIs, which merges numbers of
- * packets at a time. It assumes that all inputted packets are with
- * correct checksums. That is, applications should guarantee all
- * inputted packets are correct. Besides, it doesn't re-calculate
- * checksums for merged packets. If inputted packets are IP fragmented,
- * this function assumes them are complete (i.e. with L4 header). After
- * finishing processing, it returns all GROed packets to applications
- * immediately.
+ * packets at a time. It doesn't check if input packets have correct
+ * checksums and doesn't re-calculate checksums for merged packets.
+ * It assumes the packets are complete (i.e., MF==0 && frag_off==0),
+ * when IP fragmentation is possible (i.e., DF==0). The GROed packets
+ * are returned as soon as the function finishes.
  *
  * @param pkts
- *  a pointer array which points to the packets to reassemble. Besides,
- *  it keeps mbuf addresses for the GROed packets.
+ *  Pointer array pointing to the packets to reassemble. Besides, it
+ *  keeps MBUF addresses for the GROed packets.
  * @param nb_pkts
- *  the number of packets to reassemble.
+ *  The number of packets to reassemble
  * @param param
- *  applications use it to tell rte_gro_reassemble_burst() what rules
- *  are demanded.
+ *  Application-determined parameters for reassembling packets.
  *
  * @return
- *  the number of packets after been GROed. If no packets are merged,
- *  the returned value is nb_pkts.
+ *  The number of packets after being GROed. If no packets are merged,
+ *  the return value is equal to nb_pkts.
  */
 uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
@@ -135,32 +132,28 @@ uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts,
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
  *
- * Reassembly function, which tries to merge inputted packets with
- * the packets in the reassembly tables of a given GRO context. This
- * function assumes all inputted packets are with correct checksums.
- * And it won't update checksums if two packets are merged. Besides,
- * if inputted packets are IP fragmented, this function assumes they
- * are complete packets (i.e. with L4 header).
+ * Reassembly function, which tries to merge input packets with the
+ * existed packets in the reassembly tables of a given GRO context.
+ * It doesn't check if input packets have correct checksums and doesn't
+ * re-calculate checksums for merged packets. Additionally, it assumes
+ * the packets are complete (i.e., MF==0 && frag_off==0), when IP
+ * fragmentation is possible (i.e., DF==0).
  *
- * If the inputted packets don't have data or are with unsupported GRO
- * types etc., they won't be processed and are returned to applications.
- * Otherwise, the inputted packets are either merged or inserted into
- * the table. If applications want get packets in the table, they need
- * to call flush API.
+ * If the input packets have invalid parameters (e.g. no data payload,
+ * unsupported GRO types), they are returned to applications. Otherwise,
+ * they are either merged or inserted into the table. Applications need
+ * to flush packets from the tables by flush API, if they want to get the
+ * GROed packets.
  *
  * @param pkts
- *  packet to reassemble. Besides, after this function finishes, it
- *  keeps the unprocessed packets (e.g. without data or unsupported
- *  GRO types).
+ *  Packets to reassemble. It's also used to store the unprocessed packets.
  * @param nb_pkts
- *  the number of packets to reassemble.
+ *  The number of packets to reassemble
  * @param ctx
- *  a pointer points to a GRO context object.
+ *  GRO context object pointer
  *
  * @return
- *  return the number of unprocessed packets (e.g. without data or
- *  unsupported GRO types). If all packets are processed (merged or
- *  inserted into the table), return 0.
+ *  The number of unprocessed packets.
  */
 uint16_t rte_gro_reassemble(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
@@ -170,29 +163,28 @@ uint16_t rte_gro_reassemble(struct rte_mbuf **pkts,
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
  *
- * This function flushes the timeout packets from reassembly tables of
- * desired GRO types. The max number of flushed timeout packets is the
- * element number of the array which is used to keep the flushed packets.
+ * This function flushes the timeout packets from the reassembly tables
+ * of desired GRO types. The max number of flushed packets is the
+ * element number of 'out'.
  *
- * Besides, this function won't re-calculate checksums for merged
- * packets in the tables. That is, the returned packets may be with
- * wrong checksums.
+ * Additionally, the flushed packets may have incorrect checksums, since
+ * this function doesn't re-calculate checksums for merged packets.
  *
  * @param ctx
- *  a pointer points to a GRO context object.
+ *  GRO context object pointer.
  * @param timeout_cycles
- *  max TTL for packets in reassembly tables, measured in nanosecond.
+ *  The max TTL for packets in reassembly tables, measured in nanoseconds.
  * @param gro_types
- *  this function only flushes packets which belong to the GRO types
- *  specified by gro_types.
+ *  This function flushes packets whose GRO types are specified by
+ *  gro_types.
  * @param out
- *  a pointer array that is used to keep flushed timeout packets.
+ *  Pointer array used to keep flushed packets.
  * @param max_nb_out
- *  the element number of out. It's also the max number of timeout
+ *  The element number of 'out'. It's also the max number of timeout
  *  packets that can be flushed finally.
  *
  * @return
- *  the number of flushed packets. If no packets are flushed, return 0.
+ *  The number of flushed packets.
  */
 uint16_t rte_gro_timeout_flush(void *ctx,
 		uint64_t timeout_cycles,
@@ -208,10 +200,10 @@ uint16_t rte_gro_timeout_flush(void *ctx,
  * of a given GRO context.
  *
  * @param ctx
- *  pointer points to a GRO context object.
+ *  GRO context object pointer.
  *
  * @return
- *  the number of packets in all reassembly tables.
+ *  The number of packets in the tables.
  */
 uint64_t rte_gro_get_pkt_count(void *ctx);
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v3 2/2] gro: support VxLAN GRO
  2017-12-22  7:25   ` [PATCH v3 0/2] Support " Jiayu Hu
  2017-12-22  7:25     ` [PATCH v3 1/2] gro: code cleanup Jiayu Hu
@ 2017-12-22  7:25     ` Jiayu Hu
  2017-12-22  8:17       ` Chen, Junjie J
  2017-12-29  3:53       ` Chen, Junjie J
  2017-12-29  3:52     ` [PATCH v3 0/2] Support " Chen, Junjie J
  2018-01-05  6:12     ` [PATCH v4 " Jiayu Hu
  3 siblings, 2 replies; 31+ messages in thread
From: Jiayu Hu @ 2017-12-22  7:25 UTC (permalink / raw)
  To: dev
  Cc: jianfeng.tan, junjie.j.chen, konstantin.ananyev, stephen,
	ferruh.yigit, lei.a.yao, Jiayu Hu

This patch adds a framework that allows GRO on tunneled packets.
Furthermore, it leverages that framework to provide GRO support for
VxLAN-encapsulated packets. Supported VxLAN packets must have an outer
IPv4 header, and contain an inner TCP/IPv4 packet.

VxLAN GRO doesn't check if input packets have correct checksums and
doesn't update checksums for output packets. Additionally, it assumes
the packets are complete (i.e., MF==0 && frag_off==0), when IP
fragmentation is possible (i.e., DF==0).

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
---
 .../prog_guide/generic_receive_offload_lib.rst     |  31 +-
 lib/librte_gro/Makefile                            |   1 +
 lib/librte_gro/gro_vxlan_tcp4.c                    | 515 +++++++++++++++++++++
 lib/librte_gro/gro_vxlan_tcp4.h                    | 184 ++++++++
 lib/librte_gro/rte_gro.c                           | 129 +++++-
 lib/librte_gro/rte_gro.h                           |   5 +-
 6 files changed, 837 insertions(+), 28 deletions(-)
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.c
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.h

diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst b/doc/guides/prog_guide/generic_receive_offload_lib.rst
index c2d7a41..078bec0 100644
--- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
+++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
@@ -57,7 +57,9 @@ assumes the packets are complete (i.e., MF==0 && frag_off==0), when IP
 fragmentation is possible (i.e., DF==0). Additionally, it complies RFC
 6864 to process the IPv4 ID field.
 
-Currently, the GRO library provides GRO supports for TCP/IPv4 packets.
+Currently, the GRO library provides GRO support for TCP/IPv4 packets and
+VxLAN packets which contain an outer IPv4 header and an inner TCP/IPv4
+packet.
 
 Two Sets of API
 ---------------
@@ -108,7 +110,8 @@ Reassembly Algorithm
 
 The reassembly algorithm is used for reassembling packets. In the GRO
 library, different GRO types can use different algorithms. In this
-section, we will introduce an algorithm, which is used by TCP/IPv4 GRO.
+section, we will introduce an algorithm, which is used by TCP/IPv4 GRO
+and VxLAN GRO.
 
 Challenges
 ~~~~~~~~~~
@@ -185,6 +188,30 @@ Header fields deciding if two packets are neighbors include:
 - IPv4 ID. The IPv4 ID fields of the packets, whose DF bit is 0, should
   be increased by 1.
 
+VxLAN GRO
+---------
+
+The table structure used by VxLAN GRO, which is in charge of processing
+VxLAN packets with an outer IPv4 header and inner TCP/IPv4 packet, is
+similar to that of TCP/IPv4 GRO. However, the header fields used
+to define a VxLAN flow include:
+
+- outer source and destination: Ethernet and IP address, UDP port
+
+- VxLAN header (VNI and flag)
+
+- inner source and destination: Ethernet and IP address, TCP port
+
+Header fields deciding if packets are neighbors include:
+
+- outer IPv4 ID. The IPv4 ID fields of the packets, whose DF bit in the
+  outer IPv4 header is 0, should be increased by 1.
+
+- inner TCP sequence number
+
+- inner IPv4 ID. The IPv4 ID fields of the packets, whose DF bit in the
+  inner IPv4 header is 0, should be increased by 1.
+
 .. note::
         We comply RFC 6864 to process the IPv4 ID field. Specifically,
         we check IPv4 ID fields for the packets whose DF bit is 0 and
diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile
index eb423cc..0110455 100644
--- a/lib/librte_gro/Makefile
+++ b/lib/librte_gro/Makefile
@@ -45,6 +45,7 @@ LIBABIVER := 1
 # source files
 SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
 SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c
+SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_vxlan_tcp4.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h
diff --git a/lib/librte_gro/gro_vxlan_tcp4.c b/lib/librte_gro/gro_vxlan_tcp4.c
new file mode 100644
index 0000000..6567779
--- /dev/null
+++ b/lib/librte_gro/gro_vxlan_tcp4.c
@@ -0,0 +1,515 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_udp.h>
+
+#include "gro_vxlan_tcp4.h"
+
+void *
+gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow)
+{
+	struct gro_vxlan_tcp4_tbl *tbl;
+	size_t size;
+	uint32_t entries_num, i;
+
+	entries_num = max_flow_num * max_item_per_flow;
+	entries_num = RTE_MIN(entries_num, GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM);
+
+	if (entries_num == 0)
+		return NULL;
+
+	tbl = rte_zmalloc_socket(__func__,
+			sizeof(struct gro_vxlan_tcp4_tbl),
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl == NULL)
+		return NULL;
+
+	size = sizeof(struct gro_vxlan_tcp4_item) * entries_num;
+	tbl->items = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->items == NULL) {
+		rte_free(tbl);
+		return NULL;
+	}
+	tbl->max_item_num = entries_num;
+
+	size = sizeof(struct gro_vxlan_tcp4_flow) * entries_num;
+	tbl->flows = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->flows == NULL) {
+		rte_free(tbl->items);
+		rte_free(tbl);
+		return NULL;
+	}
+
+	for (i = 0; i < entries_num; i++)
+		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+	tbl->max_flow_num = entries_num;
+
+	return tbl;
+}
+
+void
+gro_vxlan_tcp4_tbl_destroy(void *tbl)
+{
+	struct gro_vxlan_tcp4_tbl *vxlan_tbl = tbl;
+
+	if (vxlan_tbl) {
+		rte_free(vxlan_tbl->items);
+		rte_free(vxlan_tbl->flows);
+	}
+	rte_free(vxlan_tbl);
+}
+
+static inline uint32_t
+find_an_empty_item(struct gro_vxlan_tcp4_tbl *tbl)
+{
+	uint32_t max_item_num = tbl->max_item_num, i;
+
+	for (i = 0; i < max_item_num; i++)
+		if (tbl->items[i].inner_item.firstseg == NULL)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+find_an_empty_flow(struct gro_vxlan_tcp4_tbl *tbl)
+{
+	uint32_t max_flow_num = tbl->max_flow_num, i;
+
+	for (i = 0; i < max_flow_num; i++)
+		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+insert_new_item(struct gro_vxlan_tcp4_tbl *tbl,
+		struct rte_mbuf *pkt,
+		uint64_t start_time,
+		uint32_t prev_idx,
+		uint32_t sent_seq,
+		uint16_t outer_ip_id,
+		uint16_t ip_id,
+		uint8_t outer_is_atomic,
+		uint8_t is_atomic)
+{
+	uint32_t item_idx;
+
+	item_idx = find_an_empty_item(tbl);
+	if (item_idx == INVALID_ARRAY_INDEX)
+		return INVALID_ARRAY_INDEX;
+
+	tbl->items[item_idx].inner_item.firstseg = pkt;
+	tbl->items[item_idx].inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
+	tbl->items[item_idx].inner_item.start_time = start_time;
+	tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
+	tbl->items[item_idx].inner_item.sent_seq = sent_seq;
+	tbl->items[item_idx].inner_item.ip_id = ip_id;
+	tbl->items[item_idx].inner_item.nb_merged = 1;
+	tbl->items[item_idx].inner_item.is_atomic = is_atomic;
+	tbl->items[item_idx].outer_ip_id = outer_ip_id;
+	tbl->items[item_idx].outer_is_atomic = outer_is_atomic;
+	tbl->item_num++;
+
+	/* If the previous packet exists, chain the new one with it. */
+	if (prev_idx != INVALID_ARRAY_INDEX) {
+		tbl->items[item_idx].inner_item.next_pkt_idx =
+			tbl->items[prev_idx].inner_item.next_pkt_idx;
+		tbl->items[prev_idx].inner_item.next_pkt_idx = item_idx;
+	}
+
+	return item_idx;
+}
+
+static inline uint32_t
+delete_item(struct gro_vxlan_tcp4_tbl *tbl,
+		uint32_t item_idx,
+		uint32_t prev_item_idx)
+{
+	uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
+
+	/* NULL indicates an empty item. */
+	tbl->items[item_idx].inner_item.firstseg = NULL;
+	tbl->item_num--;
+	if (prev_item_idx != INVALID_ARRAY_INDEX)
+		tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
+
+	return next_idx;
+}
+
+static inline uint32_t
+insert_new_flow(struct gro_vxlan_tcp4_tbl *tbl,
+		struct vxlan_tcp4_flow_key *src,
+		uint32_t item_idx)
+{
+	struct vxlan_tcp4_flow_key *dst;
+	uint32_t flow_idx;
+
+	flow_idx = find_an_empty_flow(tbl);
+	if (flow_idx == INVALID_ARRAY_INDEX)
+		return INVALID_ARRAY_INDEX;
+
+	dst = &(tbl->flows[flow_idx].key);
+
+	ether_addr_copy(&(src->inner_key.eth_saddr),
+			&(dst->inner_key.eth_saddr));
+	ether_addr_copy(&(src->inner_key.eth_daddr),
+			&(dst->inner_key.eth_daddr));
+	dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
+	dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
+	dst->inner_key.recv_ack = src->inner_key.recv_ack;
+	dst->inner_key.src_port = src->inner_key.src_port;
+	dst->inner_key.dst_port = src->inner_key.dst_port;
+
+	dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
+	dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
+	ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
+	ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
+	dst->outer_ip_src_addr = src->outer_ip_src_addr;
+	dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
+	dst->outer_src_port = src->outer_src_port;
+	dst->outer_dst_port = src->outer_dst_port;
+
+	tbl->flows[flow_idx].start_index = item_idx;
+	tbl->flow_num++;
+
+	return flow_idx;
+}
+
+static inline int
+is_same_vxlan_tcp4_flow(struct vxlan_tcp4_flow_key k1,
+		struct vxlan_tcp4_flow_key k2)
+{
+	return (is_same_ether_addr(&k1.outer_eth_saddr, &k2.outer_eth_saddr) &&
+			is_same_ether_addr(&k1.outer_eth_daddr,
+				&k2.outer_eth_daddr) &&
+			(k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
+			(k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
+			(k1.outer_src_port == k2.outer_src_port) &&
+			(k1.outer_dst_port == k2.outer_dst_port) &&
+			(k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
+			(k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
+			is_same_tcp4_flow(k1.inner_key, k2.inner_key));
+}
+
+static inline int
+check_vxlan_seq_option(struct gro_vxlan_tcp4_item *item,
+		struct tcp_hdr *tcp_hdr,
+		uint32_t sent_seq,
+		uint16_t outer_ip_id,
+		uint16_t ip_id,
+		uint16_t tcp_hl,
+		uint16_t tcp_dl,
+		uint8_t outer_is_atomic,
+		uint8_t is_atomic)
+{
+	struct rte_mbuf *pkt = item->inner_item.firstseg;
+	int cmp;
+	uint16_t l2_offset;
+
+	/* Don't merge packets whose outer DF bits are different. */
+	if (unlikely(item->outer_is_atomic ^ outer_is_atomic))
+		return 0;
+
+	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
+	cmp = check_seq_option(&item->inner_item, tcp_hdr, sent_seq, ip_id,
+			tcp_hl, tcp_dl, l2_offset, is_atomic);
+	if ((cmp == 1) && (outer_is_atomic ||
+				(outer_ip_id == item->outer_ip_id +
+				 item->inner_item.nb_merged)))
+		/* Append the packet. */
+		return 1;
+	else if ((cmp == -1) && (outer_is_atomic ||
+				(outer_ip_id + 1 == item->outer_ip_id)))
+		/* Prepend the packet. */
+		return -1;
+
+	return 0;
+}
+
+static inline int
+merge_two_vxlan_tcp4_packets(struct gro_vxlan_tcp4_item *item,
+		struct rte_mbuf *pkt,
+		int cmp,
+		uint32_t sent_seq,
+		uint16_t outer_ip_id,
+		uint16_t ip_id)
+{
+	if (merge_two_tcp4_packets(&item->inner_item, pkt, cmp, sent_seq,
+				ip_id, pkt->outer_l2_len +
+				pkt->outer_l3_len)) {
+		item->outer_ip_id = cmp < 0 ? outer_ip_id : item->outer_ip_id;
+		return 1;
+	}
+
+	return 0;
+}
+
+static inline void
+update_vxlan_header(struct gro_vxlan_tcp4_item *item)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct udp_hdr *udp_hdr;
+	struct rte_mbuf *pkt = item->inner_item.firstseg;
+	uint16_t len;
+
+	/* Update the outer IPv4 header. */
+	len = pkt->pkt_len - pkt->outer_l2_len;
+	ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			pkt->outer_l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
+
+	/* Update the outer UDP header. */
+	len -= pkt->outer_l3_len;
+	udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + pkt->outer_l3_len);
+	udp_hdr->dgram_len = rte_cpu_to_be_16(len);
+
+	/* Update the inner IPv4 header. */
+	len -= pkt->l2_len;
+	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
+}
+
+int32_t
+gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
+		struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t start_time)
+{
+	struct ether_hdr *outer_eth_hdr, *eth_hdr;
+	struct ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
+	struct tcp_hdr *tcp_hdr;
+	struct udp_hdr *udp_hdr;
+	struct vxlan_hdr *vxlan_hdr;
+	uint32_t sent_seq;
+	uint16_t tcp_dl, frag_off, outer_ip_id, ip_id;
+	uint8_t outer_is_atomic, is_atomic;
+
+	struct vxlan_tcp4_flow_key key;
+	uint32_t cur_idx, prev_idx, item_idx;
+	uint32_t i, max_flow_num;
+	int cmp;
+	uint16_t hdr_len;
+
+	outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+	outer_ipv4_hdr = (struct ipv4_hdr *)((char *)outer_eth_hdr +
+			pkt->outer_l2_len);
+	udp_hdr = (struct udp_hdr *)((char *)outer_ipv4_hdr +
+			pkt->outer_l3_len);
+	vxlan_hdr = (struct vxlan_hdr *)((char *)udp_hdr +
+			sizeof(struct udp_hdr));
+	eth_hdr = (struct ether_hdr *)((char *)vxlan_hdr +
+			sizeof(struct vxlan_hdr));
+	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
+	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+
+	/*
+	 * Don't process the packet which has FIN, SYN, RST, PSH, URG,
+	 * ECE or CWR set.
+	 */
+	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
+		return -1;
+
+	hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
+		pkt->l3_len + pkt->l4_len;
+	/*
+	 * Don't process the packet whose payload length is less than or
+	 * equal to 0.
+	 */
+	tcp_dl = pkt->pkt_len - hdr_len;
+	if (tcp_dl <= 0)
+		return -1;
+
+	/*
+	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
+	 * whose DF bit is 1, IPv4 ID is ignored.
+	 */
+	frag_off = rte_be_to_cpu_16(outer_ipv4_hdr->fragment_offset);
+	outer_is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
+	outer_ip_id = outer_is_atomic ? 0 :
+		rte_be_to_cpu_16(outer_ipv4_hdr->packet_id);
+	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+	is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
+	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
+
+	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
+
+	ether_addr_copy(&(eth_hdr->s_addr), &(key.inner_key.eth_saddr));
+	ether_addr_copy(&(eth_hdr->d_addr), &(key.inner_key.eth_daddr));
+	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
+	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
+	key.inner_key.recv_ack = tcp_hdr->recv_ack;
+	key.inner_key.src_port = tcp_hdr->src_port;
+	key.inner_key.dst_port = tcp_hdr->dst_port;
+
+	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
+	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;
+	ether_addr_copy(&(outer_eth_hdr->s_addr), &(key.outer_eth_saddr));
+	ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
+	key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
+	key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
+	key.outer_src_port = udp_hdr->src_port;
+	key.outer_dst_port = udp_hdr->dst_port;
+
+	/* Search for a matched flow. */
+	max_flow_num = tbl->max_flow_num;
+	for (i = 0; i < max_flow_num; i++) {
+		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX &&
+				is_same_vxlan_tcp4_flow(tbl->flows[i].key,
+					key))
+			break;
+	}
+
+	/*
+	 * Can't find a matched flow. Insert a new flow and store the
+	 * packet into the flow.
+	 */
+	if (i == tbl->max_flow_num) {
+		item_idx = insert_new_item(tbl, pkt, start_time,
+				INVALID_ARRAY_INDEX, sent_seq, outer_ip_id,
+				ip_id, outer_is_atomic, is_atomic);
+		if (item_idx == INVALID_ARRAY_INDEX)
+			return -1;
+		if (insert_new_flow(tbl, &key, item_idx) ==
+				INVALID_ARRAY_INDEX) {
+			/*
+			 * Fail to insert a new flow, so
+			 * delete the inserted packet.
+			 */
+			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
+			return -1;
+		}
+		return 0;
+	}
+
+	/* Check all packets in the flow and try to find a neighbor. */
+	cur_idx = tbl->flows[i].start_index;
+	prev_idx = cur_idx;
+	do {
+		cmp = check_vxlan_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
+				sent_seq, outer_ip_id, ip_id, pkt->l4_len,
+				tcp_dl, outer_is_atomic, is_atomic);
+		if (cmp) {
+			if (merge_two_vxlan_tcp4_packets(&(tbl->items[cur_idx]),
+						pkt, cmp, sent_seq,
+						outer_ip_id, ip_id))
+				return 1;
+			/*
+			 * Can't merge two packets, as the packet
+			 * length will be greater than the max value.
+			 * Insert the packet into the flow.
+			 */
+			if (insert_new_item(tbl, pkt, start_time, prev_idx,
+						sent_seq, outer_ip_id,
+						ip_id, outer_is_atomic,
+						is_atomic) ==
+					INVALID_ARRAY_INDEX)
+				return -1;
+			return 0;
+		}
+		prev_idx = cur_idx;
+		cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx;
+	} while (cur_idx != INVALID_ARRAY_INDEX);
+
+	/* Can't find neighbor. Insert the packet into the flow. */
+	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
+				outer_ip_id, ip_id, outer_is_atomic,
+				is_atomic) == INVALID_ARRAY_INDEX)
+		return -1;
+
+	return 0;
+}
+
+uint16_t
+gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out)
+{
+	uint16_t k = 0;
+	uint32_t i, j;
+	uint32_t max_flow_num = tbl->max_flow_num;
+
+	for (i = 0; i < max_flow_num; i++) {
+		if (unlikely(tbl->flow_num == 0))
+			return k;
+
+		j = tbl->flows[i].start_index;
+		while (j != INVALID_ARRAY_INDEX) {
+			if (tbl->items[j].inner_item.start_time <=
+					flush_timestamp) {
+				out[k++] = tbl->items[j].inner_item.firstseg;
+				if (tbl->items[j].inner_item.nb_merged > 1)
+					update_vxlan_header(&(tbl->items[j]));
+				/*
+				 * Delete the item and get the next packet
+				 * index.
+				 */
+				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+				tbl->flows[i].start_index = j;
+				if (j == INVALID_ARRAY_INDEX)
+					tbl->flow_num--;
+
+				if (unlikely(k == nb_out))
+					return k;
+			} else
+				/*
+				 * The remaining packets in the flow won't
+				 * time out. Go to check other flows.
+				 */
+				break;
+		}
+	}
+	return k;
+}
+
+uint32_t
+gro_vxlan_tcp4_tbl_pkt_count(void *tbl)
+{
+	struct gro_vxlan_tcp4_tbl *gro_tbl = tbl;
+
+	if (gro_tbl)
+		return gro_tbl->item_num;
+
+	return 0;
+}
diff --git a/lib/librte_gro/gro_vxlan_tcp4.h b/lib/librte_gro/gro_vxlan_tcp4.h
new file mode 100644
index 0000000..66baf73
--- /dev/null
+++ b/lib/librte_gro/gro_vxlan_tcp4.h
@@ -0,0 +1,184 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _GRO_VXLAN_TCP4_H_
+#define _GRO_VXLAN_TCP4_H_
+
+#include "gro_tcp4.h"
+
+#define GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
+
+/* Header fields representing a VxLAN flow */
+struct vxlan_tcp4_flow_key {
+	struct tcp4_flow_key inner_key;
+	struct vxlan_hdr vxlan_hdr;
+
+	struct ether_addr outer_eth_saddr;
+	struct ether_addr outer_eth_daddr;
+
+	uint32_t outer_ip_src_addr;
+	uint32_t outer_ip_dst_addr;
+
+	/* Outer UDP ports */
+	uint16_t outer_src_port;
+	uint16_t outer_dst_port;
+
+};
+
+struct gro_vxlan_tcp4_flow {
+	struct vxlan_tcp4_flow_key key;
+	/*
+	 * The index of the first packet in the flow. INVALID_ARRAY_INDEX
+	 * indicates an empty flow.
+	 */
+	uint32_t start_index;
+};
+
+struct gro_vxlan_tcp4_item {
+	struct gro_tcp4_item inner_item;
+	/* IPv4 ID in the outer IPv4 header */
+	uint16_t outer_ip_id;
+	/* Indicate if outer IPv4 ID can be ignored */
+	uint8_t outer_is_atomic;
+};
+
+/*
+ * VxLAN (with an outer IPv4 header and an inner TCP/IPv4 packet)
+ * reassembly table structure
+ */
+struct gro_vxlan_tcp4_tbl {
+	/* item array */
+	struct gro_vxlan_tcp4_item *items;
+	/* flow array */
+	struct gro_vxlan_tcp4_flow *flows;
+	/* current item number */
+	uint32_t item_num;
+	/* current flow number */
+	uint32_t flow_num;
+	/* the maximum item number */
+	uint32_t max_item_num;
+	/* the maximum flow number */
+	uint32_t max_flow_num;
+};
+
+/**
+ * This function creates a VxLAN reassembly table for VxLAN packets
+ * which have an outer IPv4 header and an inner TCP/IPv4 packet.
+ *
+ * @param socket_id
+ *  Socket index for allocating the table
+ * @param max_flow_num
+ *  The maximum number of flows in the table
+ * @param max_item_per_flow
+ *  The maximum number of packets per flow
+ *
+ * @return
+ *  - Return the table pointer on success.
+ *  - Return NULL on failure.
+ */
+void *gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow);
+
+/**
+ * This function destroys a VxLAN reassembly table.
+ *
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ */
+void gro_vxlan_tcp4_tbl_destroy(void *tbl);
+
+/**
+ * This function merges a VxLAN packet which has an outer IPv4 header and
+ * an inner TCP/IPv4 packet. It doesn't process packets whose TCP
+ * header has the SYN, FIN, RST, PSH, CWR, ECE or URG bit set, or which
+ * don't have payload.
+ *
+ * This function doesn't check if the packet has correct checksums and
+ * doesn't re-calculate checksums for the merged packet. Additionally,
+ * it assumes the packets are complete (i.e., MF==0 && frag_off==0), when
+ * IP fragmentation is possible (i.e., DF==0). The packet won't be
+ * processed if it has invalid parameters (e.g. the SYN bit is set) or
+ * there is no available space in the table.
+ *
+ * @param pkt
+ *  Packet to reassemble
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ * @param start_time
+ *  The time when the packet is inserted into the table
+ *
+ * @return
+ *  - Return a positive value if the packet is merged.
+ *  - Return zero if the packet isn't merged but stored in the table.
+ *  - Return a negative value for invalid parameters or no available
+ *    space in the table.
+ */
+int32_t gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
+		struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t start_time);
+
+/**
+ * This function flushes timeout packets in the VxLAN reassembly table
+ * without updating checksums.
+ *
+ * @param tbl
+ *  Pointer pointing to a VxLAN GRO table
+ * @param flush_timestamp
+ *  This function flushes packets which are inserted into the table
+ *  before or at the flush_timestamp.
+ * @param out
+ *  Pointer array used to keep flushed packets
+ * @param nb_out
+ *  The element number in 'out'. It also determines the maximum number of
+ *  packets that can be flushed finally.
+ *
+ * @return
+ *  The number of flushed packets
+ */
+uint16_t gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out);
+
+/**
+ * This function returns the number of packets in a VxLAN
+ * reassembly table.
+ *
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ *
+ * @return
+ *  The number of packets in the table
+ */
+uint32_t gro_vxlan_tcp4_tbl_pkt_count(void *tbl);
+#endif
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index b3931a8..5a26893 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -37,6 +37,7 @@
 
 #include "rte_gro.h"
 #include "gro_tcp4.h"
+#include "gro_vxlan_tcp4.h"
 
 typedef void *(*gro_tbl_create_fn)(uint16_t socket_id,
 		uint16_t max_flow_num,
@@ -45,15 +46,28 @@ typedef void (*gro_tbl_destroy_fn)(void *tbl);
 typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl);
 
 static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = {
-		gro_tcp4_tbl_create, NULL};
+		gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, NULL};
 static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
-			gro_tcp4_tbl_destroy, NULL};
+			gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy,
+			NULL};
 static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
-			gro_tcp4_tbl_pkt_count, NULL};
+			gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count,
+			NULL};
 
 #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
 		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
 
+#define IS_IPV4_VXLAN_TCP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
+		((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \
+		((ptype & RTE_PTYPE_TUNNEL_VXLAN) == \
+		 RTE_PTYPE_TUNNEL_VXLAN) && \
+		 ((ptype & RTE_PTYPE_INNER_L4_TCP) == \
+		  RTE_PTYPE_INNER_L4_TCP) && \
+		  (((ptype & RTE_PTYPE_INNER_L3_MASK) & \
+		    (RTE_PTYPE_INNER_L3_IPV4 | \
+		     RTE_PTYPE_INNER_L3_IPV4_EXT | \
+		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0))
+
 /*
  * GRO context structure. It keeps the table structures, which are
  * used to merge packets, for different GRO types. Before using
@@ -137,12 +151,20 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
 	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
 
+	/* Allocate a reassembly table for VXLAN GRO */
+	struct gro_vxlan_tcp4_tbl vxlan_tbl;
+	struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {
+		{{0}, 0, 0} };
+
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	uint32_t item_num;
 	int32_t ret;
 	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
+	uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0;
 
-	if (unlikely((param->gro_types & RTE_GRO_TCP_IPV4) == 0))
+	if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
+					RTE_GRO_TCP_IPV4)) == 0))
 		return nb_pkts;
 
 	/* Get the maximum number of packets */
@@ -150,22 +172,47 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 				param->max_item_per_flow));
 	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
 
-	for (i = 0; i < item_num; i++)
-		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
+	if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
+		for (i = 0; i < item_num; i++)
+			vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		vxlan_tbl.flows = vxlan_flows;
+		vxlan_tbl.items = vxlan_items;
+		vxlan_tbl.flow_num = 0;
+		vxlan_tbl.item_num = 0;
+		vxlan_tbl.max_flow_num = item_num;
+		vxlan_tbl.max_item_num = item_num;
+		do_vxlan_gro = 1;
+	}
 
-	tcp_tbl.flows = tcp_flows;
-	tcp_tbl.items = tcp_items;
-	tcp_tbl.flow_num = 0;
-	tcp_tbl.item_num = 0;
-	tcp_tbl.max_flow_num = item_num;
-	tcp_tbl.max_item_num = item_num;
+	if (param->gro_types & RTE_GRO_TCP_IPV4) {
+		for (i = 0; i < item_num; i++)
+			tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		tcp_tbl.flows = tcp_flows;
+		tcp_tbl.items = tcp_items;
+		tcp_tbl.flow_num = 0;
+		tcp_tbl.item_num = 0;
+		tcp_tbl.max_flow_num = item_num;
+		tcp_tbl.max_item_num = item_num;
+		do_tcp4_gro = 1;
+	}
 
 	for (i = 0; i < nb_pkts; i++) {
-		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
-			/*
-			 * The timestamp is ignored, since all packets
-			 * will be flushed from the tables.
-			 */
+		/*
+		 * The timestamp is ignored, since all packets
+		 * will be flushed from the tables.
+		 */
+		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
+				do_vxlan_gro) {
+			ret = gro_vxlan_tcp4_reassemble(pkts[i], &vxlan_tbl, 0);
+			if (ret > 0)
+				/* Merge successfully */
+				nb_after_gro--;
+			else if (ret < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
+				do_tcp4_gro) {
 			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0);
 			if (ret > 0)
 				/* Merge successfully */
@@ -177,8 +224,16 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 	}
 
 	if (nb_after_gro < nb_pkts) {
+		i = 0;
 		/* Flush all packets from the tables */
-		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, pkts, nb_pkts);
+		if (do_vxlan_gro) {
+			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
+					0, pkts, nb_pkts);
+		}
+		if (do_tcp4_gro) {
+			i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
+					&pkts[i], nb_pkts - i);
+		}
 		/* Copy unprocessed packets */
 		if (unprocess_num > 0) {
 			memcpy(&pkts[i], unprocess_pkts,
@@ -197,18 +252,33 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
 {
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	struct gro_ctx *gro_ctx = ctx;
-	void *tcp_tbl;
+	void *tcp_tbl, *vxlan_tbl;
 	uint64_t current_time;
 	uint16_t i, unprocess_num = 0;
+	uint8_t do_tcp4_gro, do_vxlan_gro;
 
-	if (unlikely((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0))
+	if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
+					RTE_GRO_TCP_IPV4)) == 0))
 		return nb_pkts;
 
 	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
+	vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
+
+	do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
+		RTE_GRO_TCP_IPV4;
+	do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
+		RTE_GRO_IPV4_VXLAN_TCP_IPV4;
+
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
+				do_vxlan_gro) {
+			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
+						current_time) < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
+				do_tcp4_gro) {
 			if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
 						current_time) < 0)
 				unprocess_pkts[unprocess_num++] = pkts[i];
@@ -232,18 +302,27 @@ rte_gro_timeout_flush(void *ctx,
 {
 	struct gro_ctx *gro_ctx = ctx;
 	uint64_t flush_timestamp;
+	uint16_t num = 0;
 
 	gro_types = gro_types & gro_ctx->gro_types;
 	flush_timestamp = rte_rdtsc() - timeout_cycles;
 
-	if (gro_types & RTE_GRO_TCP_IPV4) {
-		return gro_tcp4_tbl_timeout_flush(
+	if (gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
+		num = gro_vxlan_tcp4_tbl_timeout_flush(gro_ctx->tbls[
+				RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX],
+				flush_timestamp, out, max_nb_out);
+		max_nb_out -= num;
+	}
+
+	/* If no available space in 'out', stop flushing. */
+	if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
+		num += gro_tcp4_tbl_timeout_flush(
 				gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX],
 				flush_timestamp,
-				out, max_nb_out);
+				&out[num], max_nb_out);
 	}
 
-	return 0;
+	return num;
 }
 
 uint64_t
diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
index 36a1e60..5ed72d7 100644
--- a/lib/librte_gro/rte_gro.h
+++ b/lib/librte_gro/rte_gro.h
@@ -51,12 +51,15 @@ extern "C" {
  */
 #define RTE_GRO_TYPE_MAX_NUM 64
 /**< the max number of supported GRO types */
-#define RTE_GRO_TYPE_SUPPORT_NUM 1
+#define RTE_GRO_TYPE_SUPPORT_NUM 2
 /**< the number of currently supported GRO types */
 
 #define RTE_GRO_TCP_IPV4_INDEX 0
 #define RTE_GRO_TCP_IPV4 (1ULL << RTE_GRO_TCP_IPV4_INDEX)
 /**< TCP/IPv4 GRO flag */
+#define RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX 1
+#define RTE_GRO_IPV4_VXLAN_TCP_IPV4 (1ULL << RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX)
+/**< VxLAN GRO flag. */
 
 /**
  * Structure used to create GRO context objects or used to pass
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 2/2] gro: support VxLAN GRO
  2017-12-22  7:25     ` [PATCH v3 2/2] gro: support VxLAN GRO Jiayu Hu
@ 2017-12-22  8:17       ` Chen, Junjie J
  2017-12-25  6:36         ` Jiayu Hu
  2017-12-29  3:53       ` Chen, Junjie J
  1 sibling, 1 reply; 31+ messages in thread
From: Chen, Junjie J @ 2017-12-22  8:17 UTC (permalink / raw)
  To: Hu, Jiayu, dev
  Cc: Tan, Jianfeng, Ananyev, Konstantin, stephen, Yigit, Ferruh, Yao, Lei A

Hi Jiayu

> -----Original Message-----
> From: Hu, Jiayu
> Sent: Friday, December 22, 2017 3:26 PM
> To: dev@dpdk.org
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Chen, Junjie J
> <junjie.j.chen@intel.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; stephen@networkplumber.org; Yigit,
> Ferruh <ferruh.yigit@intel.com>; Yao, Lei A <lei.a.yao@intel.com>; Hu, Jiayu
> <jiayu.hu@intel.com>
> Subject: [PATCH v3 2/2] gro: support VxLAN GRO
> 
> This patch adds a framework that allows GRO on tunneled packets.
> Furthermore, it leverages that framework to provide GRO support for
> VxLAN-encapsulated packets. Supported VxLAN packets must have an outer
> IPv4 header, and contain an inner TCP/IPv4 packet.
> 
> VxLAN GRO doesn't check if input packets have correct checksums and
> doesn't update checksums for output packets. Additionally, it assumes the
> packets are complete (i.e., MF==0 && frag_off==0), when IP fragmentation is
> possible (i.e., DF==0).
> 
> Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
> ---
>  .../prog_guide/generic_receive_offload_lib.rst     |  31 +-
>  lib/librte_gro/Makefile                            |   1 +
>  lib/librte_gro/gro_vxlan_tcp4.c                    | 515
> +++++++++++++++++++++
>  lib/librte_gro/gro_vxlan_tcp4.h                    | 184 ++++++++
>  lib/librte_gro/rte_gro.c                           | 129 +++++-
>  lib/librte_gro/rte_gro.h                           |   5 +-
>  6 files changed, 837 insertions(+), 28 deletions(-)  create mode 100644
> lib/librte_gro/gro_vxlan_tcp4.c  create mode 100644
> lib/librte_gro/gro_vxlan_tcp4.h
> 
> diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst
> b/doc/guides/prog_guide/generic_receive_offload_lib.rst
> index c2d7a41..078bec0 100644
> --- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
> +++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
> @@ -57,7 +57,9 @@ assumes the packets are complete (i.e., MF==0 &&
> frag_off==0), when IP  fragmentation is possible (i.e., DF==0). Additionally, it
> complies RFC
>  6864 to process the IPv4 ID field.
> 
> -Currently, the GRO library provides GRO supports for TCP/IPv4 packets.
> +Currently, the GRO library provides GRO support for TCP/IPv4 packets
> +and VxLAN packets which contain an outer IPv4 header and an inner
> +TCP/IPv4 packet.
> 
>  Two Sets of API
>  ---------------
> @@ -108,7 +110,8 @@ Reassembly Algorithm
> 
>  The reassembly algorithm is used for reassembling packets. In the GRO
> library, different GRO types can use different algorithms. In this -section, we
> will introduce an algorithm, which is used by TCP/IPv4 GRO.
> +section, we will introduce an algorithm, which is used by TCP/IPv4 GRO
> +and VxLAN GRO.
> 
>  Challenges
>  ~~~~~~~~~~
> @@ -185,6 +188,30 @@ Header fields deciding if two packets are neighbors
> include:
>  - IPv4 ID. The IPv4 ID fields of the packets, whose DF bit is 0, should
>    be increased by 1.
> 
> +VxLAN GRO
> +---------
> +
> +The table structure used by VxLAN GRO, which is in charge of processing
> +VxLAN packets with an outer IPv4 header and inner TCP/IPv4 packet, is
> +similar to that of TCP/IPv4 GRO. However, the header fields used
> +to define a VxLAN flow include:
> +
> +- outer source and destination: Ethernet and IP address, UDP port
> +
> +- VxLAN header (VNI and flag)
> +
> +- inner source and destination: Ethernet and IP address, TCP port
> +
> +Header fields deciding if packets are neighbors include:
> +
> +- outer IPv4 ID. The IPv4 ID fields of the packets, whose DF bit in the
> +  outer IPv4 header is 0, should be increased by 1.
> +
> +- inner TCP sequence number
> +
> +- inner IPv4 ID. The IPv4 ID fields of the packets, whose DF bit in the
> +  inner IPv4 header is 0, should be increased by 1.
> +
>  .. note::
>          We comply RFC 6864 to process the IPv4 ID field. Specifically,
>          we check IPv4 ID fields for the packets whose DF bit is 0 and diff
> --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile index
> eb423cc..0110455 100644
> --- a/lib/librte_gro/Makefile
> +++ b/lib/librte_gro/Makefile
> @@ -45,6 +45,7 @@ LIBABIVER := 1
>  # source files
>  SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
>  SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c
> +SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_vxlan_tcp4.c
> 
>  # install this header file
>  SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h diff --git
> a/lib/librte_gro/gro_vxlan_tcp4.c b/lib/librte_gro/gro_vxlan_tcp4.c new file
> mode 100644 index 0000000..6567779
> --- /dev/null
> +++ b/lib/librte_gro/gro_vxlan_tcp4.c
> @@ -0,0 +1,515 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2017 Intel Corporation. All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products
> derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +#include <rte_malloc.h>
> +#include <rte_mbuf.h>
> +#include <rte_cycles.h>
> +#include <rte_ethdev.h>
> +#include <rte_udp.h>
> +
> +#include "gro_vxlan_tcp4.h"
> +
> +void *
> +gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
> +		uint16_t max_flow_num,
> +		uint16_t max_item_per_flow)
> +{
> +	struct gro_vxlan_tcp4_tbl *tbl;
> +	size_t size;
> +	uint32_t entries_num, i;
> +
> +	entries_num = max_flow_num * max_item_per_flow;
> +	entries_num = RTE_MIN(entries_num,
> GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM);
> +
> +	if (entries_num == 0)
> +		return NULL;
> +
> +	tbl = rte_zmalloc_socket(__func__,
> +			sizeof(struct gro_vxlan_tcp4_tbl),
> +			RTE_CACHE_LINE_SIZE,
> +			socket_id);
> +	if (tbl == NULL)
> +		return NULL;
> +
> +	size = sizeof(struct gro_vxlan_tcp4_item) * entries_num;
> +	tbl->items = rte_zmalloc_socket(__func__,
> +			size,
> +			RTE_CACHE_LINE_SIZE,
> +			socket_id);
> +	if (tbl->items == NULL) {
> +		rte_free(tbl);
> +		return NULL;
> +	}
> +	tbl->max_item_num = entries_num;
> +
> +	size = sizeof(struct gro_vxlan_tcp4_flow) * entries_num;
> +	tbl->flows = rte_zmalloc_socket(__func__,
> +			size,
> +			RTE_CACHE_LINE_SIZE,
> +			socket_id);
> +	if (tbl->flows == NULL) {
> +		rte_free(tbl->items);
> +		rte_free(tbl);
> +		return NULL;
> +	}
> +
> +	for (i = 0; i < entries_num; i++)
> +		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
> +	tbl->max_flow_num = entries_num;
> +
> +	return tbl;
> +}
> +
> +void
> +gro_vxlan_tcp4_tbl_destroy(void *tbl)
> +{
> +	struct gro_vxlan_tcp4_tbl *vxlan_tbl = tbl;
> +
> +	if (vxlan_tbl) {
> +		rte_free(vxlan_tbl->items);
> +		rte_free(vxlan_tbl->flows);
> +	}
> +	rte_free(vxlan_tbl);

What if vxlan_tbl is NULL — do we still want to call rte_free()? The rte_free(vxlan_tbl) call should be moved inside the if block, and NULL checks could also be added for items and flows.

> +}
> +
> +static inline uint32_t
> +find_an_empty_item(struct gro_vxlan_tcp4_tbl *tbl) {
> +	uint32_t max_item_num = tbl->max_item_num, i;
> +
> +	for (i = 0; i < max_item_num; i++)
> +		if (tbl->items[i].inner_item.firstseg == NULL)
> +			return i;
> +	return INVALID_ARRAY_INDEX;
> +}
> +
> +static inline uint32_t
> +find_an_empty_flow(struct gro_vxlan_tcp4_tbl *tbl) {
> +	uint32_t max_flow_num = tbl->max_flow_num, i;
> +
> +	for (i = 0; i < max_flow_num; i++)
> +		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
> +			return i;
> +	return INVALID_ARRAY_INDEX;
> +}
> +
> +static inline uint32_t
> +insert_new_item(struct gro_vxlan_tcp4_tbl *tbl,
> +		struct rte_mbuf *pkt,
> +		uint64_t start_time,
> +		uint32_t prev_idx,
> +		uint32_t sent_seq,
> +		uint16_t outer_ip_id,
> +		uint16_t ip_id,
> +		uint8_t outer_is_atomic,
> +		uint8_t is_atomic)
> +{
> +	uint32_t item_idx;
> +
> +	item_idx = find_an_empty_item(tbl);
> +	if (item_idx == INVALID_ARRAY_INDEX)
> +		return INVALID_ARRAY_INDEX;
> +
> +	tbl->items[item_idx].inner_item.firstseg = pkt;
> +	tbl->items[item_idx].inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
> +	tbl->items[item_idx].inner_item.start_time = start_time;
> +	tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
> +	tbl->items[item_idx].inner_item.sent_seq = sent_seq;
> +	tbl->items[item_idx].inner_item.ip_id = ip_id;
> +	tbl->items[item_idx].inner_item.nb_merged = 1;
> +	tbl->items[item_idx].inner_item.is_atomic = is_atomic;
> +	tbl->items[item_idx].outer_ip_id = outer_ip_id;
> +	tbl->items[item_idx].outer_is_atomic = outer_is_atomic;
> +	tbl->item_num++;
> +
> +	/* If the previous packet exists, chain the new one with it. */
> +	if (prev_idx != INVALID_ARRAY_INDEX) {
> +		tbl->items[item_idx].inner_item.next_pkt_idx =
> +			tbl->items[prev_idx].inner_item.next_pkt_idx;
> +		tbl->items[prev_idx].inner_item.next_pkt_idx = item_idx;
> +	}
> +
> +	return item_idx;
> +}
> +
> +static inline uint32_t
> +delete_item(struct gro_vxlan_tcp4_tbl *tbl,
> +		uint32_t item_idx,
> +		uint32_t prev_item_idx)
> +{
> +	uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
> +
> +	/* NULL indicates an empty item. */
> +	tbl->items[item_idx].inner_item.firstseg = NULL;
> +	tbl->item_num--;
> +	if (prev_item_idx != INVALID_ARRAY_INDEX)
> +		tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
> +
> +	return next_idx;
> +}
> +
> +static inline uint32_t
> +insert_new_flow(struct gro_vxlan_tcp4_tbl *tbl,
> +		struct vxlan_tcp4_flow_key *src,
> +		uint32_t item_idx)
> +{
> +	struct vxlan_tcp4_flow_key *dst;
> +	uint32_t flow_idx;
> +
> +	flow_idx = find_an_empty_flow(tbl);
> +	if (flow_idx == INVALID_ARRAY_INDEX)
> +		return INVALID_ARRAY_INDEX;
> +
> +	dst = &(tbl->flows[flow_idx].key);
> +
> +	ether_addr_copy(&(src->inner_key.eth_saddr),
> +			&(dst->inner_key.eth_saddr));
> +	ether_addr_copy(&(src->inner_key.eth_daddr),
> +			&(dst->inner_key.eth_daddr));
> +	dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
> +	dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
> +	dst->inner_key.recv_ack = src->inner_key.recv_ack;
> +	dst->inner_key.src_port = src->inner_key.src_port;
> +	dst->inner_key.dst_port = src->inner_key.dst_port;
> +
> +	dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
> +	dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
> +	ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
> +	ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
> +	dst->outer_ip_src_addr = src->outer_ip_src_addr;
> +	dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
> +	dst->outer_src_port = src->outer_src_port;
> +	dst->outer_dst_port = src->outer_dst_port;
> +
> +	tbl->flows[flow_idx].start_index = item_idx;
> +	tbl->flow_num++;
> +
> +	return flow_idx;
> +}
> +
> +static inline int
> +is_same_vxlan_tcp4_flow(struct vxlan_tcp4_flow_key k1,
> +		struct vxlan_tcp4_flow_key k2)
> +{
> +	return (is_same_ether_addr(&k1.outer_eth_saddr,
> &k2.outer_eth_saddr) &&
> +			is_same_ether_addr(&k1.outer_eth_daddr,
> +				&k2.outer_eth_daddr) &&
> +			(k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
> +			(k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
> +			(k1.outer_src_port == k2.outer_src_port) &&
> +			(k1.outer_dst_port == k2.outer_dst_port) &&
> +			(k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
> +			(k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
> +			is_same_tcp4_flow(k1.inner_key, k2.inner_key)); }
> +
> +static inline int
> +check_vxlan_seq_option(struct gro_vxlan_tcp4_item *item,
> +		struct tcp_hdr *tcp_hdr,
> +		uint32_t sent_seq,
> +		uint16_t outer_ip_id,
> +		uint16_t ip_id,
> +		uint16_t tcp_hl,
> +		uint16_t tcp_dl,
> +		uint8_t outer_is_atomic,
> +		uint8_t is_atomic)
> +{
> +	struct rte_mbuf *pkt = item->inner_item.firstseg;
> +	int cmp;
> +	uint16_t l2_offset;
> +
> +	/* Don't merge packets whose outer DF bits are different. */
> +	if (unlikely(item->outer_is_atomic ^ outer_is_atomic))
> +		return 0;
> +
> +	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
> +	cmp = check_seq_option(&item->inner_item, tcp_hdr, sent_seq, ip_id,
> +			tcp_hl, tcp_dl, l2_offset, is_atomic);
> +	if ((cmp == 1) && (outer_is_atomic ||
> +				(outer_ip_id == item->outer_ip_id +
> +				 item->inner_item.nb_merged)))
> +		/* Append the packet. */
> +		return 1;
> +	else if ((cmp == -1) && (outer_is_atomic ||
> +				(outer_ip_id + 1 == item->outer_ip_id)))
> +		/* Prepend the packet. */
> +		return -1;
> +
> +	return 0;
> +}
> +
> +static inline int
> +merge_two_vxlan_tcp4_packets(struct gro_vxlan_tcp4_item *item,
> +		struct rte_mbuf *pkt,
> +		int cmp,
> +		uint32_t sent_seq,
> +		uint16_t outer_ip_id,
> +		uint16_t ip_id)
> +{
> +	if (merge_two_tcp4_packets(&item->inner_item, pkt, cmp, sent_seq,
> +				ip_id, pkt->outer_l2_len +
> +				pkt->outer_l3_len)) {
> +		item->outer_ip_id = cmp < 0 ? outer_ip_id : item->outer_ip_id;
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static inline void
> +update_vxlan_header(struct gro_vxlan_tcp4_item *item) {
> +	struct ipv4_hdr *ipv4_hdr;
> +	struct udp_hdr *udp_hdr;
> +	struct rte_mbuf *pkt = item->inner_item.firstseg;
> +	uint16_t len;
> +
> +	/* Update the outer IPv4 header. */
> +	len = pkt->pkt_len - pkt->outer_l2_len;
> +	ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
> +			pkt->outer_l2_len);
> +	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
> +
> +	/* Update the outer UDP header. */
> +	len -= pkt->outer_l3_len;
> +	udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + pkt->outer_l3_len);
> +	udp_hdr->dgram_len = rte_cpu_to_be_16(len);
> +
> +	/* Update the inner IPv4 header. */
> +	len -= pkt->l2_len;
> +	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
> +	ipv4_hdr->total_length = rte_cpu_to_be_16(len); }
> +
> +int32_t
> +gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
> +		struct gro_vxlan_tcp4_tbl *tbl,
> +		uint64_t start_time)
> +{
> +	struct ether_hdr *outer_eth_hdr, *eth_hdr;
> +	struct ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
> +	struct tcp_hdr *tcp_hdr;
> +	struct udp_hdr *udp_hdr;
> +	struct vxlan_hdr *vxlan_hdr;
> +	uint32_t sent_seq;
> +	uint16_t tcp_dl, frag_off, outer_ip_id, ip_id;
> +	uint8_t outer_is_atomic, is_atomic;
> +
> +	struct vxlan_tcp4_flow_key key;
> +	uint32_t cur_idx, prev_idx, item_idx;
> +	uint32_t i, max_flow_num;
> +	int cmp;
> +	uint16_t hdr_len;
> +
> +	outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
> +	outer_ipv4_hdr = (struct ipv4_hdr *)((char *)outer_eth_hdr +
> +			pkt->outer_l2_len);
> +	udp_hdr = (struct udp_hdr *)((char *)outer_ipv4_hdr +
> +			pkt->outer_l3_len);
> +	vxlan_hdr = (struct vxlan_hdr *)((char *)udp_hdr +
> +			sizeof(struct udp_hdr));
> +	eth_hdr = (struct ether_hdr *)((char *)vxlan_hdr +
> +			sizeof(struct vxlan_hdr));
> +	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
> +	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
> +
> +	/*
> +	 * Don't process the packet which has FIN, SYN, RST, PSH, URG,
> +	 * ECE or CWR set.
> +	 */
> +	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
> +		return -1;
> +
> +	hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
> +		pkt->l3_len + pkt->l4_len;
> +	/*
> +	 * Don't process the packet whose payload length is less than or
> +	 * equal to 0.
> +	 */
> +	tcp_dl = pkt->pkt_len - hdr_len;
> +	if (tcp_dl <= 0)
> +		return -1;
> +
> +	/*
> +	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
> +	 * whose DF bit is 1, IPv4 ID is ignored.
> +	 */
> +	frag_off = rte_be_to_cpu_16(outer_ipv4_hdr->fragment_offset);
> +	outer_is_atomic = (frag_off & IPV4_HDR_DF_FLAG) ==
> IPV4_HDR_DF_FLAG;
> +	outer_ip_id = outer_is_atomic ? 0 :
> +		rte_be_to_cpu_16(outer_ipv4_hdr->packet_id);
> +	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
> +	is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
> +	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
> +
> +	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> +
> +	ether_addr_copy(&(eth_hdr->s_addr), &(key.inner_key.eth_saddr));
> +	ether_addr_copy(&(eth_hdr->d_addr), &(key.inner_key.eth_daddr));
> +	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
> +	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
> +	key.inner_key.recv_ack = tcp_hdr->recv_ack;
> +	key.inner_key.src_port = tcp_hdr->src_port;
> +	key.inner_key.dst_port = tcp_hdr->dst_port;
> +
> +	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
> +	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;
> +	ether_addr_copy(&(outer_eth_hdr->s_addr), &(key.outer_eth_saddr));
> +	ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
> +	key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
> +	key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
> +	key.outer_src_port = udp_hdr->src_port;
> +	key.outer_dst_port = udp_hdr->dst_port;
> +
> +	/* Search for a matched flow. */
> +	max_flow_num = tbl->max_flow_num;
> +	for (i = 0; i < max_flow_num; i++) {
> +		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX &&
> +				is_same_vxlan_tcp4_flow(tbl->flows[i].key,
> +					key))
> +			break;
> +	}
> +
> +	/*
> +	 * Can't find a matched flow. Insert a new flow and store the
> +	 * packet into the flow.
> +	 */
> +	if (i == tbl->max_flow_num) {
> +		item_idx = insert_new_item(tbl, pkt, start_time,
> +				INVALID_ARRAY_INDEX, sent_seq, outer_ip_id,
> +				ip_id, outer_is_atomic, is_atomic);
> +		if (item_idx == INVALID_ARRAY_INDEX)
> +			return -1;
> +		if (insert_new_flow(tbl, &key, item_idx) ==
> +				INVALID_ARRAY_INDEX) {
> +			/*
> +			 * Fail to insert a new flow, so
> +			 * delete the inserted packet.
> +			 */
> +			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
> +			return -1;
> +		}
> +		return 0;
> +	}
> +
> +	/* Check all packets in the flow and try to find a neighbor. */
> +	cur_idx = tbl->flows[i].start_index;
> +	prev_idx = cur_idx;
> +	do {
> +		cmp = check_vxlan_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
> +				sent_seq, outer_ip_id, ip_id, pkt->l4_len,
> +				tcp_dl, outer_is_atomic, is_atomic);
> +		if (cmp) {
> +			if (merge_two_vxlan_tcp4_packets(&(tbl->items[cur_idx]),
> +						pkt, cmp, sent_seq,
> +						outer_ip_id, ip_id))
> +				return 1;
> +			/*
> +			 * Can't merge two packets, as the packet
> +			 * length will be greater than the max value.
> +			 * Insert the packet into the flow.
> +			 */
> +			if (insert_new_item(tbl, pkt, start_time, prev_idx,
> +						sent_seq, outer_ip_id,
> +						ip_id, outer_is_atomic,
> +						is_atomic) ==
> +					INVALID_ARRAY_INDEX)
> +				return -1;
> +			return 0;
> +		}
> +		prev_idx = cur_idx;
> +		cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx;
> +	} while (cur_idx != INVALID_ARRAY_INDEX);
> +
> +	/* Can't find neighbor. Insert the packet into the flow. */
> +	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
> +				outer_ip_id, ip_id, outer_is_atomic,
> +				is_atomic) == INVALID_ARRAY_INDEX)
> +		return -1;
> +
> +	return 0;
> +}
> +
> +uint16_t
> +gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
> +		uint64_t flush_timestamp,
> +		struct rte_mbuf **out,
> +		uint16_t nb_out)
> +{
> +	uint16_t k = 0;
> +	uint32_t i, j;
> +	uint32_t max_flow_num = tbl->max_flow_num;
> +
> +	for (i = 0; i < max_flow_num; i++) {
> +		if (unlikely(tbl->flow_num == 0))
> +			return k;
> +
> +		j = tbl->flows[i].start_index;
> +		while (j != INVALID_ARRAY_INDEX) {
> +			if (tbl->items[j].inner_item.start_time <=
> +					flush_timestamp) {
> +				out[k++] = tbl->items[j].inner_item.firstseg;
> +				if (tbl->items[j].inner_item.nb_merged > 1)
> +					update_vxlan_header(&(tbl->items[j]));
> +				/*
> +				 * Delete the item and get the next packet
> +				 * index.
> +				 */
> +				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
> +				tbl->flows[i].start_index = j;
> +				if (j == INVALID_ARRAY_INDEX)
> +					tbl->flow_num--;
> +
> +				if (unlikely(k == nb_out))
> +					return k;
> +			} else
> +				/*
> +				 * The left packets in the flow won't be
> +				 * timeout. Go to check other flows.
> +				 */
> +				break;
> +		}
> +	}
> +	return k;
> +}
> +
> +uint32_t
> +gro_vxlan_tcp4_tbl_pkt_count(void *tbl) {
> +	struct gro_vxlan_tcp4_tbl *gro_tbl = tbl;
> +
> +	if (gro_tbl)
> +		return gro_tbl->item_num;
> +
> +	return 0;
> +}
> diff --git a/lib/librte_gro/gro_vxlan_tcp4.h b/lib/librte_gro/gro_vxlan_tcp4.h
> new file mode 100644 index 0000000..66baf73
> --- /dev/null
> +++ b/lib/librte_gro/gro_vxlan_tcp4.h
> @@ -0,0 +1,184 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2017 Intel Corporation. All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products
> derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +#ifndef _GRO_VXLAN_TCP4_H_
> +#define _GRO_VXLAN_TCP4_H_
> +
> +#include "gro_tcp4.h"
> +
> +#define GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
> +
> +/* Header fields representing a VxLAN flow */ struct
> +vxlan_tcp4_flow_key {
> +	struct tcp4_flow_key inner_key;
> +	struct vxlan_hdr vxlan_hdr;
> +
> +	struct ether_addr outer_eth_saddr;
> +	struct ether_addr outer_eth_daddr;
> +
> +	uint32_t outer_ip_src_addr;
> +	uint32_t outer_ip_dst_addr;
> +
> +	/* Outer UDP ports */
> +	uint16_t outer_src_port;
> +	uint16_t outer_dst_port;
> +
> +};
> +
> +struct gro_vxlan_tcp4_flow {
> +	struct vxlan_tcp4_flow_key key;
> +	/*
> +	 * The index of the first packet in the flow. INVALID_ARRAY_INDEX
> +	 * indicates an empty flow.
> +	 */
> +	uint32_t start_index;
> +};
> +
> +struct gro_vxlan_tcp4_item {
> +	struct gro_tcp4_item inner_item;
> +	/* IPv4 ID in the outer IPv4 header */
> +	uint16_t outer_ip_id;
> +	/* Indicate if outer IPv4 ID can be ignored */
> +	uint8_t outer_is_atomic;
> +};
> +
> +/*
> + * VxLAN (with an outer IPv4 header and an inner TCP/IPv4 packet)
> + * reassembly table structure
> + */
> +struct gro_vxlan_tcp4_tbl {
> +	/* item array */
> +	struct gro_vxlan_tcp4_item *items;
> +	/* flow array */
> +	struct gro_vxlan_tcp4_flow *flows;
> +	/* current item number */
> +	uint32_t item_num;
> +	/* current flow number */
> +	uint32_t flow_num;
> +	/* the maximum item number */
> +	uint32_t max_item_num;
> +	/* the maximum flow number */
> +	uint32_t max_flow_num;
> +};
> +
> +/**
> + * This function creates a VxLAN reassembly table for VxLAN packets
> + * which have an outer IPv4 header and an inner TCP/IPv4 packet.
> + *
> + * @param socket_id
> + *  Socket index for allocating the table
> + * @param max_flow_num
> + *  The maximum number of flows in the table
> + * @param max_item_per_flow
> + *  The maximum number of packets per flow
> + *
> + * @return
> + *  - Return the table pointer on success.
> + *  - Return NULL on failure.
> + */
> +void *gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
> +		uint16_t max_flow_num,
> +		uint16_t max_item_per_flow);
> +
> +/**
> + * This function destroys a VxLAN reassembly table.
> + *
> + * @param tbl
> + *  Pointer pointing to the VxLAN reassembly table  */ void
> +gro_vxlan_tcp4_tbl_destroy(void *tbl);
> +
> +/**
> + * This function merges a VxLAN packet which has an outer IPv4 header
> +and
> + * an inner TCP/IPv4 packet. It doesn't process the packet, whose TCP
> + * header has SYN, FIN, RST, PSH, CWR, ECE or URG bit set, or which
> + * doesn't have payload.
> + *
> + * This function doesn't check if the packet has correct checksums and
> + * doesn't re-calculate checksums for the merged packet. Additionally,
> + * it assumes the packets are complete (i.e., MF==0 && frag_off==0),
> +when
> + * IP fragmentation is possible (i.e., DF==0). It returns the packet,
> +if
> + * the packet has invalid parameters (e.g. SYN bit is set) or there is
> +no
> + * available space in the table.
> + *
> + * @param pkt
> + *  Packet to reassemble
> + * @param tbl
> + *  Pointer pointing to the VxLAN reassembly table
> + * @start_time
> + *  The time when the packet is inserted into the table
> + *
> + * @return
> + *  - Return a positive value if the packet is merged.
> + *  - Return zero if the packet isn't merged but stored in the table.
> + *  - Return a negative value for invalid parameters or no available
> + *    space in the table.
> + */
> +int32_t gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
> +		struct gro_vxlan_tcp4_tbl *tbl,
> +		uint64_t start_time);
> +
> +/**
> + * This function flushes timeout packets in the VxLAN reassembly table,
> + * and without updating checksums.
> + *
> + * @param tbl
> + *  Pointer pointing to a VxLAN GRO table
> + * @param flush_timestamp
> + *  This function flushes packets which are inserted into the table
> + *  before or at the flush_timestamp.
> + * @param out
> + *  Pointer array used to keep flushed packets
> + * @param nb_out
> + *  The element number in 'out'. It also determines the maximum number
> +of
> + *  packets that can be flushed finally.
> + *
> + * @return
> + *  The number of flushed packets
> + */
> +uint16_t gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
> +		uint64_t flush_timestamp,
> +		struct rte_mbuf **out,
> +		uint16_t nb_out);
> +
> +/**
> + * This function returns the number of the packets in a VxLAN
> + * reassembly table.
> + *
> + * @param tbl
> + *  Pointer pointing to the VxLAN reassembly table
> + *
> + * @return
> + *  The number of packets in the table
> + */
> +uint32_t gro_vxlan_tcp4_tbl_pkt_count(void *tbl); #endif
> diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c index
> b3931a8..5a26893 100644
> --- a/lib/librte_gro/rte_gro.c
> +++ b/lib/librte_gro/rte_gro.c
> @@ -37,6 +37,7 @@
> 
>  #include "rte_gro.h"
>  #include "gro_tcp4.h"
> +#include "gro_vxlan_tcp4.h"
> 
>  typedef void *(*gro_tbl_create_fn)(uint16_t socket_id,
>  		uint16_t max_flow_num,
> @@ -45,15 +46,28 @@ typedef void (*gro_tbl_destroy_fn)(void *tbl);
> typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl);
> 
>  static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = {
> -		gro_tcp4_tbl_create, NULL};
> +		gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, NULL};
>  static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
> -			gro_tcp4_tbl_destroy, NULL};
> +			gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy,
> +			NULL};
>  static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM]
> = {
> -			gro_tcp4_tbl_pkt_count, NULL};
> +			gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count,
> +			NULL};
> 
>  #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
>  		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
> 
> +#define IS_IPV4_VXLAN_TCP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype)
> && \
> +		((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \
> +		((ptype & RTE_PTYPE_TUNNEL_VXLAN) == \
> +		 RTE_PTYPE_TUNNEL_VXLAN) && \
> +		 ((ptype & RTE_PTYPE_INNER_L4_TCP) == \
> +		  RTE_PTYPE_INNER_L4_TCP) && \
> +		  (((ptype & RTE_PTYPE_INNER_L3_MASK) & \
> +		    (RTE_PTYPE_INNER_L3_IPV4 | \
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT | \
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0))
> +
>  /*
>   * GRO context structure. It keeps the table structures, which are
>   * used to merge packets, for different GRO types. Before using @@ -137,12
> +151,20 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
>  	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>  	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] =
> {{0} };
> 
> +	/* Allocate a reassembly table for VXLAN GRO */
> +	struct gro_vxlan_tcp4_tbl vxlan_tbl;
> +	struct gro_vxlan_tcp4_flow
> vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> +	struct gro_vxlan_tcp4_item
> vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {
> +		{{0}, 0, 0} };
> +
>  	struct rte_mbuf *unprocess_pkts[nb_pkts];
>  	uint32_t item_num;
>  	int32_t ret;
>  	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
> +	uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0;
> 
> -	if (unlikely((param->gro_types & RTE_GRO_TCP_IPV4) == 0))
> +	if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
> +					RTE_GRO_TCP_IPV4)) == 0))
>  		return nb_pkts;
> 
>  	/* Get the maximum number of packets */ @@ -150,22 +172,47 @@
> rte_gro_reassemble_burst(struct rte_mbuf **pkts,
>  				param->max_item_per_flow));
>  	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
> 
> -	for (i = 0; i < item_num; i++)
> -		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
> +	if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
> +		for (i = 0; i < item_num; i++)
> +			vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
> +
> +		vxlan_tbl.flows = vxlan_flows;
> +		vxlan_tbl.items = vxlan_items;
> +		vxlan_tbl.flow_num = 0;
> +		vxlan_tbl.item_num = 0;
> +		vxlan_tbl.max_flow_num = item_num;
> +		vxlan_tbl.max_item_num = item_num;
> +		do_vxlan_gro = 1;
> +	}
> 
> -	tcp_tbl.flows = tcp_flows;
> -	tcp_tbl.items = tcp_items;
> -	tcp_tbl.flow_num = 0;
> -	tcp_tbl.item_num = 0;
> -	tcp_tbl.max_flow_num = item_num;
> -	tcp_tbl.max_item_num = item_num;
> +	if (param->gro_types & RTE_GRO_TCP_IPV4) {
> +		for (i = 0; i < item_num; i++)
> +			tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
> +
> +		tcp_tbl.flows = tcp_flows;
> +		tcp_tbl.items = tcp_items;
> +		tcp_tbl.flow_num = 0;
> +		tcp_tbl.item_num = 0;
> +		tcp_tbl.max_flow_num = item_num;
> +		tcp_tbl.max_item_num = item_num;
> +		do_tcp4_gro = 1;
> +	}
> 
>  	for (i = 0; i < nb_pkts; i++) {
> -		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
> -			/*
> -			 * The timestamp is ignored, since all packets
> -			 * will be flushed from the tables.
> -			 */
> +		/*
> +		 * The timestamp is ignored, since all packets
> +		 * will be flushed from the tables.
> +		 */
> +		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
> +				do_vxlan_gro) {
> +			ret = gro_vxlan_tcp4_reassemble(pkts[i], &vxlan_tbl, 0);
> +			if (ret > 0)
> +				/* Merge successfully */
> +				nb_after_gro--;
> +			else if (ret < 0)
> +				unprocess_pkts[unprocess_num++] = pkts[i];
> +		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
> +				do_tcp4_gro) {
>  			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0);
>  			if (ret > 0)
>  				/* Merge successfully */
> @@ -177,8 +224,16 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
>  	}
> 
>  	if (nb_after_gro < nb_pkts) {
> +		i = 0;
>  		/* Flush all packets from the tables */
> -		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, pkts, nb_pkts);
> +		if (do_vxlan_gro) {
> +			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
> +					0, pkts, nb_pkts);
> +		}
> +		if (do_tcp4_gro) {
> +			i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
> +					&pkts[i], nb_pkts - i);
> +		}
>  		/* Copy unprocessed packets */
>  		if (unprocess_num > 0) {
>  			memcpy(&pkts[i], unprocess_pkts,
> @@ -197,18 +252,33 @@ rte_gro_reassemble(struct rte_mbuf **pkts,  {
>  	struct rte_mbuf *unprocess_pkts[nb_pkts];
>  	struct gro_ctx *gro_ctx = ctx;
> -	void *tcp_tbl;
> +	void *tcp_tbl, *vxlan_tbl;
>  	uint64_t current_time;
>  	uint16_t i, unprocess_num = 0;
> +	uint8_t do_tcp4_gro, do_vxlan_gro;
> 
> -	if (unlikely((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0))
> +	if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
> +					RTE_GRO_TCP_IPV4)) == 0))
>  		return nb_pkts;
> 
>  	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
> +	vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
> +
> +	do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
> +		RTE_GRO_TCP_IPV4;
> +	do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4)
> ==
> +		RTE_GRO_IPV4_VXLAN_TCP_IPV4;
> +
>  	current_time = rte_rdtsc();
> 
>  	for (i = 0; i < nb_pkts; i++) {
> -		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
> +		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
> +				do_vxlan_gro) {
> +			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
> +						current_time) < 0)
> +				unprocess_pkts[unprocess_num++] = pkts[i];
> +		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
> +				do_tcp4_gro) {
>  			if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
>  						current_time) < 0)
>  				unprocess_pkts[unprocess_num++] = pkts[i]; @@ -232,18
> +302,27 @@ rte_gro_timeout_flush(void *ctx,  {
>  	struct gro_ctx *gro_ctx = ctx;
>  	uint64_t flush_timestamp;
> +	uint16_t num = 0;
> 
>  	gro_types = gro_types & gro_ctx->gro_types;
>  	flush_timestamp = rte_rdtsc() - timeout_cycles;
> 
> -	if (gro_types & RTE_GRO_TCP_IPV4) {
> -		return gro_tcp4_tbl_timeout_flush(
> +	if (gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
> +		num = gro_vxlan_tcp4_tbl_timeout_flush(gro_ctx->tbls[
> +				RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX],
> +				flush_timestamp, out, max_nb_out);
> +		max_nb_out -= num;
> +	}
> +
> +	/* If no available space in 'out', stop flushing. */
> +	if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
> +		num += gro_tcp4_tbl_timeout_flush(
>  				gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX],
>  				flush_timestamp,
> -				out, max_nb_out);
> +				&out[num], max_nb_out);
>  	}
> 
> -	return 0;
> +	return num;
>  }
> 
>  uint64_t
> diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h index
> 36a1e60..5ed72d7 100644
> --- a/lib/librte_gro/rte_gro.h
> +++ b/lib/librte_gro/rte_gro.h
> @@ -51,12 +51,15 @@ extern "C" {
>   */
>  #define RTE_GRO_TYPE_MAX_NUM 64
>  /**< the max number of supported GRO types */ -#define
> RTE_GRO_TYPE_SUPPORT_NUM 1
> +#define RTE_GRO_TYPE_SUPPORT_NUM 2
>  /**< the number of currently supported GRO types */
> 
>  #define RTE_GRO_TCP_IPV4_INDEX 0
>  #define RTE_GRO_TCP_IPV4 (1ULL << RTE_GRO_TCP_IPV4_INDEX)  /**<
> TCP/IPv4 GRO flag */
> +#define RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX 1 #define
> +RTE_GRO_IPV4_VXLAN_TCP_IPV4 (1ULL <<
> RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX)
> +/**< VxLAN GRO flag. */
> 
>  /**
>   * Structure used to create GRO context objects or used to pass
> --
> 2.7.4

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 2/2] gro: support VxLAN GRO
  2017-12-22  8:17       ` Chen, Junjie J
@ 2017-12-25  6:36         ` Jiayu Hu
  0 siblings, 0 replies; 31+ messages in thread
From: Jiayu Hu @ 2017-12-25  6:36 UTC (permalink / raw)
  To: Chen, Junjie J
  Cc: dev, Tan, Jianfeng, Ananyev, Konstantin, stephen, Yigit, Ferruh,
	Yao, Lei A

Hi Junjie,

On Fri, Dec 22, 2017 at 04:17:50PM +0800, Chen, Junjie J wrote:
> Hi Jiayu
> 
> > -----Original Message-----
> > From: Hu, Jiayu
> > Sent: Friday, December 22, 2017 3:26 PM
> > To: dev@dpdk.org
> > Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Chen, Junjie J
> > <junjie.j.chen@intel.com>; Ananyev, Konstantin
> > <konstantin.ananyev@intel.com>; stephen@networkplumber.org; Yigit,
> > Ferruh <ferruh.yigit@intel.com>; Yao, Lei A <lei.a.yao@intel.com>; Hu, Jiayu
> > <jiayu.hu@intel.com>
> > Subject: [PATCH v3 2/2] gro: support VxLAN GRO
> > 
> > This patch adds a framework that allows GRO on tunneled packets.
> > Furthermore, it leverages that framework to provide GRO support for
> > VxLAN-encapsulated packets. Supported VxLAN packets must have an outer
> > IPv4 header, and contain an inner TCP/IPv4 packet.
> > 
> > VxLAN GRO doesn't check if input packets have correct checksums and
> > doesn't update checksums for output packets. Additionally, it assumes the
> > packets are complete (i.e., MF==0 && frag_off==0), when IP fragmentation is
> > possible (i.e., DF==0).
> > 
> > Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
> > ---
> >  .../prog_guide/generic_receive_offload_lib.rst     |  31 +-
> >  lib/librte_gro/Makefile                            |   1 +
> >  lib/librte_gro/gro_vxlan_tcp4.c                    | 515
> > +++++++++++++++++++++
> >  lib/librte_gro/gro_vxlan_tcp4.h                    | 184 ++++++++
> >  lib/librte_gro/rte_gro.c                           | 129 +++++-
> >  lib/librte_gro/rte_gro.h                           |   5 +-
> >  6 files changed, 837 insertions(+), 28 deletions(-)  create mode 100644
> > lib/librte_gro/gro_vxlan_tcp4.c  create mode 100644
> > lib/librte_gro/gro_vxlan_tcp4.h
> > 
> > diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst
> > b/doc/guides/prog_guide/generic_receive_offload_lib.rst
> > index c2d7a41..078bec0 100644
> > --- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
> > +++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
> > @@ -57,7 +57,9 @@ assumes the packets are complete (i.e., MF==0 &&
> > frag_off==0), when IP  fragmentation is possible (i.e., DF==0). Additionally, it
> > complies RFC
> >  6864 to process the IPv4 ID field.
> > 
> > -Currently, the GRO library provides GRO supports for TCP/IPv4 packets.
> > +Currently, the GRO library provides GRO supports for TCP/IPv4 packets
> > +and VxLAN packets which contain an outer IPv4 header and an inner
> > +TCP/IPv4 packet.
> > 
> >  Two Sets of API
> >  ---------------
> > @@ -108,7 +110,8 @@ Reassembly Algorithm
> > 
> >  The reassembly algorithm is used for reassembling packets. In the GRO
> > library, different GRO types can use different algorithms. In this -section, we
> > will introduce an algorithm, which is used by TCP/IPv4 GRO.
> > +section, we will introduce an algorithm, which is used by TCP/IPv4 GRO
> > +and VxLAN GRO.
> > 
> >  Challenges
> >  ~~~~~~~~~~
> > @@ -185,6 +188,30 @@ Header fields deciding if two packets are neighbors
> > include:
> >  - IPv4 ID. The IPv4 ID fields of the packets, whose DF bit is 0, should
> >    be increased by 1.
> > 
> > +VxLAN GRO
> > +---------
> > +
> > +The table structure used by VxLAN GRO, which is in charge of processing
> > +VxLAN packets with an outer IPv4 header and inner TCP/IPv4 packet, is
> > +similar with that of TCP/IPv4 GRO. Differently, the header fields used
> > +to define a VxLAN flow include:
> > +
> > +- outer source and destination: Ethernet and IP address, UDP port
> > +
> > +- VxLAN header (VNI and flag)
> > +
> > +- inner source and destination: Ethernet and IP address, TCP port
> > +
> > +Header fields deciding if packets are neighbors include:
> > +
> > +- outer IPv4 ID. The IPv4 ID fields of the packets, whose DF bit in the
> > +  outer IPv4 header is 0, should be increased by 1.
> > +
> > +- inner TCP sequence number
> > +
> > +- inner IPv4 ID. The IPv4 ID fields of the packets, whose DF bit in the
> > +  inner IPv4 header is 0, should be increased by 1.
> > +
> >  .. note::
> >          We comply RFC 6864 to process the IPv4 ID field. Specifically,
> >          we check IPv4 ID fields for the packets whose DF bit is 0 and diff
> > --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile index
> > eb423cc..0110455 100644
> > --- a/lib/librte_gro/Makefile
> > +++ b/lib/librte_gro/Makefile
> > @@ -45,6 +45,7 @@ LIBABIVER := 1
> >  # source files
> >  SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
> >  SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c
> > +SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_vxlan_tcp4.c
> > 
> >  # install this header file
> >  SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h diff --git
> > a/lib/librte_gro/gro_vxlan_tcp4.c b/lib/librte_gro/gro_vxlan_tcp4.c new file
> > mode 100644 index 0000000..6567779
> > --- /dev/null
> > +++ b/lib/librte_gro/gro_vxlan_tcp4.c
> > @@ -0,0 +1,515 @@
> > +/*-
> > + *   BSD LICENSE
> > + *
> > + *   Copyright(c) 2017 Intel Corporation. All rights reserved.
> > + *
> > + *   Redistribution and use in source and binary forms, with or without
> > + *   modification, are permitted provided that the following conditions
> > + *   are met:
> > + *
> > + *     * Redistributions of source code must retain the above copyright
> > + *       notice, this list of conditions and the following disclaimer.
> > + *     * Redistributions in binary form must reproduce the above copyright
> > + *       notice, this list of conditions and the following disclaimer in
> > + *       the documentation and/or other materials provided with the
> > + *       distribution.
> > + *     * Neither the name of Intel Corporation nor the names of its
> > + *       contributors may be used to endorse or promote products
> > derived
> > + *       from this software without specific prior written permission.
> > + *
> > + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> > CONTRIBUTORS
> > + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> > NOT
> > + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> > FITNESS FOR
> > + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> > COPYRIGHT
> > + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> > INCIDENTAL,
> > + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> > BUT NOT
> > + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> > LOSS OF USE,
> > + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> > AND ON ANY
> > + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> > TORT
> > + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> > OF THE USE
> > + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> > DAMAGE.
> > + */
> > +
> > +#include <rte_malloc.h>
> > +#include <rte_mbuf.h>
> > +#include <rte_cycles.h>
> > +#include <rte_ethdev.h>
> > +#include <rte_udp.h>
> > +
> > +#include "gro_vxlan_tcp4.h"
> > +
> > +void *
> > +gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
> > +		uint16_t max_flow_num,
> > +		uint16_t max_item_per_flow)
> > +{
> > +	struct gro_vxlan_tcp4_tbl *tbl;
> > +	size_t size;
> > +	uint32_t entries_num, i;
> > +
> > +	entries_num = max_flow_num * max_item_per_flow;
> > +	entries_num = RTE_MIN(entries_num,
> > GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM);
> > +
> > +	if (entries_num == 0)
> > +		return NULL;
> > +
> > +	tbl = rte_zmalloc_socket(__func__,
> > +			sizeof(struct gro_vxlan_tcp4_tbl),
> > +			RTE_CACHE_LINE_SIZE,
> > +			socket_id);
> > +	if (tbl == NULL)
> > +		return NULL;
> > +
> > +	size = sizeof(struct gro_vxlan_tcp4_item) * entries_num;
> > +	tbl->items = rte_zmalloc_socket(__func__,
> > +			size,
> > +			RTE_CACHE_LINE_SIZE,
> > +			socket_id);
> > +	if (tbl->items == NULL) {
> > +		rte_free(tbl);
> > +		return NULL;
> > +	}
> > +	tbl->max_item_num = entries_num;
> > +
> > +	size = sizeof(struct gro_vxlan_tcp4_flow) * entries_num;
> > +	tbl->flows = rte_zmalloc_socket(__func__,
> > +			size,
> > +			RTE_CACHE_LINE_SIZE,
> > +			socket_id);
> > +	if (tbl->flows == NULL) {
> > +		rte_free(tbl->items);
> > +		rte_free(tbl);
> > +		return NULL;
> > +	}
> > +
> > +	for (i = 0; i < entries_num; i++)
> > +		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
> > +	tbl->max_flow_num = entries_num;
> > +
> > +	return tbl;
> > +}
> > +
> > +void
> > +gro_vxlan_tcp4_tbl_destroy(void *tbl)
> > +{
> > +	struct gro_vxlan_tcp4_tbl *vxlan_tbl = tbl;
> > +
> > +	if (vxlan_tbl) {
> > +		rte_free(vxlan_tbl->items);
> > +		rte_free(vxlan_tbl->flows);
> > +	}
> > +	rte_free(vxlan_tbl);
> 
> What if vxlan_tbl is NULL — should free still be called? The rte_free() call should be moved into the if block, and NULL checks may also be needed for items and flows.

Since rte_free() already accepts a NULL parameter, we don't
need to add NULL checks for vxlan_tbl/items/flows ourselves.

Thanks,
Jiayu

> 
> > +}
> > +
> > +static inline uint32_t
> > +find_an_empty_item(struct gro_vxlan_tcp4_tbl *tbl) {
> > +	uint32_t max_item_num = tbl->max_item_num, i;
> > +
> > +	for (i = 0; i < max_item_num; i++)
> > +		if (tbl->items[i].inner_item.firstseg == NULL)
> > +			return i;
> > +	return INVALID_ARRAY_INDEX;
> > +}
> > +
> > +static inline uint32_t
> > +find_an_empty_flow(struct gro_vxlan_tcp4_tbl *tbl) {
> > +	uint32_t max_flow_num = tbl->max_flow_num, i;
> > +
> > +	for (i = 0; i < max_flow_num; i++)
> > +		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
> > +			return i;
> > +	return INVALID_ARRAY_INDEX;
> > +}
> > +
> > +static inline uint32_t
> > +insert_new_item(struct gro_vxlan_tcp4_tbl *tbl,
> > +		struct rte_mbuf *pkt,
> > +		uint64_t start_time,
> > +		uint32_t prev_idx,
> > +		uint32_t sent_seq,
> > +		uint16_t outer_ip_id,
> > +		uint16_t ip_id,
> > +		uint8_t outer_is_atomic,
> > +		uint8_t is_atomic)
> > +{
> > +	uint32_t item_idx;
> > +
> > +	item_idx = find_an_empty_item(tbl);
> > +	if (item_idx == INVALID_ARRAY_INDEX)
> > +		return INVALID_ARRAY_INDEX;
> > +
> > +	tbl->items[item_idx].inner_item.firstseg = pkt;
> > +	tbl->items[item_idx].inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
> > +	tbl->items[item_idx].inner_item.start_time = start_time;
> > +	tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
> > +	tbl->items[item_idx].inner_item.sent_seq = sent_seq;
> > +	tbl->items[item_idx].inner_item.ip_id = ip_id;
> > +	tbl->items[item_idx].inner_item.nb_merged = 1;
> > +	tbl->items[item_idx].inner_item.is_atomic = is_atomic;
> > +	tbl->items[item_idx].outer_ip_id = outer_ip_id;
> > +	tbl->items[item_idx].outer_is_atomic = outer_is_atomic;
> > +	tbl->item_num++;
> > +
> > +	/* If the previous packet exists, chain the new one with it. */
> > +	if (prev_idx != INVALID_ARRAY_INDEX) {
> > +		tbl->items[item_idx].inner_item.next_pkt_idx =
> > +			tbl->items[prev_idx].inner_item.next_pkt_idx;
> > +		tbl->items[prev_idx].inner_item.next_pkt_idx = item_idx;
> > +	}
> > +
> > +	return item_idx;
> > +}
> > +
> > +static inline uint32_t
> > +delete_item(struct gro_vxlan_tcp4_tbl *tbl,
> > +		uint32_t item_idx,
> > +		uint32_t prev_item_idx)
> > +{
> > +	uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
> > +
> > +	/* NULL indicates an empty item. */
> > +	tbl->items[item_idx].inner_item.firstseg = NULL;
> > +	tbl->item_num--;
> > +	if (prev_item_idx != INVALID_ARRAY_INDEX)
> > +		tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
> > +
> > +	return next_idx;
> > +}
> > +
> > +static inline uint32_t
> > +insert_new_flow(struct gro_vxlan_tcp4_tbl *tbl,
> > +		struct vxlan_tcp4_flow_key *src,
> > +		uint32_t item_idx)
> > +{
> > +	struct vxlan_tcp4_flow_key *dst;
> > +	uint32_t flow_idx;
> > +
> > +	flow_idx = find_an_empty_flow(tbl);
> > +	if (flow_idx == INVALID_ARRAY_INDEX)
> > +		return INVALID_ARRAY_INDEX;
> > +
> > +	dst = &(tbl->flows[flow_idx].key);
> > +
> > +	ether_addr_copy(&(src->inner_key.eth_saddr),
> > +			&(dst->inner_key.eth_saddr));
> > +	ether_addr_copy(&(src->inner_key.eth_daddr),
> > +			&(dst->inner_key.eth_daddr));
> > +	dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
> > +	dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
> > +	dst->inner_key.recv_ack = src->inner_key.recv_ack;
> > +	dst->inner_key.src_port = src->inner_key.src_port;
> > +	dst->inner_key.dst_port = src->inner_key.dst_port;
> > +
> > +	dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
> > +	dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
> > +	ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
> > +	ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
> > +	dst->outer_ip_src_addr = src->outer_ip_src_addr;
> > +	dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
> > +	dst->outer_src_port = src->outer_src_port;
> > +	dst->outer_dst_port = src->outer_dst_port;
> > +
> > +	tbl->flows[flow_idx].start_index = item_idx;
> > +	tbl->flow_num++;
> > +
> > +	return flow_idx;
> > +}
> > +
> > +static inline int
> > +is_same_vxlan_tcp4_flow(struct vxlan_tcp4_flow_key k1,
> > +		struct vxlan_tcp4_flow_key k2)
> > +{
> > +	return (is_same_ether_addr(&k1.outer_eth_saddr,
> > &k2.outer_eth_saddr) &&
> > +			is_same_ether_addr(&k1.outer_eth_daddr,
> > +				&k2.outer_eth_daddr) &&
> > +			(k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
> > +			(k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
> > +			(k1.outer_src_port == k2.outer_src_port) &&
> > +			(k1.outer_dst_port == k2.outer_dst_port) &&
> > +			(k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
> > +			(k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
> > +			is_same_tcp4_flow(k1.inner_key, k2.inner_key)); }
> > +
> > +static inline int
> > +check_vxlan_seq_option(struct gro_vxlan_tcp4_item *item,
> > +		struct tcp_hdr *tcp_hdr,
> > +		uint32_t sent_seq,
> > +		uint16_t outer_ip_id,
> > +		uint16_t ip_id,
> > +		uint16_t tcp_hl,
> > +		uint16_t tcp_dl,
> > +		uint8_t outer_is_atomic,
> > +		uint8_t is_atomic)
> > +{
> > +	struct rte_mbuf *pkt = item->inner_item.firstseg;
> > +	int cmp;
> > +	uint16_t l2_offset;
> > +
> > +	/* Don't merge packets whose outer DF bits are different. */
> > +	if (unlikely(item->outer_is_atomic ^ outer_is_atomic))
> > +		return 0;
> > +
> > +	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
> > +	cmp = check_seq_option(&item->inner_item, tcp_hdr, sent_seq, ip_id,
> > +			tcp_hl, tcp_dl, l2_offset, is_atomic);
> > +	if ((cmp == 1) && (outer_is_atomic ||
> > +				(outer_ip_id == item->outer_ip_id +
> > +				 item->inner_item.nb_merged)))
> > +		/* Append the packet. */
> > +		return 1;
> > +	else if ((cmp == -1) && (outer_is_atomic ||
> > +				(outer_ip_id + 1 == item->outer_ip_id)))
> > +		/* Prepend the packet. */
> > +		return -1;
> > +
> > +	return 0;
> > +}
> > +
> > +static inline int
> > +merge_two_vxlan_tcp4_packets(struct gro_vxlan_tcp4_item *item,
> > +		struct rte_mbuf *pkt,
> > +		int cmp,
> > +		uint32_t sent_seq,
> > +		uint16_t outer_ip_id,
> > +		uint16_t ip_id)
> > +{
> > +	if (merge_two_tcp4_packets(&item->inner_item, pkt, cmp, sent_seq,
> > +				ip_id, pkt->outer_l2_len +
> > +				pkt->outer_l3_len)) {
> > +		item->outer_ip_id = cmp < 0 ? outer_ip_id : item->outer_ip_id;
> > +		return 1;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static inline void
> > +update_vxlan_header(struct gro_vxlan_tcp4_item *item) {
> > +	struct ipv4_hdr *ipv4_hdr;
> > +	struct udp_hdr *udp_hdr;
> > +	struct rte_mbuf *pkt = item->inner_item.firstseg;
> > +	uint16_t len;
> > +
> > +	/* Update the outer IPv4 header. */
> > +	len = pkt->pkt_len - pkt->outer_l2_len;
> > +	ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
> > +			pkt->outer_l2_len);
> > +	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
> > +
> > +	/* Update the outer UDP header. */
> > +	len -= pkt->outer_l3_len;
> > +	udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + pkt->outer_l3_len);
> > +	udp_hdr->dgram_len = rte_cpu_to_be_16(len);
> > +
> > +	/* Update the inner IPv4 header. */
> > +	len -= pkt->l2_len;
> > +	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
> > +	ipv4_hdr->total_length = rte_cpu_to_be_16(len); }
> > +
> > +int32_t
> > +gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
> > +		struct gro_vxlan_tcp4_tbl *tbl,
> > +		uint64_t start_time)
> > +{
> > +	struct ether_hdr *outer_eth_hdr, *eth_hdr;
> > +	struct ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
> > +	struct tcp_hdr *tcp_hdr;
> > +	struct udp_hdr *udp_hdr;
> > +	struct vxlan_hdr *vxlan_hdr;
> > +	uint32_t sent_seq;
> > +	uint16_t tcp_dl, frag_off, outer_ip_id, ip_id;
> > +	uint8_t outer_is_atomic, is_atomic;
> > +
> > +	struct vxlan_tcp4_flow_key key;
> > +	uint32_t cur_idx, prev_idx, item_idx;
> > +	uint32_t i, max_flow_num;
> > +	int cmp;
> > +	uint16_t hdr_len;
> > +
> > +	outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
> > +	outer_ipv4_hdr = (struct ipv4_hdr *)((char *)outer_eth_hdr +
> > +			pkt->outer_l2_len);
> > +	udp_hdr = (struct udp_hdr *)((char *)outer_ipv4_hdr +
> > +			pkt->outer_l3_len);
> > +	vxlan_hdr = (struct vxlan_hdr *)((char *)udp_hdr +
> > +			sizeof(struct udp_hdr));
> > +	eth_hdr = (struct ether_hdr *)((char *)vxlan_hdr +
> > +			sizeof(struct vxlan_hdr));
> > +	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
> > +	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
> > +
> > +	/*
> > +	 * Don't process the packet which has FIN, SYN, RST, PSH, URG,
> > +	 * ECE or CWR set.
> > +	 */
> > +	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
> > +		return -1;
> > +
> > +	hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
> > +		pkt->l3_len + pkt->l4_len;
> > +	/*
> > +	 * Don't process the packet whose payload length is less than or
> > +	 * equal to 0.
> > +	 */
> > +	tcp_dl = pkt->pkt_len - hdr_len;
> > +	if (tcp_dl <= 0)
> > +		return -1;
> > +
> > +	/*
> > +	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
> > +	 * whose DF bit is 1, IPv4 ID is ignored.
> > +	 */
> > +	frag_off = rte_be_to_cpu_16(outer_ipv4_hdr->fragment_offset);
> > +	outer_is_atomic = (frag_off & IPV4_HDR_DF_FLAG) ==
> > IPV4_HDR_DF_FLAG;
> > +	outer_ip_id = outer_is_atomic ? 0 :
> > +		rte_be_to_cpu_16(outer_ipv4_hdr->packet_id);
> > +	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
> > +	is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
> > +	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
> > +
> > +	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> > +
> > +	ether_addr_copy(&(eth_hdr->s_addr), &(key.inner_key.eth_saddr));
> > +	ether_addr_copy(&(eth_hdr->d_addr), &(key.inner_key.eth_daddr));
> > +	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
> > +	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
> > +	key.inner_key.recv_ack = tcp_hdr->recv_ack;
> > +	key.inner_key.src_port = tcp_hdr->src_port;
> > +	key.inner_key.dst_port = tcp_hdr->dst_port;
> > +
> > +	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
> > +	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;
> > +	ether_addr_copy(&(outer_eth_hdr->s_addr), &(key.outer_eth_saddr));
> > +	ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
> > +	key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
> > +	key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
> > +	key.outer_src_port = udp_hdr->src_port;
> > +	key.outer_dst_port = udp_hdr->dst_port;
> > +
> > +	/* Search for a matched flow. */
> > +	max_flow_num = tbl->max_flow_num;
> > +	for (i = 0; i < max_flow_num; i++) {
> > +		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX &&
> > +				is_same_vxlan_tcp4_flow(tbl->flows[i].key,
> > +					key))
> > +			break;
> > +	}
> > +
> > +	/*
> > +	 * Can't find a matched flow. Insert a new flow and store the
> > +	 * packet into the flow.
> > +	 */
> > +	if (i == tbl->max_flow_num) {
> > +		item_idx = insert_new_item(tbl, pkt, start_time,
> > +				INVALID_ARRAY_INDEX, sent_seq, outer_ip_id,
> > +				ip_id, outer_is_atomic, is_atomic);
> > +		if (item_idx == INVALID_ARRAY_INDEX)
> > +			return -1;
> > +		if (insert_new_flow(tbl, &key, item_idx) ==
> > +				INVALID_ARRAY_INDEX) {
> > +			/*
> > +			 * Fail to insert a new flow, so
> > +			 * delete the inserted packet.
> > +			 */
> > +			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
> > +			return -1;
> > +		}
> > +		return 0;
> > +	}
> > +
> > +	/* Check all packets in the flow and try to find a neighbor. */
> > +	cur_idx = tbl->flows[i].start_index;
> > +	prev_idx = cur_idx;
> > +	do {
> > +		cmp = check_vxlan_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
> > +				sent_seq, outer_ip_id, ip_id, pkt->l4_len,
> > +				tcp_dl, outer_is_atomic, is_atomic);
> > +		if (cmp) {
> > +			if (merge_two_vxlan_tcp4_packets(&(tbl->items[cur_idx]),
> > +						pkt, cmp, sent_seq,
> > +						outer_ip_id, ip_id))
> > +				return 1;
> > +			/*
> > +			 * Can't merge two packets, as the packet
> > +			 * length will be greater than the max value.
> > +			 * Insert the packet into the flow.
> > +			 */
> > +			if (insert_new_item(tbl, pkt, start_time, prev_idx,
> > +						sent_seq, outer_ip_id,
> > +						ip_id, outer_is_atomic,
> > +						is_atomic) ==
> > +					INVALID_ARRAY_INDEX)
> > +				return -1;
> > +			return 0;
> > +		}
> > +		prev_idx = cur_idx;
> > +		cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx;
> > +	} while (cur_idx != INVALID_ARRAY_INDEX);
> > +
> > +	/* Can't find neighbor. Insert the packet into the flow. */
> > +	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
> > +				outer_ip_id, ip_id, outer_is_atomic,
> > +				is_atomic) == INVALID_ARRAY_INDEX)
> > +		return -1;
> > +
> > +	return 0;
> > +}
> > +
> > +uint16_t
> > +gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
> > +		uint64_t flush_timestamp,
> > +		struct rte_mbuf **out,
> > +		uint16_t nb_out)
> > +{
> > +	uint16_t k = 0;
> > +	uint32_t i, j;
> > +	uint32_t max_flow_num = tbl->max_flow_num;
> > +
> > +	for (i = 0; i < max_flow_num; i++) {
> > +		if (unlikely(tbl->flow_num == 0))
> > +			return k;
> > +
> > +		j = tbl->flows[i].start_index;
> > +		while (j != INVALID_ARRAY_INDEX) {
> > +			if (tbl->items[j].inner_item.start_time <=
> > +					flush_timestamp) {
> > +				out[k++] = tbl->items[j].inner_item.firstseg;
> > +				if (tbl->items[j].inner_item.nb_merged > 1)
> > +					update_vxlan_header(&(tbl->items[j]));
> > +				/*
> > +				 * Delete the item and get the next packet
> > +				 * index.
> > +				 */
> > +				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
> > +				tbl->flows[i].start_index = j;
> > +				if (j == INVALID_ARRAY_INDEX)
> > +					tbl->flow_num--;
> > +
> > +				if (unlikely(k == nb_out))
> > +					return k;
> > +			} else
> > +				/*
> > +				 * The left packets in the flow won't be
> > +				 * timeout. Go to check other flows.
> > +				 */
> > +				break;
> > +		}
> > +	}
> > +	return k;
> > +}
> > +
> > +uint32_t
> > +gro_vxlan_tcp4_tbl_pkt_count(void *tbl) {
> > +	struct gro_vxlan_tcp4_tbl *gro_tbl = tbl;
> > +
> > +	if (gro_tbl)
> > +		return gro_tbl->item_num;
> > +
> > +	return 0;
> > +}
> > diff --git a/lib/librte_gro/gro_vxlan_tcp4.h b/lib/librte_gro/gro_vxlan_tcp4.h
> > new file mode 100644 index 0000000..66baf73
> > --- /dev/null
> > +++ b/lib/librte_gro/gro_vxlan_tcp4.h
> > @@ -0,0 +1,184 @@
> > +/*-
> > + *   BSD LICENSE
> > + *
> > + *   Copyright(c) 2017 Intel Corporation. All rights reserved.
> > + *
> > + *   Redistribution and use in source and binary forms, with or without
> > + *   modification, are permitted provided that the following conditions
> > + *   are met:
> > + *
> > + *     * Redistributions of source code must retain the above copyright
> > + *       notice, this list of conditions and the following disclaimer.
> > + *     * Redistributions in binary form must reproduce the above copyright
> > + *       notice, this list of conditions and the following disclaimer in
> > + *       the documentation and/or other materials provided with the
> > + *       distribution.
> > + *     * Neither the name of Intel Corporation nor the names of its
> > + *       contributors may be used to endorse or promote products
> > derived
> > + *       from this software without specific prior written permission.
> > + *
> > + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> > CONTRIBUTORS
> > + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> > NOT
> > + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> > FITNESS FOR
> > + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> > COPYRIGHT
> > + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> > INCIDENTAL,
> > + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> > BUT NOT
> > + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> > LOSS OF USE,
> > + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> > AND ON ANY
> > + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> > TORT
> > + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> > OF THE USE
> > + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> > DAMAGE.
> > + */
> > +
> > +#ifndef _GRO_VXLAN_TCP4_H_
> > +#define _GRO_VXLAN_TCP4_H_
> > +
> > +#include "gro_tcp4.h"
> > +
> > +#define GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
> > +
> > +/* Header fields representing a VxLAN flow */ struct
> > +vxlan_tcp4_flow_key {
> > +	struct tcp4_flow_key inner_key;
> > +	struct vxlan_hdr vxlan_hdr;
> > +
> > +	struct ether_addr outer_eth_saddr;
> > +	struct ether_addr outer_eth_daddr;
> > +
> > +	uint32_t outer_ip_src_addr;
> > +	uint32_t outer_ip_dst_addr;
> > +
> > +	/* Outer UDP ports */
> > +	uint16_t outer_src_port;
> > +	uint16_t outer_dst_port;
> > +
> > +};
> > +
> > +struct gro_vxlan_tcp4_flow {
> > +	struct vxlan_tcp4_flow_key key;
> > +	/*
> > +	 * The index of the first packet in the flow. INVALID_ARRAY_INDEX
> > +	 * indicates an empty flow.
> > +	 */
> > +	uint32_t start_index;
> > +};
> > +
> > +struct gro_vxlan_tcp4_item {
> > +	struct gro_tcp4_item inner_item;
> > +	/* IPv4 ID in the outer IPv4 header */
> > +	uint16_t outer_ip_id;
> > +	/* Indicate if outer IPv4 ID can be ignored */
> > +	uint8_t outer_is_atomic;
> > +};
> > +
> > +/*
> > + * VxLAN (with an outer IPv4 header and an inner TCP/IPv4 packet)
> > + * reassembly table structure
> > + */
> > +struct gro_vxlan_tcp4_tbl {
> > +	/* item array */
> > +	struct gro_vxlan_tcp4_item *items;
> > +	/* flow array */
> > +	struct gro_vxlan_tcp4_flow *flows;
> > +	/* current item number */
> > +	uint32_t item_num;
> > +	/* current flow number */
> > +	uint32_t flow_num;
> > +	/* the maximum item number */
> > +	uint32_t max_item_num;
> > +	/* the maximum flow number */
> > +	uint32_t max_flow_num;
> > +};
> > +
> > +/**
> > + * This function creates a VxLAN reassembly table for VxLAN packets
> > + * which have an outer IPv4 header and an inner TCP/IPv4 packet.
> > + *
> > + * @param socket_id
> > + *  Socket index for allocating the table
> > + * @param max_flow_num
> > + *  The maximum number of flows in the table
> > + * @param max_item_per_flow
> > + *  The maximum number of packets per flow
> > + *
> > + * @return
> > + *  - Return the table pointer on success.
> > + *  - Return NULL on failure.
> > + */
> > +void *gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
> > +		uint16_t max_flow_num,
> > +		uint16_t max_item_per_flow);
> > +
> > +/**
> > + * This function destroys a VxLAN reassembly table.
> > + *
> > + * @param tbl
> > + *  Pointer pointing to the VxLAN reassembly table  */ void
> > +gro_vxlan_tcp4_tbl_destroy(void *tbl);
> > +
> > +/**
> > + * This function merges a VxLAN packet which has an outer IPv4 header
> > +and
> > + * an inner TCP/IPv4 packet. It doesn't process the packet, whose TCP
> > + * header has SYN, FIN, RST, PSH, CWR, ECE or URG bit set, or which
> > + * doesn't have payload.
> > + *
> > + * This function doesn't check if the packet has correct checksums and
> > + * doesn't re-calculate checksums for the merged packet. Additionally,
> > + * it assumes the packets are complete (i.e., MF==0 && frag_off==0),
> > +when
> > + * IP fragmentation is possible (i.e., DF==0). It returns the packet,
> > +if
> > + * the packet has invalid parameters (e.g. SYN bit is set) or there is
> > +no
> > + * available space in the table.
> > + *
> > + * @param pkt
> > + *  Packet to reassemble
> > + * @param tbl
> > + *  Pointer pointing to the VxLAN reassembly table
> > + * @start_time
> > + *  The time when the packet is inserted into the table
> > + *
> > + * @return
> > + *  - Return a positive value if the packet is merged.
> > + *  - Return zero if the packet isn't merged but stored in the table.
> > + *  - Return a negative value for invalid parameters or no available
> > + *    space in the table.
> > + */
> > +int32_t gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
> > +		struct gro_vxlan_tcp4_tbl *tbl,
> > +		uint64_t start_time);
> > +
> > +/**
> > + * This function flushes timeout packets in the VxLAN reassembly table,
> > + * and without updating checksums.
> > + *
> > + * @param tbl
> > + *  Pointer pointing to a VxLAN GRO table
> > + * @param flush_timestamp
> > + *  This function flushes packets which are inserted into the table
> > + *  before or at the flush_timestamp.
> > + * @param out
> > + *  Pointer array used to keep flushed packets
> > + * @param nb_out
> > + *  The element number in 'out'. It also determines the maximum number
> > +of
> > + *  packets that can be flushed finally.
> > + *
> > + * @return
> > + *  The number of flushed packets
> > + */
> > +uint16_t gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
> > +		uint64_t flush_timestamp,
> > +		struct rte_mbuf **out,
> > +		uint16_t nb_out);
> > +
> > +/**
> > + * This function returns the number of the packets in a VxLAN
> > + * reassembly table.
> > + *
> > + * @param tbl
> > + *  Pointer pointing to the VxLAN reassembly table
> > + *
> > + * @return
> > + *  The number of packets in the table
> > + */
> > +uint32_t gro_vxlan_tcp4_tbl_pkt_count(void *tbl); #endif
> > diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c index
> > b3931a8..5a26893 100644
> > --- a/lib/librte_gro/rte_gro.c
> > +++ b/lib/librte_gro/rte_gro.c
> > @@ -37,6 +37,7 @@
> > 
> >  #include "rte_gro.h"
> >  #include "gro_tcp4.h"
> > +#include "gro_vxlan_tcp4.h"
> > 
> >  typedef void *(*gro_tbl_create_fn)(uint16_t socket_id,
> >  		uint16_t max_flow_num,
> > @@ -45,15 +46,28 @@ typedef void (*gro_tbl_destroy_fn)(void *tbl);
> > typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl);
> > 
> >  static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = {
> > -		gro_tcp4_tbl_create, NULL};
> > +		gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, NULL};
> >  static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
> > -			gro_tcp4_tbl_destroy, NULL};
> > +			gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy,
> > +			NULL};
> >  static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM]
> > = {
> > -			gro_tcp4_tbl_pkt_count, NULL};
> > +			gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count,
> > +			NULL};
> > 
> >  #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
> >  		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
> > 
> > +#define IS_IPV4_VXLAN_TCP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype)
> > && \
> > +		((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \
> > +		((ptype & RTE_PTYPE_TUNNEL_VXLAN) == \
> > +		 RTE_PTYPE_TUNNEL_VXLAN) && \
> > +		 ((ptype & RTE_PTYPE_INNER_L4_TCP) == \
> > +		  RTE_PTYPE_INNER_L4_TCP) && \
> > +		  (((ptype & RTE_PTYPE_INNER_L3_MASK) & \
> > +		    (RTE_PTYPE_INNER_L3_IPV4 | \
> > +		     RTE_PTYPE_INNER_L3_IPV4_EXT | \
> > +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0))
> > +
> >  /*
> >   * GRO context structure. It keeps the table structures, which are
> >   * used to merge packets, for different GRO types. Before using @@ -137,12
> > +151,20 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
> >  	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> >  	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] =
> > {{0} };
> > 
> > +	/* Allocate a reassembly table for VXLAN GRO */
> > +	struct gro_vxlan_tcp4_tbl vxlan_tbl;
> > +	struct gro_vxlan_tcp4_flow
> > vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> > +	struct gro_vxlan_tcp4_item
> > vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {
> > +		{{0}, 0, 0} };
> > +
> >  	struct rte_mbuf *unprocess_pkts[nb_pkts];
> >  	uint32_t item_num;
> >  	int32_t ret;
> >  	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
> > +	uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0;
> > 
> > -	if (unlikely((param->gro_types & RTE_GRO_TCP_IPV4) == 0))
> > +	if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
> > +					RTE_GRO_TCP_IPV4)) == 0))
> >  		return nb_pkts;
> > 
> >  	/* Get the maximum number of packets */ @@ -150,22 +172,47 @@
> > rte_gro_reassemble_burst(struct rte_mbuf **pkts,
> >  				param->max_item_per_flow));
> >  	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
> > 
> > -	for (i = 0; i < item_num; i++)
> > -		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
> > +	if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
> > +		for (i = 0; i < item_num; i++)
> > +			vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
> > +
> > +		vxlan_tbl.flows = vxlan_flows;
> > +		vxlan_tbl.items = vxlan_items;
> > +		vxlan_tbl.flow_num = 0;
> > +		vxlan_tbl.item_num = 0;
> > +		vxlan_tbl.max_flow_num = item_num;
> > +		vxlan_tbl.max_item_num = item_num;
> > +		do_vxlan_gro = 1;
> > +	}
> > 
> > -	tcp_tbl.flows = tcp_flows;
> > -	tcp_tbl.items = tcp_items;
> > -	tcp_tbl.flow_num = 0;
> > -	tcp_tbl.item_num = 0;
> > -	tcp_tbl.max_flow_num = item_num;
> > -	tcp_tbl.max_item_num = item_num;
> > +	if (param->gro_types & RTE_GRO_TCP_IPV4) {
> > +		for (i = 0; i < item_num; i++)
> > +			tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
> > +
> > +		tcp_tbl.flows = tcp_flows;
> > +		tcp_tbl.items = tcp_items;
> > +		tcp_tbl.flow_num = 0;
> > +		tcp_tbl.item_num = 0;
> > +		tcp_tbl.max_flow_num = item_num;
> > +		tcp_tbl.max_item_num = item_num;
> > +		do_tcp4_gro = 1;
> > +	}
> > 
> >  	for (i = 0; i < nb_pkts; i++) {
> > -		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
> > -			/*
> > -			 * The timestamp is ignored, since all packets
> > -			 * will be flushed from the tables.
> > -			 */
> > +		/*
> > +		 * The timestamp is ignored, since all packets
> > +		 * will be flushed from the tables.
> > +		 */
> > +		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
> > +				do_vxlan_gro) {
> > +			ret = gro_vxlan_tcp4_reassemble(pkts[i], &vxlan_tbl, 0);
> > +			if (ret > 0)
> > +				/* Merge successfully */
> > +				nb_after_gro--;
> > +			else if (ret < 0)
> > +				unprocess_pkts[unprocess_num++] = pkts[i];
> > +		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
> > +				do_tcp4_gro) {
> >  			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0);
> >  			if (ret > 0)
> >  				/* Merge successfully */
> > @@ -177,8 +224,16 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
> >  	}
> > 
> >  	if (nb_after_gro < nb_pkts) {
> > +		i = 0;
> >  		/* Flush all packets from the tables */
> > -		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, pkts, nb_pkts);
> > +		if (do_vxlan_gro) {
> > +			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
> > +					0, pkts, nb_pkts);
> > +		}
> > +		if (do_tcp4_gro) {
> > +			i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
> > +					&pkts[i], nb_pkts - i);
> > +		}
> >  		/* Copy unprocessed packets */
> >  		if (unprocess_num > 0) {
> >  			memcpy(&pkts[i], unprocess_pkts,
> > @@ -197,18 +252,33 @@ rte_gro_reassemble(struct rte_mbuf **pkts,  {
> >  	struct rte_mbuf *unprocess_pkts[nb_pkts];
> >  	struct gro_ctx *gro_ctx = ctx;
> > -	void *tcp_tbl;
> > +	void *tcp_tbl, *vxlan_tbl;
> >  	uint64_t current_time;
> >  	uint16_t i, unprocess_num = 0;
> > +	uint8_t do_tcp4_gro, do_vxlan_gro;
> > 
> > -	if (unlikely((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0))
> > +	if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
> > +					RTE_GRO_TCP_IPV4)) == 0))
> >  		return nb_pkts;
> > 
> >  	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
> > +	vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
> > +
> > +	do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
> > +		RTE_GRO_TCP_IPV4;
> > +	do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4)
> > ==
> > +		RTE_GRO_IPV4_VXLAN_TCP_IPV4;
> > +
> >  	current_time = rte_rdtsc();
> > 
> >  	for (i = 0; i < nb_pkts; i++) {
> > -		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
> > +		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
> > +				do_vxlan_gro) {
> > +			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
> > +						current_time) < 0)
> > +				unprocess_pkts[unprocess_num++] = pkts[i];
> > +		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
> > +				do_tcp4_gro) {
> >  			if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
> >  						current_time) < 0)
> >  				unprocess_pkts[unprocess_num++] = pkts[i]; @@ -232,18
> > +302,27 @@ rte_gro_timeout_flush(void *ctx,  {
> >  	struct gro_ctx *gro_ctx = ctx;
> >  	uint64_t flush_timestamp;
> > +	uint16_t num = 0;
> > 
> >  	gro_types = gro_types & gro_ctx->gro_types;
> >  	flush_timestamp = rte_rdtsc() - timeout_cycles;
> > 
> > -	if (gro_types & RTE_GRO_TCP_IPV4) {
> > -		return gro_tcp4_tbl_timeout_flush(
> > +	if (gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
> > +		num = gro_vxlan_tcp4_tbl_timeout_flush(gro_ctx->tbls[
> > +				RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX],
> > +				flush_timestamp, out, max_nb_out);
> > +		max_nb_out -= num;
> > +	}
> > +
> > +	/* If no available space in 'out', stop flushing. */
> > +	if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
> > +		num += gro_tcp4_tbl_timeout_flush(
> >  				gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX],
> >  				flush_timestamp,
> > -				out, max_nb_out);
> > +				&out[num], max_nb_out);
> >  	}
> > 
> > -	return 0;
> > +	return num;
> >  }
> > 
> >  uint64_t
> > diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h index
> > 36a1e60..5ed72d7 100644
> > --- a/lib/librte_gro/rte_gro.h
> > +++ b/lib/librte_gro/rte_gro.h
> > @@ -51,12 +51,15 @@ extern "C" {
> >   */
> >  #define RTE_GRO_TYPE_MAX_NUM 64
> >  /**< the max number of supported GRO types */ -#define
> > RTE_GRO_TYPE_SUPPORT_NUM 1
> > +#define RTE_GRO_TYPE_SUPPORT_NUM 2
> >  /**< the number of currently supported GRO types */
> > 
> >  #define RTE_GRO_TCP_IPV4_INDEX 0
> >  #define RTE_GRO_TCP_IPV4 (1ULL << RTE_GRO_TCP_IPV4_INDEX)  /**<
> > TCP/IPv4 GRO flag */
> > +#define RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX 1 #define
> > +RTE_GRO_IPV4_VXLAN_TCP_IPV4 (1ULL <<
> > RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX)
> > +/**< VxLAN GRO flag. */
> > 
> >  /**
> >   * Structure used to create GRO context objects or used to pass
> > --
> > 2.7.4

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 0/2] Support VxLAN GRO
  2017-12-22  7:25   ` [PATCH v3 0/2] Support " Jiayu Hu
  2017-12-22  7:25     ` [PATCH v3 1/2] gro: code cleanup Jiayu Hu
  2017-12-22  7:25     ` [PATCH v3 2/2] gro: support VxLAN GRO Jiayu Hu
@ 2017-12-29  3:52     ` Chen, Junjie J
  2018-01-05  6:12     ` [PATCH v4 " Jiayu Hu
  3 siblings, 0 replies; 31+ messages in thread
From: Chen, Junjie J @ 2017-12-29  3:52 UTC (permalink / raw)
  To: Hu, Jiayu, dev
  Cc: Tan, Jianfeng, Ananyev, Konstantin, stephen, Yigit, Ferruh, Yao, Lei A

> -----Original Message-----
> From: Hu, Jiayu
> Sent: Friday, December 22, 2017 3:26 PM
> To: dev@dpdk.org
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Chen, Junjie J
> <junjie.j.chen@intel.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; stephen@networkplumber.org; Yigit,
> Ferruh <ferruh.yigit@intel.com>; Yao, Lei A <lei.a.yao@intel.com>; Hu, Jiayu
> <jiayu.hu@intel.com>
> Subject: [PATCH v3 0/2] Support VxLAN GRO
> 
> VxLAN is one of the most widely used tunneled protocols. Providing GRO
> support for VxLAN-encapsulated packets can benefit many per-packet based
> applications, like Open vSwitch.
> 
> This patchset is to support VxLAN GRO. The first patch cleans up current gro
> codes for the sake of supporting tunneled GRO. The second patch supports
> GRO on the VxLAN packets which have an outer IPv4 header and an inner
> TCP/IPv4 packet.
> 
> Change log
> ===========
> v3:
> - remove needless check
> - add "likely()" and "unlikely()" to optimize branch prediction
> - fix a bug in merge_two_tcp4_packets(): for VxLAN packets, check if
>   the outer IPv4 packet length is less than or equal to UINT16_MAX,
>   rather than the inner IPv4 packet length.
> - fix a bug in rte_gro.h: change RTE_GRO_TYPE_SUPPORT_NUM to 2
> - Avoid inserting timestamp in rte_gro_reassemble_burst(), since all
>   packets in the tables will be flushed.
> - fix typos
> v2:
> - comply RFC 6848 to process IP ID fields. Specifically, we require the
>   IP ID fields of neighbor packets whose DF bit is 0 to be increased by
>   1. We don't check IP ID for the packets whose DF bit is 1.
>   Additionally, packets whose DF bits are different cannot be merged.
> - update the programmer guide and function comments
> 
> Jiayu Hu (2):
>   gro: code cleanup
>   gro: support VxLAN GRO
> 
>  .../prog_guide/generic_receive_offload_lib.rst     | 269 ++++++-----
>  doc/guides/prog_guide/img/gro-key-algorithm.png    | Bin 0 -> 28231
> bytes
>  lib/librte_gro/Makefile                            |   1 +
>  lib/librte_gro/gro_tcp4.c                          | 330 +++++--------
>  lib/librte_gro/gro_tcp4.h                          | 253 +++++++---
>  lib/librte_gro/gro_vxlan_tcp4.c                    | 515
> +++++++++++++++++++++
>  lib/librte_gro/gro_vxlan_tcp4.h                    | 184 ++++++++
>  lib/librte_gro/rte_gro.c                           | 199 +++++---
>  lib/librte_gro/rte_gro.h                           |  97 ++--
>  9 files changed, 1337 insertions(+), 511 deletions(-)  create mode 100644
> doc/guides/prog_guide/img/gro-key-algorithm.png
>  create mode 100644 lib/librte_gro/gro_vxlan_tcp4.c  create mode 100644
> lib/librte_gro/gro_vxlan_tcp4.h
> 
> --
> 2.7.4

Reviewed-by: Junjie Chen<junjie.j.chen@intel.com>

Thanks

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/2] gro: code cleanup
  2017-12-22  7:25     ` [PATCH v3 1/2] gro: code cleanup Jiayu Hu
@ 2017-12-29  3:53       ` Chen, Junjie J
  2018-01-02 11:26       ` Bruce Richardson
  1 sibling, 0 replies; 31+ messages in thread
From: Chen, Junjie J @ 2017-12-29  3:53 UTC (permalink / raw)
  To: Hu, Jiayu, dev
  Cc: Tan, Jianfeng, Ananyev, Konstantin, stephen, Yigit, Ferruh, Yao, Lei A


> -----Original Message-----
> From: Hu, Jiayu
> Sent: Friday, December 22, 2017 3:26 PM
> To: dev@dpdk.org
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Chen, Junjie J
> <junjie.j.chen@intel.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; stephen@networkplumber.org; Yigit,
> Ferruh <ferruh.yigit@intel.com>; Yao, Lei A <lei.a.yao@intel.com>; Hu, Jiayu
> <jiayu.hu@intel.com>
> Subject: [PATCH v3 1/2] gro: code cleanup
> 
> - Remove needless check and variants
> - For better understanding, update the programmer guide and rename
>   internal functions and variants
> - For supporting tunneled gro, move common internal functions from
>   gro_tcp4.c to gro_tcp4.h
> - Comply RFC 6864 to process the IPv4 ID field
> 
> Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
> ---
>  .../prog_guide/generic_receive_offload_lib.rst     | 246 ++++++++-------
>  doc/guides/prog_guide/img/gro-key-algorithm.png    | Bin 0 -> 28231
> bytes
>  lib/librte_gro/gro_tcp4.c                          | 330
> +++++++--------------
>  lib/librte_gro/gro_tcp4.h                          | 253
> +++++++++++-----
>  lib/librte_gro/rte_gro.c                           |  98 +++---
>  lib/librte_gro/rte_gro.h                           |  92 +++---
>  6 files changed, 518 insertions(+), 501 deletions(-)
>  create mode 100644 doc/guides/prog_guide/img/gro-key-algorithm.png
> 
> diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst
> b/doc/guides/prog_guide/generic_receive_offload_lib.rst
> index 22e50ec..c2d7a41 100644
> --- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
> +++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
> @@ -32,128 +32,162 @@ Generic Receive Offload Library
>  ===============================
> 
>  Generic Receive Offload (GRO) is a widely used SW-based offloading
> -technique to reduce per-packet processing overhead. It gains performance
> -by reassembling small packets into large ones. To enable more flexibility
> -to applications, DPDK implements GRO as a standalone library. Applications
> -explicitly use the GRO library to merge small packets into large ones.
> -
> -The GRO library assumes all input packets have correct checksums. In
> -addition, the GRO library doesn't re-calculate checksums for merged
> -packets. If input packets are IP fragmented, the GRO library assumes
> -they are complete packets (i.e. with L4 headers).
> -
> -Currently, the GRO library implements TCP/IPv4 packet reassembly.
> -
> -Reassembly Modes
> -----------------
> -
> -The GRO library provides two reassembly modes: lightweight and
> -heavyweight mode. If applications want to merge packets in a simple way,
> -they can use the lightweight mode API. If applications want more
> -fine-grained controls, they can choose the heavyweight mode API.
> -
> -Lightweight Mode
> -~~~~~~~~~~~~~~~~
> -
> -The ``rte_gro_reassemble_burst()`` function is used for reassembly in
> -lightweight mode. It tries to merge N input packets at a time, where
> -N should be less than or equal to ``RTE_GRO_MAX_BURST_ITEM_NUM``.
> -
> -In each invocation, ``rte_gro_reassemble_burst()`` allocates temporary
> -reassembly tables for the desired GRO types. Note that the reassembly
> -table is a table structure used to reassemble packets and different GRO
> -types (e.g. TCP/IPv4 GRO and TCP/IPv6 GRO) have different reassembly table
> -structures. The ``rte_gro_reassemble_burst()`` function uses the reassembly
> -tables to merge the N input packets.
> -
> -For applications, performing GRO in lightweight mode is simple. They
> -just need to invoke ``rte_gro_reassemble_burst()``. Applications can get
> -GROed packets as soon as ``rte_gro_reassemble_burst()`` returns.
> -
> -Heavyweight Mode
> -~~~~~~~~~~~~~~~~
> -
> -The ``rte_gro_reassemble()`` function is used for reassembly in heavyweight
> -mode. Compared with the lightweight mode, performing GRO in heavyweight
> mode
> -is relatively complicated.
> -
> -Before performing GRO, applications need to create a GRO context object
> -by calling ``rte_gro_ctx_create()``. A GRO context object holds the
> -reassembly tables of desired GRO types. Note that all update/lookup
> -operations on the context object are not thread safe. So if different
> -processes or threads want to access the same context object simultaneously,
> -some external syncing mechanisms must be used.
> -
> -Once the GRO context is created, applications can then use the
> -``rte_gro_reassemble()`` function to merge packets. In each invocation,
> -``rte_gro_reassemble()`` tries to merge input packets with the packets
> -in the reassembly tables. If an input packet is an unsupported GRO type,
> -or other errors happen (e.g. SYN bit is set), ``rte_gro_reassemble()``
> -returns the packet to applications. Otherwise, the input packet is either
> -merged or inserted into a reassembly table.
> -
> -When applications want to get GRO processed packets, they need to use
> -``rte_gro_timeout_flush()`` to flush them from the tables manually.
> +technique to reduce per-packet processing overheads. By reassembling
> +small packets into larger ones, GRO enables applications to process
> +fewer large packets directly, thus reducing the number of packets to
> +be processed. To benefit DPDK-based applications, like Open vSwitch,
> +DPDK also provides its own GRO implementation. In DPDK, GRO is implemented
> +as a standalone library. Applications explicitly use the GRO library to
> +reassemble packets.
> +
> +Overview
> +--------
> +
> +In the GRO library, there are many GRO types which are defined by packet
> +types. One GRO type is in charge of processing one kind of packet. For
> +example, TCP/IPv4 GRO processes TCP/IPv4 packets.
> +
> +Each GRO type has a reassembly function, which defines its own algorithm and
> +table structure to reassemble packets. We assign input packets to the
> +corresponding GRO functions by MBUF->packet_type.
> +
> +The GRO library doesn't check if input packets have correct checksums and
> +doesn't re-calculate checksums for merged packets. The GRO library
> +assumes the packets are complete (i.e., MF==0 && frag_off==0), when IP
> +fragmentation is possible (i.e., DF==0). Additionally, it complies with RFC
> +6864 to process the IPv4 ID field.
> 
> -TCP/IPv4 GRO
> -------------
> +Currently, the GRO library provides GRO supports for TCP/IPv4 packets.
> +
> +Two Sets of API
> +---------------
> +
> +For different usage scenarios, the GRO library provides two sets of API.
> +The one is called the lightweight mode API, which enables applications to
> +merge a small number of packets rapidly; the other is called the
> +heavyweight mode API, which provides fine-grained controls to
> +applications and supports to merge a large number of packets.
> +
> +Lightweight Mode API
> +~~~~~~~~~~~~~~~~~~~~
> +
> +The lightweight mode only has one function ``rte_gro_reassemble_burst()``,
> +which processes N packets at a time. Using the lightweight mode API to
> +merge packets is very simple. Calling ``rte_gro_reassemble_burst()`` is
> +enough. The GROed packets are returned to applications as soon as it
> +finishes.
> +
> +In ``rte_gro_reassemble_burst()``, table structures of different GRO
> +types are allocated in the stack. This design simplifies applications'
> +operations. However, limited by the stack size, the maximum number of
> +packets that ``rte_gro_reassemble_burst()`` can process in an invocation
> +should be less than or equal to ``RTE_GRO_MAX_BURST_ITEM_NUM``.
> +
> +Heavyweight Mode API
> +~~~~~~~~~~~~~~~~~~~~
> +
> +Compared with the lightweight mode, using the heavyweight mode API is
> +relatively complex. Firstly, applications need to create a GRO context
> +by ``rte_gro_ctx_create()``. ``rte_gro_ctx_create()`` allocates tables
> +structures in the heap and stores their pointers in the GRO context.
> +Secondly, applications use ``rte_gro_reassemble()`` to merge packets.
> +If input packets have invalid parameters, ``rte_gro_reassemble()``
> +returns them to applications. For example, packets of unsupported GRO
> +types or TCP SYN packets are returned. Otherwise, the input packets are
> +either merged with the existing packets in the tables or inserted into the
> +tables. Finally, applications use ``rte_gro_timeout_flush()`` to flush
> +packets from the tables, when they want to get the GROed packets.
> +
> +Note that all update/lookup operations on the GRO context are not thread
> +safe. So if different processes or threads want to access the same
> +context object simultaneously, some external syncing mechanisms must be
> +used.
> +
> +Reassembly Algorithm
> +--------------------
> +
> +The reassembly algorithm is used for reassembling packets. In the GRO
> +library, different GRO types can use different algorithms. In this
> +section, we will introduce an algorithm, which is used by TCP/IPv4 GRO.
> 
> -TCP/IPv4 GRO supports merging small TCP/IPv4 packets into large ones,
> -using a table structure called the TCP/IPv4 reassembly table.
> +Challenges
> +~~~~~~~~~~
> 
> -TCP/IPv4 Reassembly Table
> -~~~~~~~~~~~~~~~~~~~~~~~~~
> +The reassembly algorithm determines the efficiency of GRO. There are two
> +challenges in the algorithm design:
> 
> -A TCP/IPv4 reassembly table includes a "key" array and an "item" array.
> -The key array keeps the criteria to merge packets and the item array
> -keeps the packet information.
> +- a high cost algorithm/implementation would cause packet dropping in a
> +  high speed network.
> 
> -Each key in the key array points to an item group, which consists of
> -packets which have the same criteria values but can't be merged. A key
> -in the key array includes two parts:
> +- packet reordering makes it hard to merge packets. For example, Linux
> +  GRO fails to merge packets when encounters packet reordering.
> 
> -* ``criteria``: the criteria to merge packets. If two packets can be
> -  merged, they must have the same criteria values.
> +The above two challenges require that our algorithm be:
> 
> -* ``start_index``: the item array index of the first packet in the item
> -  group.
> +- lightweight enough to scale with fast networking speeds
> 
> -Each element in the item array keeps the information of a packet. An item
> -in the item array mainly includes three parts:
> +- capable of handling packet reordering
> 
> -* ``firstseg``: the mbuf address of the first segment of the packet.
> +In DPDK GRO, we use a key-based algorithm to address the two challenges.
> 
> -* ``lastseg``: the mbuf address of the last segment of the packet.
> +Key-based Reassembly Algorithm
> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> +
> +:numref:`figure_gro-key-algorithm` illustrates the procedure of the
> +key-based algorithm. Packets are classified into "flows" by some header
> +fields (we call them the "key"). To process an input packet, the algorithm
> +first searches for a matching "flow" (i.e., the same key value) for the
> +packet, then checks all packets in the "flow" and tries to find a
> +"neighbor" for it. If a "neighbor" is found, the two packets are merged
> +together. If no "neighbor" is found, the packet is stored into its "flow".
> +If no matching "flow" is found, a new "flow" is inserted and the packet is
> +stored into that "flow".
> +
> +.. note::
> +        Packets in the same "flow" that can't merge are always caused
> +        by packet reordering.
> +
> +The key-based algorithm has two characteristics:
> +
> +- classifying packets into "flows" to accelerate packet aggregation is
> +  simple (addresses challenge 1).
> +
> +- storing out-of-order packets makes it possible to merge later (addresses
> +  challenge 2).
> +
> +.. _figure_gro-key-algorithm:
> +
> +.. figure:: img/gro-key-algorithm.*
> +   :align: center
> +
> +   Key-based Reassembly Algorithm
> +
> +TCP/IPv4 GRO
> +------------
> 
> -* ``next_pkt_index``: the item array index of the next packet in the same
> -  item group. TCP/IPv4 GRO uses ``next_pkt_index`` to chain the packets
> -  that have the same criteria value but can't be merged together.
> +The table structure used by TCP/IPv4 GRO contains two arrays: flow array
> +and item array. The flow array keeps flow information, and the item array
> +keeps packet information.
> 
> -Procedure to Reassemble a Packet
> -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> +Header fields used to define a TCP/IPv4 flow include:
> 
> -To reassemble an incoming packet needs three steps:
> +- source and destination: Ethernet and IP address, TCP port
> 
> -#. Check if the packet should be processed. Packets with one of the
> -   following properties aren't processed and are returned immediately:
> +- TCP acknowledge number
> 
> -   * FIN, SYN, RST, URG, PSH, ECE or CWR bit is set.
> +TCP/IPv4 packets whose FIN, SYN, RST, URG, PSH, ECE or CWR bit is set
> +won't be processed.
> 
> -   * L4 payload length is 0.
> +Header fields deciding if two packets are neighbors include:
> 
> -#.  Traverse the key array to find a key which has the same criteria
> -    value with the incoming packet. If found, go to the next step.
> -    Otherwise, insert a new key and a new item for the packet.
> +- TCP sequence number
> 
> -#. Locate the first packet in the item group via ``start_index``. Then
> -   traverse all packets in the item group via ``next_pkt_index``. If a
> -   packet is found which can be merged with the incoming one, merge them
> -   together. If one isn't found, insert the packet into this item group.
> -   Note that to merge two packets is to link them together via mbuf's
> -   ``next`` field.
> +- IPv4 ID. The IPv4 ID fields of the packets, whose DF bit is 0, should
> +  be increased by 1.
> 
> -When packets are flushed from the reassembly table, TCP/IPv4 GRO updates
> -packet header fields for the merged packets. Note that before reassembling
> -the packet, TCP/IPv4 GRO doesn't check if the checksums of packets are
> -correct. Also, TCP/IPv4 GRO doesn't re-calculate checksums for merged
> -packets.
> +.. note::
> +        We comply with RFC 6864 to process the IPv4 ID field. Specifically,
> +        we check IPv4 ID fields for the packets whose DF bit is 0 and
> +        ignore IPv4 ID fields for the packets whose DF bit is 1.
> +        Additionally, packets which have different values of the DF bit can't
> +        be merged.
> diff --git a/doc/guides/prog_guide/img/gro-key-algorithm.png
> b/doc/guides/prog_guide/img/gro-key-algorithm.png
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..89cf427b3c7c406d7ddfb
> 485d3cc5122b2206c1f
> GIT binary patch
> literal 28231
> zcmd?Qc|4n2`~RD^yTjJ%q@_x!c9)i-F;k)~rGufZA&SsSwWd%*43R{42Q_QADn
> %)3
> zN=mjCF-8XxLy0M-ASDP%QA2`6_}yvu^L?J*bAIQ%&iVWN(N`L|?{%;1TG#bi*IM
> g+
> zM_;ot7Z*D$1_FV^FJCgf0RnC2f<T*aJGKFTi4CaH06sPa-Y~xaDruLU2EP1u@4V
> %C
> z5U4b9=i2S9!1v$pU%D9x0__hH{o6EhS3VR3LJVCtJ#QQ8GE?VQsLBc(IpS6qKP2
> 1t
> zSMeX8B`5j~w2tXDgHMi@75Z84FTcVh#b-=iBwv<Mvh&7YK1|<XK6k#@G?}d5
> bN_<H
> z-yaon{D1tZ74P*}_ThfhYZrE^UTTO$@4c`oVsF{8huHb;7zY9&4|#-9jWpK$(3@7
> i
> zoc4;b)tKk9Xt=nRes8vF^3{NMM5~y><GmMlC}?@!X72+&AMZ`8Ji`@zUf5yjoQ
> nGS
> z<6}iN3!|SuUXYN=m;3o+cJxl0MA4m(I~tx8ng6`QW9)O!@n6mP|KBucu%yaZP{
> r5G
> zR(mf_kLU5Xidwjqa)0Lz<c^>1WuMwXd~ogQuRf$b{D1ht<A9Oi`v>pEu+@bL@2
> t7{
> zjdfZvaRk+t%3i*jqE_sJo^M2ELh2S;R5%O1^+KMv`%ow|9v?ne7lP!nX~;}xzpE_-
> zVOf3ajVdqdBGtIQnS1w?YikeQf>>l{W=CiV|6WSb(H}X@Z4A>a2z&G=U~V?^V7
> ^>@
> zjz@|6ix1Lm{V#}rB+16#f3j=V#@eWSz0RF&XH{G3k*x#0;pqO>y(Kwc<`G-*qV6A
> q
> z>b=i9NwhZ5FZa>KY!c0$Th>8o?oIAixuaZmow!$PzqyGY)3?ZQ=3eeBU)d(BwG*
> pB
> zf7w-@wOEL_IilNwxi(x{Td23YdQ%kh&AJD7WV%@6kBNOheGmJr`sVr_8}&OL
> H7=#s
> zU1X+fZ>T=<{xZUiiCGxXi}hhYeR;g<bn<iY3SMG9HYyvcj|ux@_|D0!UD&Hvr2gH
> +
> z0ZR<`%H@}?uFa7Z<#ji-W(RuwY^+jjBQ1q{P1MVM9h9rS_F6vc`t4r>=WzhI0-l4
> w
> z=H7Wzj)+otS*x6BS=Ty^ayw(HY~n7iy+luVGzcwc+(<hodJKEN@22=;=>sqIi|;Ux
> z*daJK@FVQV+fqflax0M1sAgShdd_<l{Uf3%E4*5{RVyZ;t;d_LGf_4kV9HRgd;Bxl
> zIy`|eF?-%t?qVa21H07E*DMb7)MRv{gm=@^)76Sjiv~-f=fu$F!xwcvxJ5mOEXlW
> ^
> z8qr^kUm#yqOM4Cn6<S{LE3WlZp(*{0;_Amv=W<V`@v~hgPL{d8uq^GWwihN-
> @vLL%
> zd<QEAu4)0-W(S#(7q6z5ybq+Srbi6F32Nrz<oR`fwft{GFxTgD2j%X|jHy^KJwBpQ
> zH%JMy&0N2kWJ$=}+z<hwxeo~$cVX=g8|W*-9}s2D*1N+3GVkhOJ;CMb^yYt?
> ?ZbAr
> z&z(ke5@U?s?vlAHba{pHyre(Ym3KAWXyjy`yN&vE<i`nnq&b{%5%1Ev`k!W?F6
> Z0V
> zo5S%mWbu_fLM9EjOWLzeY3|^Hv)_s2-1Hr<0#lCLmRKO6;V%dp+`o8%bVvM*
> R{<|k
> zS8~vN)oE_RJWO7c$k}S6k{u@xyOsRZhK}2S^K*aYsX`Fyb5HK|U!$?t!00x+^W;
> C-
> z|2p#w*W<uH0$^qRe-;r{s}KfenVlIvJAf92(%v)@aF>5q&Bjj`c&L3Ic+>d%FG;BS
> z{oW%<rC$;>shA<|nDLK1q~F)^@c;Ex58wa4-4LWVJyeZeU6_;r#wlrk`?fY0_1Yh
> w
> z+202~^!;6wNC#Y7W$F9410r%z$22f(lG!zLBthCMs&j3qf6`8cGD47*XqcwP0R@
> kJ
> zW<U1fl`hqA@vEZL1rOKhjGy;412Qj}>s6j<htdiW2`;CqB#UaivBn+=pMwnpipoI
> %
> zs_R{_3A!!-MA#9VEzCl(i!2lT*d>6>zf^j-_rla2(}s(&*UKHL%0_85K5TVZb(s!J
> zW1djE&{AGT_p8SshCY<I)K!mIuet)d&DEJHm{BemG+%%Kp<!j8iYhzVNJH6AckT
> xy
> zZYlhwJu^8!>!tlzx#c#}Ukfe$95aQhf`6NG!@2lgvYhI99d2<l^73?|5wCx;+e_$Z
> z1;sY2thzK3%fv;IrKE0Q<d{(=>Xl=R54$EV*kZW_!yQ4taA~|w3e`ONv&Enww|p0
> =
> zmrvzT9*BJ3nwbikxbD#3yVpUWt7BhZY2pJ}?ZxoXEqe_APKYPe@dg!dB&esOgg
> %$2
> zb;e8<ocxXPgOPv>ei#%MZSec*N!0-pE$qp3KbP$&5GYhrcQ~k<-DxflqkJtbFD|;5
> z2h6YeMUP(bl`=!e#5d{|hZ*xc8WsI*JG6coFMm@wz>4tXmMa1O%etqw1$0`h
> k+iW+
> z&vKxAj+nPre+SQHzLh*j`td~FCud4;u(#)`O;~{oX6*MfAXynDp=qx{R8e0{(pla}
> zNPQr;(R?E;%zq2$K`ZCbDuy+JRP}8sOvr;-)YS{~%M<OVtvPhR(9OF*HsjB%?-X}h
> z3f0#>tG|=kcu7Qa*iZ=XPnzq-d_imMArPqcxk}c`H-lj1NbHn}{GcgKSQcB9k}x06
> z4ih!-?QK?*V{RIX&`~e^>I8M=f5o_)Zvwq(%UYYH5=e_j{G2FV+vU;sC14?aCHXF0
> zka^<{F(IqJce(Fs@q6Au>j#@|wa6|+ztc`D>Jy7OTx6%X%Lw#wwy8h7aeba5&oC
> XB
> zh*A0oIwQ4v-n92{V-H7o)Of$$$6|v`AkdfV-=G$>+(D=-1}T9^QOX2u`8ae)05ld;
> z)F-c6bQ|?Yi<lVbqh9Ssjo(d5?^AhfC~(~g(RI%H4H5&_0hc^^5F)z?L}nJL0K;XE
> zv12OKgGUcFO56gao`oAfAEx&R68~OGsk3{u33MxD={H+2Z@M6B#B|s_3h3A0
> zmd_$
> zYKTRBu5&mr-?0=|3IhFoHf!xiBQmZINKBpUA=08@I{ha&t7#U&<}%**9KuN5qd
> 9;;
> z{jZHH*8(|*!bd+pvFv$WFB+1oZts-dhXbMJ?W=9Nv6Aa`5(N6*zF03PLdMYq6Ib
> _%
> z9CkU5YZ~llL!fN++Og5R2XqVVvcAA^oemXp`}}fd*F>Fn*bg@nbsoPj$LkNEjYQ`6
> z8$s)9tH=m$vL6j7Tfat^W;VI8H-QplDFc|G4dJq@3X4s{<E32D{zzuLZi%SCvP}3g
> zy)e-yIAgo6+)8l4{4Ki&^gtinD{<2T7^de;y&C-5(*lup@IUMlg40pU#KSv*E5<5`
> ziD$THf3WJg^Rz9-+%I_f)j#V4i#NuM=1?rQQpA)1S}*i7uI1y^jhtu0cMK&M^Wz1r
> zODE<9^}G*wnGs$ucbSwq4(^YvUl~&;uwn*_s%$3)o<?kR$~#fF3*U9!M(L5iD;Iy
> A
> z`>;X!+_MD~#QgbCr=yxL<Iy{%b)>)IGD<)7L?nfx6Oe{J)r|^qy7^daDCs*6Vae_P
> zk6X5M#^B<r`M4~{q7=_K{HK;&JJd$V8GlBiYp99-Q;^Mur9%;=hq{lF1RKX3*BG!`
> zoa=&x440dA1=po9F|&2cBi@8LTNF@Sjy!X%_f9XnV>7qnoYyAqMx8%KJvN5Cj
> =i|3
> z>8irLIdTFe7yhd9zD{zOs^0;+55X33)^g}2=5dkyIjtpB-QB30WmP?pr#ie;RMh?3
> zujLIg-Js<b<rQ}ME|Jd0Ts6{Ciwfc0;`EU<pYO@}Yhy(PnS(yC;>BsRb99ji+X;pl
> zUZ?Z7<;z*gq&u!k`6VSjH}BkR!_!(g4cUJDO8TRszRzhD6htaNekcm}XW+RcSdt57
> zs5gUi_nZp(sY87HEw;kvCrGSb`z#J&QCAUG4!NobGacw?9fEcdXQ%GoO6RE%3
> dwF>
> zx7ed)rI!<3#`}y{B{&Gj;>55gM%HGYfoHfQa>Uzg-GJk`ZS9-37ll4Ot61%M?Z|xc
> zkzpgj9^Yh&gvPcO!^@iOQk}y^a`lbXK@051j7hQy5|6>|R|v{cxwMkNpf8a28S?
> |B
> zM+(F=6kW7>b}wHP-j{bySoR;l*uu~Brn(Q!RjSIf4BPodsJfPC{sLuN`*Nxp<(5qF
> zYX*&npunv{zX#JQ@?98lHqChM%MB1QnNiK{bs~DzFT?<TGDDV755C5;y&);1L
> D7oC
> z6h*QSU0Vw~8SI@-m{~>bTRwZY2Mg2GlJu`3Sk5v$$H2qpseC!KhU{m(rUQd-$
> ~u03
> z3k_QeH=Oqgb!<RA-ev0w!~E%x<dTmY?CW>tgp}7Qn@@L++f`=N5(W#GWF5w
> $*-I-`
> zd!$2Hy70K0JBh}=v5LJaDhIu6v~}!Ua^J&8FbSWomCcnbI(<JY7x)=7Y)4PyzYNgF
> z*5zB*Vl^4wcN*ry`YLxk{lu~{)R{k=mI(bSPslqqtK>EaW>!f{y+h9wm24FJNePml
> zAuj8Vo7swI7-%f8)8}j{e;k&Rktry0u{2~{>qnPw&a-~+GZd#(^6V59>G@<B;Ym|4
> znF~HiSZI_vk~|&UfNj)xJohrWWW6YhWYTMmN#Nf3Fe%YJJ^P_`Z*jGzPP~`-kZT
> <H
> zE-dujSVg~_(00`yZA%ak4E-5Vi)Dj(3q1X8CWqkIL~XgibC?4)UNSZe6xXa}Vrhct
> z$R)n5%{d%fbp16atY)%o@;!BHHba}Sh~3wykR{KTrhX86tyyCj7UYDqA{*ExbiwS
> O
> zz?XD9vQ})Z3f0PCm}{B6mBFK-x64$?XTf)do-8btgL4q=N;pG<ZC7kwB?RuHO$&
> +9
> zVI$^x?Wnae9m<f{1Xb{R-}CD?)1YDKF$rN)HpLv%Gap0$?u2`9wS|cJ50_<r%lrY)
> z{(byS;qNXJ`&y(PS>lgCnsMKXG|)GYTX7E1gF|-2t``NI-=dU~ADwGJ(3(f=5}<eO
> zT!&_Zqj2>(b|GZS(G<<zROG0C6y1>Vc+h^U_GqammjUn5bqzM!hAI3Co?Lcdg0
> ~y)
> zu3=xx$4*@_%3p88%m+6nc%U`X7-6pa;J4o%{7_bhm+O?$T<fkx_&eRDFLtp4c
> XH2T
> zJmbBk!oIq#HJ6cL{wj$k7Y_>VD8F_o-c2DvDW!fJm}szWE=0w8HeAhwDM7zE3
> Qt`&
> z;?3)8`3fL|esg|J@r8%=kcOX}{`7?gy~jISepEKvFMh-IWAQ_l4sz8>f4gS+d8KR*
> zX?&FR>Ft}CH9lM=U%$}@GSDp97d3Hr0Rty&nHuC&?kHT6*!W-&sqj5LW!5PfL
> IzL4
> z->n+;hJM|Y_@%_)MbYwEF3c5q>UHl~ZkkNym$ZAyod#O8Dmh~KItLvcyTyPt#
> Phio
> zG~y9%5sPVK#kK@sX%f_Xjc4Lx#Uqe%RmB&OD+X_Z8_=-QdpmRxUa>7veKOor
> `Qbxj
> z>zGQ6Ni(x>9ToTV8Ra~s#Z!Nqz<?7(ljz12gZ+KT7liRhQ?zA#;Q4Wl3h69ELLvse
> zJDyHT<k1B(B9XE>V*LZQ#dc@b0TXD*ktoNR`(gT^#<Farz6TjWI=V}{L?>bQX0otJ
> zDPU)x%v>eRtPd7y7t%0e^PoxU1XyIh!I%z}DEm$Wv-sYd6z9K--ygi@ICI3?aPk3P
> zDVUaRQP;CPql?YLrt{85XiJ*Rhh;`b^8P4n=T^eXCQIUD7RWQ1i(xfmuPPcVCEKp`
> zg%Qv?cUFgxPEO$Nvi1puQ1Cg#l*XOTm1t$_okaC986$3M>Cm`$=GZN(M0g$p
> *?nMf
> z%0b^qE;i$(;Dp<NoP4L0#PW#>1^HGzzl#>F%-q`9NBj~9c|A7tU4aJV>E{`SxnM>
> p
> zUbW9|OM#Wjo7q_F80F#|YFTA%=;lKR#uJB^TSj%HOeUzzURt}@>u*)E#5`@u9
> NgKl
> zV7=6PFDiDL2u=(|R<=>9@H3xR9pN3?W+fq6@D6htQO!{({V&rMEbHnET=4U>
> <sJ?_
> zU@7pQ>Ek41j|Tng1lZ&Co{^EhOs|Nn6vwunIj}1H!v|`TIWc=~lBB|0x~(Y{?-=Xx
> z4Ywp2W;KpQIa?a)=ojM?JNsD%OI24E51G4FXIVs*q<;WMA)16=XO?&+rLj{hW
> VDTJ
> zmdPk@E?3A~kNi2+eU!-ap19>?sE;p;fpm{8DCg$S)a-2yIQp)|FMDnE?X1_mCyz`
> |
> zQxMZ)g?7VasYmwJ*Z68anh?3zN=t#a6O{3Iv7G<ETPgP+R*pwlod}$(Xc_lT9`U4
> M
> zRjcB>PDhR{9u4eTe2avHhBMqQdq)sryzJg{(7bh%ukZgZilGCWGAg{F(ZJBA_@~#
> `
> zhuR?v8HMBqYn8|RnHj8Cm1_0P=!qJFl}#MC&6X^<d;A^7?G;>t(?aPA{1h54ms
> O%K
> z9WOXQVO^^f3OwrT>J@ZzyuQia-`lU)zf_8NR=y9h^-2o#wW(Dro%kuSE^?b!)-C
> W+
> zvBZgv8jtPKp}Mmg)q^)^s=$=H<76F25fKOG<l?@3dgqt%n!wJTI()ofCU6)3K7;p~
> z0gtE1UfXE6F}`jO2fsjP>te`3K92JZbbRFz&l&X`{Tn29Mj*eUAy^#Y4gEgtHdQ4z
> zeK<wZH>+r&(m9(^XEawMUW$GWO=fzlEF1<xm4$GdFR4^}o_JYSesso!+`BX(lB
> WZ+
> zUw}34^3mf{zyGm#eZn&vgs)kb^+32q4xMb^FE+bZZ!WRUB;6W%GWQoPK_T
> QL<dfUE
> z1Ei+i7)F~gM(Xx*fhbI!JZc5JgTNdsKkGE4v-EiqZ6|-R?@&CDn}@uAO-MqCtIq{a
> z@r^njzM&I#i#}6r-&1W_s6JlYVv)LCb;a4lCd?8K5zXb6e+p{MRI{C}5QrbCkD}kh
> z8Ty~*)(}}3qQXmczJ=GjtB4<NFZ80*Z}gp507o;A*J7Ib(@3=x2#{!)exPR9r1x!y
> zbgu&0UZPb(m7(voVee?V>htU(q$<w(z4EteHCR92MWL%XC#k^T)KEeRPb_(qM
> }FWj
> z)JqC*cGby(ymEpo;HojZX!VtKZtuXH&&<6<@w&hDUA0L$esMftHi^@&ryNey{|
> 0u9
> z84^{2Ky?#>pz38!h9&#Hm|awxJmP3AWkT<e3U^$Qm3zPU)=-~v@e`3Tv-pm=
> 5jg@P
> zXKt|LiByXQyZhOai3H~>MI#~cM=)#)lUGu;pNXXvz~zCY!hSz)Av_m!!TWRQ*p$_
> c
> zBztm0SeY0O?IOKj)rNV{COgSg>2Yh<<MNw982*6R3(TgzPnRYN<5tGcA&Y!dvJJ
> t*
> zg^1Y{EbeQ62<?dF8!TX^ZI&RZwUbJ!&^t*aUl^~pE1|qs=`bZ9`k+?pH<VxD`7{0O
> zm@0g-2LpcQx0zhwa69bR<k@3fdZ4PF=3o)ecAgu|^d~8lUS+*nAP17?Z#Au}M
> pk@6
> za$kQqn6SlUEIw;njB?!*dS5v~puAK=RPk-Z)cBrzK^&BuM`|2%9SurmBx5hRy<T-
> 7
> z&+i*4S&<`BN@E+mA9;kmh0FcRc1aJ7$=`?Fn%Kpg-)_0NlLp<&j-vM*ypOn1b1l
> 7Q
> zGtO#|w5~F&M8pzWt{<WOF5_A5-gMDqy43geROxrgs;^-3yC)Q|Z})t6MG5J5m
> UsNz
> zGw>km&@OkzKT~GkuO!pP1=e|olUKE<b(So5_efme<#`8IcKb5q!*u!#dImRm4g
> 3eM
> z><7uq8#>hNpV!j;85uOgA&Cj)y2r#mJS&Yu*wKA-%KiF6?fOze(YPhd*t|}I!D49~
> z^SGNMsaKgme-~B=nee{gZ6&w3A}h8Gm%q&pIj4kE^vfI)i!{jA9>58ewK|bqrjPP
> 2
> z%eP*~433Ds@HK~m_8BM~co6^_bVU}a(y-DKb^>(3Bo8Ntn5*%&UpI{8zq3oVc
> anVp
> zM6GiIw(qiB>s*d<;5%?@CBxr|;jh!qg4~Uadp<C6$ma`WG5p<p>K3`0ye45Wu(J
> 3k
> z2R$>x)7$dekINj%JF6%d(%`?Fxze|Kpe>P?5*=dHlGJvc7AclojK;8hc5(}7RzNJ3
> z3l1;OeVNg;lUuf%T29Rmm4x4vZ=uKJ4rVS+8qUI1gx4B^C*aZ)^M*16MTy$o1
> Wu%8
> z-*@Hh%a=oyUZ!p;c>;~SvkM;t>fcQn_@_Jlzlje6QRPus^VLShiryd6i86qeXS=!D
> z5U>-pA8~ORe2t*BV}dkPwhq})^@VxZ;`%;DLsZK*a`IeKZDzGqX?&3sJMcU7N#g
> Ah
> z-N1Luz>`BMz@qQzdA$zaR!(T?yx4xw_ou)r{in8S`#g;FrRurr^u(0MP+W>%D9pu
> D
> zYan4h<qGibp}qu&ZQ?GNOLTJdBGP?o<>C;;X2QSfLOnF>F<FJ!Y0VLkn?pBQxZA
> ?#
> zwrY18=sf+1j6t7fu0*9udLA)J*6opz<%ovb9T@7{dkw<S0qnQE&63u~if5<pATu$
> K
> z3_xg!e~jM@x|I;Oj|JwQIV2rYY`T(4{MfN2E|ywc#F<EO9a-yD{b$XuVU|JZ`O-U8
> znrqKlglil19^pHPgCd!WK5T=K*&+6E`PC-&6DgN|pP~t1{@4UMV@{4*Ohthn9K
> RQL
> zD`J;HIPZJ4d><?XR#3&N-VC}5kJC#m+78kSAn)K&j;5o!HI}UiWKn<gW@nE7+<@
> pl
> z_?-akG3t<EXkdTJ5e966`YrL};#NzV8fw&^`^?tW{l|U;2xM*r{o6QVeXV@yRBn3Y
> zz09z?!%BYm|4W<Y)xs5KVn_}REj@|34pXmN>S`YfXieX^eM4*UWw&UTC-rzjxL
> }9P
> z9?<>qxP9Lf&!e_dFr5;S#%o`Uhu+p+Z?}ox3i>GPKSVeGw}pQdrM!w5i&oNc^74
> mm
> zdXNVed29<vRv-SG0DcYImVcr|1@yPpPp_QpG)`SGPC<Puz*c_MJRp2}lbY0>p%
> Sqq
> zvs|Bjgzr4Txd8vUSK`;tgSw(Zg;e5_rmqw`d`-R9c-4eazrGz-rE0UPUdu0ldl9hY
> znOTY&Tuc~>PC53U17>o0&98Kf8o^<@lN7!-_O!-%*ZYh_2$PKG6_Zqg;0HLGI4
> PSw
> z^l$U9pHm75G}&~>U(ROFiB!IBx5nI{{i_85cI~qaJC<uT+{5Bk%ct_c{OjucD9O!1
> z?c)W(t3|F2thB<h_Kf?+cr8jxQA1PJ{Xym|p9NVy4%s70zAK@^$+k*){id=w-`bNq
> z2|qaZd|O|rIaJ>)8pLCL&gP%lMNRrAAE(fLoUmCNGnNgOujICtT2l(_$pX!uYHZT
> ~
> zuV!~AJbcJ%_H0k~c}vuOGi$zEgS0aFO-jAxSpGzX#l*uCi`nUQhQWZc_{+pSWi|h
> k
> zfB~|F&!*=F)hQWOqgZ>-gk+oO=YgYTlwmFvGi#2P6HS1>J$t@@mzVR)rVhFkF
> O1U^
> zq)XPs;vtkipRp$=`VKBj7>q8h^iQMSy*pUo@YM2ttV3qUSq@718NC>_G=7vtXq
> xF-
> z_kwaMMSSPv&d<W;^JMtJN3QRba+ii)+J9RrrNVO{<%9C_^F(qRrB3-v#MxU*1D
> -8e
> z0`(qpx7hmCj}*640TKAp)cvc@U16LVNC8sKp5+<$iKefsG(=TyEB$LF;C%+KvSz)C
> zDvY8&0u;wM;LcyUm+>7Jvo-Dw>P~iF{9^=kC-+dBQ|+4_ep1E8AZ^mQQ-^G)6;
> nKO
> z2QJLSBfd^Oyg73(0Hu8wzK_#<a(Gj#?VmPtbY6UPf(@xC=~Hku`E-7C=$enlF^u3
> #
> zGn4txR2+@Yq=ZZ%N7!p^v8syXZ&flf%O8Mp-HmPivRf{s&v>~?em?APV4u+3!s
> YUu
> ziPCws<A=3Imzi8mU#iN*`G?19VNyB$ip4RUx5V4M_tL*+=ac3X6>_RSL?5g&HY
> p@~
> zpT_ayx{my9u{)!Bg|ei!(B|lJ<g4z5)&9wO+4o(0lwuPpzlQS!U(#NAVO6?lS3(P&
> z=jjq>tZW1R%w?}pS7GM9=@9R5*DU4gl0BtazIgcCK^#fns)_*H2{8*>=)RSlD|FI
> @
> zn%s9a9EnM<mLz-Uv4`%y3owQ|!K_P&IdlExWYxrQvzcosZ!0L*%TP<rWaFG+w
> ^yt4
> zedFbQ`Rh^5=N%;sG9MU#12Wm;ElQVlJ{mOhdGo<pQ;L}x#c!rQ!rjvN5?it!zTBK
> 6
> z$j;)+0*Aw;aYKp|@$M&$5|XCH9>e!i{wB169VWn?bMtO*{CmN%0+o-bh<%)P
> cZ%?F
> zOE5}DfrfsVM4`Zc1;pXb7r$d>HJ<77%mQSuuf><<To8;e>XGI1C_0l4ITe$TJ3WyI
> zrYA|w)XjL<egf*`D5UX9{M-7T#~e~2e!_IlPZ?ly2(K?7ih1!<2D2z9xba)+6)36J
> zPj<Ny7T6r=VvP50c&0BQ1v(C65;^m)#AaDQ(Jp^VY`2_Kr-3K(MgvgD>L(W<-Mj_
> Q
> zBwFd19GMw{M6vQ0y&tKml743#5&OjUm&Sz#GC^=$>5Zy^ej+*O^w=SC?29J
> mp+|P4
> zX|Hlk8G)YSe9oKI(V10;@g=>Nt7E$ho+kdv(o%7Q5cG(5CPk({=~x!$LTL%oCFE7
> w
> zA@c?8oSpIK<BILlEnEscVP};cG3+k_sHZP}?DgQW1B@a0^3KvT<oB0kRVLywwn
> II&
> z_l-lq2?==rl13eN0eh*`4yFp1hZEiIe=T6xrO?@YoIiD<0kU&Of?2n~3PL0@_Yrc7
> zIVR38Zj2c2=6}~%K!V5kH49(GxloZbJpU_x_}YQLaSLwOVr@U28{ZjAgsRkCQ1ah
> >
> zSW%`qbuua3X{_<)rXISJPzu}>ZO@<_OSrw;Vz-0hwst*5*d|r^SUp9t%-!!kW8&cl
> z=bs#(?Ka4~zp#>e`r@n~uzAp#5v>2bJGD=rm+JHO7;OIdVWn&aa-D8iG^bOonD)
> 19
> zL|Hv=eqE7#Y~p&xjbW=khe^Y#IEBy+Wz`BhWO+-7PpiKX`E$v<J!H9Y{{G4N!v+4
> c
> zhqoM2ny?8Egivpm>n<P9LC+>Ng#nPki#}Zi{~F40U%nnh)kNLUuV!IKo{DR&??v
> Z`
> zKJELyh4pSgA(8mvPzN!a^IOw-QY(nXhdb&_KWCe^ufiRNNaDM-s^Th*m`88(9}b
> fm
> zz+WjRi0&!tQ6;sz(=OCgWX^6CWXQ!oeTY82(@IJ{<(JBzG>~_;daGh4_ZK1_16}K1
> z_H;=wR#u;rVLtJoDSVKU+F2bJxO2`aud$+H@TkwrKv`hr&v`w6;WF|F=W}4O2K
> yjH
> zZJ?u~0@`00Yp6zl(fyfOLIUUo-|i0oYoy+CGB>Mt6?OPOyN}#Fel|RJ#%|p?!Ybs=
> zyQcF-uFgLV0N*E8+#72=9|Su}KALbFqE@9^vt%ALF{3$ODt;w#rdVy^K5C`%U&
> FKd
> zE#O%3p|Jf^8ThgL?vHE6%$<+Sv8_2G6YPX=W7XNu0(&4H`ObMVtXBUHW`A|
> ByQ~tv
> zOfu8k#Imf-4-!0>zq*fz93>Ua`PWpGe_gm{n3=ZSWLI5(u&mY`j<>Uqp<U04<f{O
> 4
> z=lZ4SZ9a@7n}*nPrgZ#SukAaNx}T^WDt)cEoq_g`_hhFNm$$}Amd2HQoCCSx&S
> @z6
> z?+UyX@KIy_!hu9P=s9oL-^cxZj*&ic^S=0hW4`~nmfF=wJ{5SFz%cD8kxg_T50IU
> W
> zk@si#xM|vr$VT8C4sjTfC8;!#876J77boWL572MKU*74;tQas~#WSnhR_wSHzK
> 1Fr
> znVylSmkQe}sn>A&-}%cklovljf8~g{2d5n>^^iny4r@G-6-sMPTO41CvLe;;4|E?1
> z#g(CLIy927KF-fu3peA*CuRDT8L2?<{CR5o*F!xE^IqlMi4K<9W>u&6`JuH4)m@`{
> zKT2=@EK@4Zvl7J1+Bdu|^0lrRwvZvjVijzIP}$@_(Zb}4O=Bur{LmF&&fZ$T<jD#i
> zw|ljk>?MN(dyaY!=?G3n(0UPM0*<J?n?4rs_1Q&Tn)I^t6{HV^{ja|7OR_9fB*mw
> z
> ziOnaz2>NW7ua=dH)k+h;?NZmGbk9Pfs>I_RZ8mlCqfK&NfLogH?jZlg_?Ry4P5P6
> T
> z7G1ENv<AMt0Nf;XEqkUkqcj?xy2S>f1}-cDs;*PZ=DLv8mGaHLax(kcOp-^c|9ss2
> zg>v_ou<u;Gd_hMCNO<yx49R8|kvbz9jQ)e&6f)P=)7M)rD-IZoA9`d2oasH(kxuU
> Q
> zs;)RSLP5Y*44x%sRm_BcHOzE86a&d^@iyE|Ll3^}ozKu@6GR&n$IX<mS|ONm2
> `j}j
> z=_WE?O0-7LdAPlTd^&h7Xn(AO{caMZ)6zmh>@m15QThI)LF5^;Z`1$Mt;7Kxcn)
> )4
> zB21CdF<!g6EgOGY?+DuGrKsABEy@mrZBsFR(i1j`jgj3_Jw_o}F9xg?@u=WJ<Oc
> %w
> zyNsr@IWQ8F^UZp$aw@Jej&QAQCGf)MJpX3+;GoMsKe0fDVF>T$n`mYUlT(3&
> `9;d%
> z70MD=8P_8kMYE2+f&amihV802s7Js#suIciu!ObT_N~krvHkwx`39p$eppy&RzG
> Nv
> zc_fwxjU2Hhq?x|2YGZ<RM48Y6cinp4$Y@$dBAFg?KA!T<(K9;Y8eTuCdvs}j{#Qi
> h
> zo5pPdW2ZCXRYs4cua~Tp&UM$yMmUz9aScCEfQG-Rjx<!2d)P|o8tR4QPNBoz^
> ;pFx
> z4i5A|j>vQMjjLvlyD)KwWaj;$=>gvpD0v*m0&D~4CIqWyj$Gn4${lCn$-{`b!&l?$
> z*=>lH%8_1wq;NH~UHaF2@>@qV;lYtdwH?tkv9|+gXuj*6<L`uJMnIL>*_@Ape*
> `9Q
> zrF*+VtKLQNvw01-PV`77Ki_ie$zXo$tA{Z>nl+|3c>C|UR6A+-KFXQx`9m*z`-`Yo
> zV&P*D@wy*{yXNTZ4~Y{MQkr_w=(S&CcIr9YHC9&M6uP!o15+BOcEH$eP4@6
> k_gdMj
> zPdn~;I017>8v@aIiHU~6^GJmmp<D7e31gWQr!Y7EUUUX5a;%8GRDKgK&)HjS
> uyD-6
> zP-oB_xth9i54kp04x6yehtXdvF-*(I!!zFTNflMFD&gU$Bvd?ykic@DPX!c0bi}M_
> z_IK7_lR+@)h2Ox3eI>+iwe~j^4xB6RAXTAd6eZ%JW{Rq6-^x5xWyL?a0|~%^<h$
> &u
> z`u&En)7_e#>8NrA^+NZc+G?`XQnG^9Na+Zy^?JB6d{}!{?&^$_>Teg}j2t_{^x4e7
> z2KWb{6a|)u?T0>LucXnD7q?x+Cv{G(Oy6~%YTRq1URK{SFzJ~op5*!JLaxD`z3}w
> M
> zY}<NAg4?df2|G{Nq=rVrZ@AZ9esU@^#~%$CNMh~;&Y*Aw116lpmX5i>NmM}
> HLFEGO
> zZ_bo5S|-UKTFnY{Z#Pn3%;>y_m`m3zspo{YizjwM1_jFglV4vUkCS-g$K8(u*nu&y
> z0JixH*rwe2u@v0kyk!YkI5L2%W{0Yf$JYgZ5jj4hGyZlc0aBSzs>?txDze*MNtJfY
> zP4|6nv2X8^UFLO;<gbr4#&XQoMhMuqzpl4|LaGx=S=20s>F_ECfQ&d0PGtUJi3j
> k_
> zHhTmM3n}t2szPZZPlqeNvBF^_^9hiZ<(A0J0HA?pX9=4O$Rhypm0ox@VtShyX1
> $w&
> zEtUP}W{qBnfjUd!2P&k-x2dL{Ipc03;wMNfBV7XgyM(o2X>|3Xl=Xw#SAhPxl9pZ
> x
> zdILQeBKCpb=Ms)Uc3bN1d$0*)Mkb6*?fjjz&L#Xe*>E)x!spkVV}#7Y+=0!6TjA^L
> ztIH*Dj)>Zcu4vu+KcvI~8^Xaq>bAe#23u{|-nwlI=;M7z#Iy+~74=12_nixdOS3G4
> zMgF8OZhuvJpPP!B2FMdvUAC&<YA;PhvZRP_2R*>9e_yOGGg#~en3f(YmiHn3
> WX<XM
> z-wuIZeoVE#W3I{VdbEeM-dDp|O(5R>z6lh3F46^TTe<|77xXJut63Xv+V-GIlLt^I
> zmUTf?qvdxpg$KhogMr>0_*Qp;;DGD9_VV=qpedb}b>j!a=hx>*?n2TvngW0o31
> 43^
> zet6I^+qF8Ih&Ea+30YH(GXi~mXkzr-D5@x>?DLp;!C2iKmc>qzCspzvwHgyq>mkP
> F
> z>vK2l04QfN&Mj-bGm4br;0mAgGun&#t)?ers-{6iy6k+4J{Eokpbv#_M34+7d*R|
> v
> z5NITd#61`BM|qiF)qt6(`=<i;Ti@B^#a0!5>h88Su3Nt$+$p8@u07MJrh1*vAs}~w
> zZk;;GJGD(&QdyB59-Xq3n8*8xtcYa*@gM;fQOF&KL_mzi^Q6#Yu_EqMOJC%~g
> ye42
> zukQe(D1wf28>eS$=3F)!^+hOQhciGW#wN)pidf1WZ5Q=q%JhEGh+4X%J{@Dfn
> 8l8v
> z#~;qeO8nw+10~wGsapLa727QTKi^=^^2x;T21NXEwGNDb3RWHDDO?3$3&#k
> ^TU+Wx
> zYl?1`l|7p!f{a(o97Rm>KL~U>YQ9mbk522A@vHT(OB_U>HnXE#LqsD&1XkWYxj
> hMR
> zX_cQKCO9al-OTKV2)f%)TU1sBHecSfHaS5FF0kQ<4Pm-9<HcZsjNF8FDu49G*#!
> 4@
> zKXFiUN2k8bD_XY}w!q!6d@vI_LE9}YI4vzyo+~{>Q?ntIb-x_+KMj8(x>_qTp?#pq
> zN3|2+c^@aUQZ4J=>IkA^^Qgc4l0A>qYA5GwNpzo<xh3ooBYw?sfpu1plCcwgSB
> Q(8
> z0*C+>jNl9!tE4F1sjT}9DKlI?m4+0SAap*rCw1}+&90*mm*L}URn)&+Mz1?^@0{
> xj
> z&ZG1_%CW`z)Btp=h_=phNpY1nK!nNu?EYj<ZlI2a)4*@{zIX+yFi$?h1*Koi8^#J|
> zyt~uf;kY%dtE<4XrX_mXqz9Tgcm{Z4QEQLWpxokRoJrUR^p|X%u(%$3R$uVdFz
> w)E
> zw9Vc{CuM?iEBx5>GCMURvZvSUm!I-+m(x+t#R+c@dgL`GdlcJyJGoyuF+scXeqk
> 4Q
> z6f@C$&d+8<=dc$W(=9<!xPs_|tZ%uK=-uwuU9xVQ?~24{7G1(L|Kmn?%fX(I;x%
> O>
> z(z=4lAn}Gz(%FW`drncw5tqW~9#KUEd#Xy&9(Jr9_bT&b_w0JZ!O+WsxM7OP6-
> ?AW
> zO&~>0NH?d>CWjL)5#Q3dq=3BaiOVJ}bbAwvy0}V+AUXY|PfwkJ-xm4hZg#D`Fw
> f_k
> zli5aU9*<o53#YACM6mHtzjqGp&(`eiB6|BL5O{@rNQCdH<wji{m(gYMILrF480U
> e$
> z8lKyg8xfIl;y;tewrq!uF|UQ60cD@4!!koy?qkgGC2X-de2(!n?hE)bLQ;h5UI^?w
> zW>NQ+HbFW#fnXHPsZb-sewoMMzD6}ORdNYX|1pN*C5_cVYvd%R;3!u$;o#3
> F^pqd6
> z@q09^QKj)o<8M1*GQXu;tZLU>$3jIXFmtZb{T%UJIf_>v3`l~-b?ltWG<<pHkGGJ
> C
> ze=K`131^=mG_-!_?>X!4P!3bH!&8Ct(|Fj(HF0h3UxRNd3N3Y5Uid}t@DueU8>9
> GZ
> z`d1`omtTxm{Cqi~?P9D5d47I9xzkQAo9a^RJJa`<%k@(K;4qCLomTowT~m50>N
> 4Uw
> zmS~Nz%c>z$$#ss6DP<AvD+NarxBN80`M<Q||BT)OF`PXOFwFnW-HKEz4N$F%
> msIAH
> zv!(t)ZbizaB?57Oak8T4OdbUQ)i|7pd=&xMTH682I_ChCb2Cr$!o@G0`^CCu3$@
> 2L
> zB9A`aYf$?WqxLewFYSm2s-g=!jsP!H0ATfh{ROx`SW+Z@2_~ZSPJ5;1N~+Zv>=M
> 1l
> zP|Yc+>CI}KAu>oKy?=H6B-U2_YUjx`;WDEhwZ6*I)hC@|MHi|8mWUu)KxHj-xG
> 8Ed
> z$6*ww7>|b%d~euu816kxovV|>o+8W_MUPb{g%;O($=i#FV?B`wXTNGzblx^RX
> ~-_6
> z$T<~UoZfbhjyo83Inu>_(hmEp?^ZoMDpSTw9VQ$<xvH58y1nku{V#ksfSmlJJwy
> M4
> z=&-;*67u8Y7Eb@lpf>?=Y+{}5cr(SP0r9Q;Fe91Kb%kWVo-5+TgJcaV|C_+Y{uhCZ
> zakpV;3=TwjPq<{^?Sv<byHs4A+bAM{+JEXz{(7`hro(Ha@Zcjq8bx&qK5O;ko)YXF
> z)c8IB19hW|?QIQt&o`zzZv#0Th{0?KiN>V~Pd3J~TpTE7l*_6Q(4#wD`%OEC@7O
> SI
> z=!{`ji(5WI=|GsguHAi?T|V~qocf4hHl&2KGG(l;?h69l4?A+aer~9yq|vetSBbCC
> z=Drj4WeK_0W@BglkDZ*O|F1yYnX=-dtS@J|B0d}io^C%o=Ug|3nRBXde;d3Bl%
> d5>
> zzb>399D6?Y*I4&uk06`5gM4CE7>9i|tBO$Tu?ci8-FUSv%N4^qJ5rI5w+FKFO|g1
> +
> z9{|cN5E*6moH}YLMiB3^*b_2`7YBhTEvt(xXrl4@iekVKGl2Nr3e|STTO0xoZ-$?j
> zF2@Kfq#go*u~yz((T3l~OntHXUpS}Q#Tf>PUnF0L;hs%gTVh+w3mR!xH@J*srW
> okH
> z$AO3?Vs4B%->GYE@O0OX#^Qo3EJ~u8O!EL}z5Z{;F*FrjJ0+m9(?cCJ2irv$3fOpp
> zZw{h97*K?`@U=Z<8ms(0A5^=shx1(`E}@5!eZ?Fa!v`~)vcU*g1A!8@3Wr~eC6B!
> v
> z>jO?1sw_=qtaD$XFo?V@r{_Ahfa1Og>IE;6KA4r!r@EcN0}JJ5qN;XBpA0B<R}3_
> 9
> ztu<mz+-Bn?au<Dq+oBeKmn}3}Lbr;8K86}1CvrLcIOE#G%(LGMP8WB0pI&n8VyR
> ;a
> zWz|6s;O4IP(35^xx<O`qm78%)(J~W?=?f51xBW+q-y8LWr`kLl$zE2`1^kz4puux2
> zg*pqq07plbZ|T-R{fWtXzqxq*&>Js3QB|*GjP^eQp9465%N|(fnt3T|BLDDl(1W6
> E
> zsQ-b!_Sa{LsG0IUolf77csI7X3tIRvw<ZDCDFTrr?ys_X!Mx8;cBU8BoL)_*t)#1h
> zQf1Z)9>LMW6DRXWR8I26DsZ1f*HSlT3wpu!j58IctKn0tF99#e@SSWMb%h^r7
> =Wl=
> zhTg<-*^k^GiRyR}aJ<no2h@DP(qZD#M)Y+|-*N{Oz6Ep!xz20TEnyA?QjLhw=o(L;
> z{Em#mWdn~ePWW&&Y2#|uda13p8VF>=a>e|ZD`sY_n<0Vi!%ZN?2h8tXL9Evy
> C4%CO
> zuMY7$K_7cM8E=)_hy5u>(wm6sqUD*yNB>|?6hkmVoL$a>@M0$_K_H9Q8>?
> Az4wNd)
> zhOZcKBzI<$?7gS#Mk!ZkASPfOjIT)NM1>mg5<@|6mWYyVM(P#$mJ)ocr}L5Nu
> 6)8q
> ziL4ah(nS%3PpQ(p*Y0zS;VVLfEkKlkY|%Rz>|Q57n5fL_OU)@_@qzQnTl7P&zuW
> A&
> z|9EJbDb>sBNnHELxGAKi$f+%UeBZD0jR6#Kxv6Z}6W|c@P<sWK)b)pDokfBLRo
> pL}
> zQ|%(`VTsV@lvNg4UPu=9k)5Xkll`RRR*K|FIwf-(vrhIA>rV3Oi*1{@>N0oRa>0~9
> zDXf5jT@j~15x50JYsuo|vqglf8-0w27{pa(>$|o+g@48z#+6!dOGKUDbr0yGXF$G
> -
> zVt2k~?u>>h4I4OiO;IJ~?A-xM<Yct{C#Ft%O}9=kP+>MyG|te=xQvJ;N<~YAV`V%B
> z#T&Rsn27hN>0avYx>twG?F%fcCP0JBEayf!A1a^#{7h!9jfv{l`my6c`Ksb;8*2gE
> zd2BYRV-3IXb*t#Orz+$tYgkr)#y><zZ^yj$o(t<c+sEpcm6FAje9vuug^~vM^^Gk@
> z%-n0mur6q6xV@9;p71Jn$wF?9&hXn^n5h58W8_Y5gkvGbK+=oqK)KZy7Ff8dv
> #k{2
> zFVI2IR@lc%A1Cu)7Nvv{qx<ChukcSLm26C=#n-J**{>#-FjS>c6U%txKwT~@)?!Q
> q
> zDo<d+)C%%lugWaDLC^Hoya3TTB=NOi#4iO?@IX+JsCbVJOS%8Cry=N;5pU<>A
> =Bf>
> z$p&E9tgQ9RIKSO+*CS^V8v7I50QX;8M4a|{x4^3^o(x@k3ZSj(Wsa_wCxyV|M
> @OTE
> zzYDg#`O6n{=X>(K_I#JVS867HwU_osPX{IvXYVCXriS}~-DiIu&mmY46|OL?tSC`s
> z!Qdlme87E5`_G7lr=$(>KOMIDHmRJg{<A~;(Q@}3spyB!Xp*UMouvd6LAw(d7B
> ~3r
> zy^mHraE3K&bJ}U@Lrda}jWO}nQfcj__JX7<Qbi6a{Jc`CaKA5}@=@TKm95gjuIC
> Zh
> zLDGX2@zTo<ls7pHe1LPoBHsG2tDjBHnDe_I9LR`y;5C3BP|UC{BWt%$=*g94_Dj
> j{
> z%%z%4T6)*(ZNs_3yOHa^&)|c18*PWnmA7JIDVD6L!TrlNE;P};HGo6*<uPcpdL#
> S+
> zDL=9qHzs?K+`~DT_R==eeq8m{YRTGParG^)KNL|3&0)K}OBxp6$9(ax*GpkV32x
> @;
> zPkMdD1?lZ7Fq$oim<^0@LjMdGzQ?%(dQT5QVWG%PJnTVbV)Z$1r+1o3-RQrj
> 3WyWc
> za3c+K5~%DNN-Mt9r;|p?Rzj2$qQZhY+1}nR$pKD*G9Ui*x*aIF93Q%SYB}3IrbX7
> E
> zV?4k)rQ@OjHo<n&6~!9P%8s_gT2~7y<xF~X6!~}p^gEDgchiNAie@XxIg-k{j46Ff
> zVH8}Rx;@@i+l=mf#@T#mQol=k#B1@gba<b>Ej^X5w)ncWQ*7-Sp`phM1FbTdz
> (U~x
> zX?iZHdV7L)d#smDnhp7-hZ$THozlWnsjhmb|I1R8Ol1`GlH-yz$_DnPW|zei>%V*
> +
> zTWIapP_)aG)G;i6uX5+Hv#Y!s(V)}K=|glExD%q2)Y(s|P#zYDgD)uexqBs-;kqZ-
> z_jKIhqr5!|X*an3Q)|s+NPZ*M@9e#?)CPE`d4?|iyHc4Sgk(bf1nh;IH@v5ubDvzz
> zrIuI+o`z_5uPr9Upug{aLf5bzh`Xx#Z*&{S4|y(t#W#EAtbVJAR#-ebNwxKd&YR9_
> zjdZX>5Usi&6*CEGUs_mIvQ+O}{kpcKF$FyxXyH%>6>Me9lmM4Q!y?N*!4JYqcz
> ynz
> ztBy08za_?E{o<1chgc@Zs<JN3Bv7<_>7wJ+$xCv}Q7ub{Dkhgo;4`%c?VOZx{+*=
> 3
> z$-t9|1S}3wJg+BdC@Js&gw9swUU74Z<+I&oO0N|OEip<jm7X+lJv16I7&lWxuu
> *dy
> zn+EwTprE)l?AU3q8mlxz>A-;tN`H5~P|x)E?9O{`y-R{Vc(77SryUyqh$!Bu<mdWY
> z&2<%TC>UFaPk2f;6+uPtN#TJdw=^=1a@}}wEt=UqSn_dtCagV_)PP|Ej=dfEHQlQ
> |
> zbiUKNZ7x-gb#0`6&M<}*FjPXqs}a2|Y_XZfnGuKY5p{EQPSstJkxBe=%`P9c%f>h;
> zkA#bms$$SmGZfQeebYNLrqi<YBmSTaqg`yL4C4e*p`;=s@ZE9F+exGap<^up?6C
> ue
> zNMyZQELua?#h=tc#^ZV1@`8(UjBLH#yQJ}?;23mUlU_;92yPenEmcirPKILSo7lI>
> zRYpS)hfMC<^%_pA3~k4cl+4@JIF+s)x_@@L6PT$w{%n@O8)WAUbj*R<3Qx1H
> zZ#is
> zK5iR4MDZ9qoA{$)>5o==yj5H(sm-+2optWD-<^PARcBv_!Z1bQe5ZTpJ7JYJRuye
> A
> zw)M@EEj`a_Yu>z%e~7Z^M#oP3$BnJ|Y;Dqu7+YJ?8D+9cwdS{dE;K3~(NhfD$X-;
> O
> zgG&Ftk6Ny0<uB-1Y-0Hj>D*bLiD{uOQdkktI?{tBSQF(V^NekBk=&Oa!E1gm6n5o
> m
> zXn~6!&s2+o`I6oocHBR6V(Xu33t<`5zDse^H0`pTlt+m9b-OeQmGzvgR&gD8OJ}h
> G
> z1DET|c92-^^S|cfZyXY>kg&z~DTHMZWJDZ=lI#ud8`I><Pqa(K`&op9`A0|yAMv7
> R
> z_LZJ~9$!Z>lIzx!55Q{nI@GLV5=06x2ey&v*)G4mybU?&y#mhWMPsyX+NcM;LF
> c?e
> zUkH1Mc%+0H>P;IzHn~X3yrVcL5pSj9P~PNTW0mVaeZ_AyU@>~KVz;)M!M4L8
> !dVR`
> zv%jBMy6m<;IO_+baQ1u4gO6n|xuI&W4{b}4Ca$jNpNuZ~c;Hc(tc3i|B-Tamk#X
> S*
> z_1Sv?5Whs$w+hzv@|nGbYlac%c@HjhB~|u4f(*FqsRq}CBs5#K^DX%mXR=%7(
> VA(C
> zu$|gqzl2krdcVKsfOoT((ZjJUJIJ2lm$LP6>6YB!hN?`?^~~(P2T{v}E;kEr*mk&c
> zi>Fs2^D5NmI%GMIQ)#QWdj~89XLtb_sy3DOsvBczbGF(ohCj+TwOTH}D4dNw+
> k#(B
> zq9{^SCpa<`FlCU-sxtmfKCx1@J&a4#CP5%d`v@VQ09H0hhpV`qrgwdem%Z`;N>
> |3A
> z{r`%twcd4rZuQ+6c%6r|B$kp+w|Q*}YgjAR5L{bF43^a5h*#)R$p{?QmeM60kpx
> ^6
> zw#!g42Tv$QS6R~f?We=)YZE;uV(B;o%Vfe}@D5w7oQ-^6rRs+n!{O~f2D)pi#ouP
> 1
> zh_J4!fHG|S7O*u<!V>bhLw|2S1+AaI3*0p*DJ3yxiG}RJZ%sxsJJRZ_GSxKuL`R^I
> z2uGooi$lvy+4T~OAhPcE;`FDI0B*{TVkRfniTit7nGUHx?>3Z#=^nx~RBAVL#Qx)e
> z$I5CHeMVaA=hfh%<v;Gr^Eyvk)cr7(N_#Uhi=cnI49umLyi}@yd|7b;Qd&pf_R6_}
> z6StHHtsF8O9qui8Bhz`YnexW&uKez!{!egEwp{uZEVMn_U!`Uyc3yM7J2c5Gcwe
> XL
> z(xBOSs{!1M2&j6lY&6&BxV3q;c35gx=}Z{#654Od#?&nK*z<X?{vCQ8<#wU6CIq
> Oe
> z%G~ClK>kUGMHFDX>k$+0@nC($oPC;Ug5>DfFgJ_qafm$kAhpKY5~ZxCG<{+l$L
> P62
> zq#6^0*UB=+SO++<UQs(0%x>!G*n6s06n|MfDtA^s2|>1A#aDPaLD%FTQfv@M
> +@=~+
> z;WK>ZA@RJJRyo(OM1U1Q*Yt$%FGRS>TekOb07zycdg4(seSh0x#deKZdfI9IXZR
> z7
> zBVN|!t|~u>KoYfIaz#C9ZDZPQEs|9G@(B-k<nW%E0*T5iiG|;@!$Quxr{m;zYv>!L
> z4P;Y3hSpaSC|$O?h8kOc$qvra4mCnRZc1QXt)2QFAh)_r!xukzRuXWFyeLYTRUu
> 6k
> z(urTX#zix^&s0VnE30tqhD9Ryc$PP85tkftNg8|?VM0rRhz^w(3u5rkd&4H*6Y~x
> y
> zNt%qEYhbz_=_6sya42GxTJY%nfiwap(nW8h!WJ-`D80ufK9)lCa}L(utrR_|$m@J-
> zfzjITf6Qy*+TBl{o!7=`yN-OV>?(Zp?(7xm<l4=zmT8YmS6@y?DMilz#v2EhI?u^J
> zsu#dDbF;7oVC#?n6C}=AbmeMa$O|gv6SAU3_Mx?lG)Wm&Xe39$Y=P|*<xAyj
> eWQ|8
> zVgg~ZjL(`O3jmgD&7;qqj7e6{+<q(}I`K*LL>dq<N@J@zS@S~&{ACx`F@T7+3CJ^c
> z;gky#tW_rSFwuR8XHwA?D*IA#x#n;Z{15UUNd#4JF~@m24O(1l(h>#smRZo(_z
> eIy
> z#El{{)Dw~cVxSA^dgqdCROlL>nMP+^v?H_*Oz4>!W|CcSv8fR<xKiIERE41UtFB@
> 2
> zct-X$dIKcwMN!J{0WjG3mUMA*sev-oX7NUe{v=9+h9${!>cU;MG-B$c!Ewy=L7
> +4O
> zkM*|(HTrjLy((C_APEhqY59KKMHab>7MUt(`9ZHI`Y@-EzmIjvqHjJ%u{JgoebFM
> 8
> z;y0y#Yb<reA=S&p77z_c5PYBD*g~lnI7pUk_Ag_DYWEE5F#R3UhUUC15|Y=(Pf-(
> 7
> zU&75)RFequ3C*kM;>o*)^2qjyC9uDd-C)NeWpyERm%jOOBXV5kgCvAkS`1OFP
> H3!!
> ztqi6Rz3C*%S7uQ~btVLEJn2A8VC}^PhoS{7fso|ud09GM>3}F|aC4|TD0YzCbNU
> Va
> z&?H9zYQNoh^7ZXX)^bU$+Bi)D`_2nF`peef*Uv=)NjCsTmTA|gbwcmTclJX=eU=2
> -
> zRy_``v_w7Qfa<|4)+IB{kF5g@OX#&BhFJ1Jzd!!NXa{V*#!^1D-<yFT-~L&zKuY|b
> z%qW5mgr?lnyk?OwmOVd(n?#dV+ZT`LPo(Z+K51vM(Ongxa%M>>)IKlY$wN8V
> al;GT
> zvUXN>Wr8wdYLy8bi(8GY1<DrIUR0Dzk!?jEX{D=-S>Cn^40ojX?C>8N)5hTw;R#4i
> z=<e*8R<G`73mNJZKKwjF;%9j`_0eNt^?x;Y=J8Oj|NozJI&DYVRCc0IDnew<GN)`O
> zS+Y*qMuu!5*_knIl>KzdQbdR_7*lp;<b)|ph%qvZkZl-~Wyp--cir?k=kxtNe!u^|
> zkMHCACv)BReO=dmUDy4--tXu8b^YtWx!=3L+j_vCa;vjs-n266_z<6!`WYeSNBsm
> c
> zzh!F6q$FDmDZeA2vvms-#jFE9m!AEU>NIkz&MdyD<X0DwcSV^}Zp+#0-W%ux
> VAn$1
> z8<bOPX2E`UK0jGO1?!^QeU2nOfork3qmj9dGd=&1-J~>6IAc>|6JCL&u69K&cTB
> z*
> z=sy?o+{l_K&R2EJ7l3s>!tB-m?3=EzBV&&y8YXXl8m-B$$y^C}_I}coBz0M5{#0IM
> z(s*3xdV#o7iK;q^8#OQzT9kHZM)+33jgbXhdG=(|;yA%lOO@p1j^wHshMj(}f6P<
> P
> zY<ETTSTZW+rerT&0fi?#oF^EEXksuHIyPC0)#S>VD+uaKC30<$_JhmKZb=uBVIL9
> V
> zS*YKjp8GRj0_JrnbgVf%^~G*-Ow#n$BKA!G4b>oM?{Z$-m1b?nlYWELmgfFyH`n
> hu
> zVYA1mXAiNEd4()v?}}~>q^hm*AI4V5CldT`+F!-zB_%He3NwCXiDHXPM3Izh0=P
> h4
> zu?gXu=r$XB)gANI-2NtB!jZlVbFS-RMq8`;F-kSAgKpG9WW2iqmOJ(vW7c2eWFH
> 0@
> zykB!S>*-esV95gvu(Pzr3z6;1)h;<O#%=dX;_b%yQB<wUmOZSjvx;sjkH6X8qb#ka
> z>-d!_9+eOm(;}byeZSyOcGm1Y%>V$HLm@<?7Asn0$t}OK15!h$P-3QjaU`2rDY
> y4~
> zaPZ`?geF9wU3GE2AGO*Dg-oCdp@u@V>k3+$14wx7Nh@I+j%RLV3;<<!rtQbb
> NZ%^H
> z!x#LH9OQYc0ety(xZYYAzA-(BYkv|GRu}(pPmYPL3Z3Ifv&;X`(!3;mUD#B(NzC4r
> zVQrpzQT%Qj?o(~Mo9n~fOIHeqO0Dwy$T@al5_ieXSpYC{k}57}y<9hT!L+pMLRf
> |Q
> z?3*sqx%?8{m4rG!p<a?#CR3|VFVQ{D%b_<(YFsm`wcwMvXoV@nMZCR#xh(M
> V%!ROE
> z;=@M&QB>V#55dHqwd(&Yc2~(o8}#Je(4aG=OH&~}wCl~g^6w=riY-C@)5w`rux;
> D@
> zHoLL+-9;tCcnCl@xvli`I$>($fus{e(6g6%%>63Mo7vmV1Wa@?4xyStAr#{mp<`bp
> z<6Id-S&NgMc%WnvW7?WNj;5r+#6+NCi9wq@{F$Fxc6}%3crZ-c2DOL)CeXWA
> _~7wh
> z1m7(m-*0kX8rBxtaAZj=dVkjX(kp6oZBi0;Z48%${(6iExeaHi9-Qj2L@itQ&P-wp
> zQ0BN<0)Glga?m&+98kiYWP)eH+P9Nyp|=D}gO%LI(tN5MFQso&UaKX1lk5qPP
> ?dqf
> z0viA0p;+LiKZ)p;lR$rKR4-{oIMm!4H^r?ThN7)Hi|uFcg-++Cmd;CVjA85hBm;ny
> z0zPa1SHBkj@c+0~c4{eh_GUawl#%5&g%e*4=H<q(4KX=n@pZf3dA;!7uHa73Zq
> 82e
> zS1}u54_bz4=nVaKzQwoX;Ju&t?|_dpp88N0q-*_Cx$M+J-~wA5w8ex(sJzW#`&A
> |%
> zfuk5BLf;6_@3}JYKyHPCSs9|LEFK9E0jPnH5<u^XCO|&PKv&ei<o)C_P00PpKbM
> de
> zlJUh(p(9ZpuY8qW{p7qm{=k?LyQ{6&ggA*(8S{0Y)l*^D9{3hK!W<O>d%VZdYtJ4
> z
> z>3s6m_ZMy7)=A!6*?)mKD{Ni&vf@wq8-clnR4&8r0JgJ^UPGS<>0OMN6>bm_
> XrsFO
> z;1^a#lxs~erJWuTr}eD!oiKkE_I2D|EF5OAfb2`^@E?dQR9Kj{YtwUE_UExvksH*=
> z<$+*f0Om}T#Os1b-ub^C=_(3h6{8vE*4P)a<?_!)WBW*sp~HGl!`Z4L%-9>1DEJ$
> H
> zYQi5qz~Fi{e)zlBjX*1lq5E%_=C!xJ#a~>%RlJ=)o2x?ph%zLoDa7wWKCq(-H#?L4
> znw^tJRFFn4>nO6%NAe}wFVrLe);WWLnJxJI>p7CmZy~oYmkB5r?@d$^;@G=M
> n^_6}
> zTCP-3WX8;0D!pAN=Nr;;N5MDleCf=@ae)M4OEc|_f2aB`0>1kj0=28HanX5mnYi
> =#
> z+TMJ+xyfRMzJD!_sYxf-lM<WO$ceNJ?(F(ChWS28h{S*36#Xn*n4VDz^Li&)#g*=
> M
> zxAw`g7krGlDXK5`Hb&^{ATm36`tFyw`wOdv&EIXr#$#y2B#vNJT=k!l{P4!$&!G~h
> zP3J;XWo{~K*jA2~eY&+rN6l`+kycx($VeY8bEzyS-Kn%e7*m_n^-lkCE+G875VU+
> O
> z<veyESN)1EfgR{>Ennz=|EsXe_SQ6fRO?6e?Wh;GbV35kR)R;4rt*hI#>@4RJ(|XH
> z?UL@5`6tQE8>;Uxj$`wS%cUu1@?ClittA<I5|c|b@a1B9N&G8PRc~fQCcKe2KY
> Eq1
> z*7fo<TQ&mI*qbFGf+YaHs;h0kRn~@v%Bb^zGFeYptH0V&dMC5UsbneCKJ|ldv
> M91~
> zK6t1&)PBuzY?_*+U$1%FZM3FrbA5aJ;Y*sW<F@;o%^Z9%Xom4+cTh>T!@%AS
> F7_C?
> zNPULZp-k$X&TPD?lG+5QKt091H*C5@5B7?gi0eDhE59BsSM*JNf4;7$N)G!n*}
> Sor
> zW>9~zi-A^|H;$7*s$U@Ny(01NW{Dk1!WhJpIkQ(@X}l5=LK^g1wPw%T$4RzIj
> %Bsl
> zniMbWYxdV9`vS5qkIXF95?Blyu13BT;bY^xwKEW0+v&fke||5z9)&;U-)+jCP}(mt
> zti9vr>(IolcX0H@nEmr_nDeF{@NxhBi^e?9o%^dUfwU0jUrc&Po>LRz=BU9Xx2jC
> %
> zZw&rjom)oNFMH$2B<W|C7~A5<pW}C^^|)QAz~fKITQ6UfziG{m3mszAu1~Yug
> ~sHx
> zulvK{-Aoa=$q&@jhY1a(nl|UeBr^q}Rhk&@!-%^3{O7>-kWV+f8E>ZP%g@6{oHP
> 8L
> zlei*I#Ggi1|L*cDSD`LH)af$?q|@GUgi$^-Hl{3`_q`*UF1pQjiVsifh>qxucq7}i
> zt4gtSuu=7n;^eJ={)QVSz&S(~XNqvJWPT|PK|Q5mhBg5S5fb5=_X}4li`pN?gH#`C
> zQFCcEzstiy!8!c5X7W#0S6#{u+q?}rqwatFc;$!kyK1NT{@r-9Xi_K>^|HUu?&1pW
> zsEx1!Ty!vA(7?=#{h3NaW6n_6o%s&5Ak0J01LKQqX~n_N;vzyX`!`P~n3uRfUYsm
> {
> zqJX&h);N?h!R+ynJ%*P#>4zq3&uJa;O{MMcsn?;F98EV3i?jVLiFa^5y7?!XNBk(M
> z%t8Ca=DtQiLs(4Ce#a+rL9)Wy%QwpJ6AMd3@BYrt{GR=-0`K*vpcEU+9mo%EVI
> F4O
> z<4X8l*S570UT;xB#>tMA&J->Litp3V4Q&5=aNaJsYu6%p|3-T*!7uwoPyzXYZ>o?
> N
> zqj^7S_^^riP18LS(uo(T#`{1Y#B42;W;@8OwhoI*t%TFL+M0bFka)-*;*)e#3O*gI
> z#HOn)<-e2BK~4sWcLm8Lc{<!#A+3sxBa+QVTXweSkxe$o-soG~?cK5kZs_yx?~pz
> 6
> z2F8W9t1zh{kNS6l0r*0kW?5kmGLyP*%XN9Vx&3~WZF;`q;8?Q6`O7=TGztHni
> &qM>
> zY&c!0m!J|Y8uk1&8G2=Lg(2M0N(DO{Ba(7b_d}4A;<B|IhcvBOh<M_x9XJRz`-i!4
> zsF^*!B<#X?kT%P>;==H@!bcsGU-uY!Ppkh~Qm}3zmP9^_Uwbz{R<ng=3OL8#h8
> y@}
> zG%<TU8FiVzWU1Aqh5ON#+3D5Ne&s*oRZ8BsmCvly|I9j3M~9O#xAj~gTR@H
> 7GF;3p
> z9FS&NdO)h`_kEu?UVd)wrMXV}l^5O|PKK9~(!Yl%v~|dRQ{#GF@}@}U-${H9cx
> VvZ
> z-hIaKZB|Y-DMR;x3j42duH*a><+AW0eVHodM%0_Q!X!hY9Y%dFZXsw0=0O?4k
> 4(8I
> zlkwx#^^b)_g(StbprHn0mjBe<hmqUJjtx-BSEd%xuGetx-hp~e&`C*9dC)(|5kLNZ
> zwyjvs5qSSboU#Sw7bR!33JG`S=amMHavh`~($4oiz&x+09fHAvC&v}x0N|t|u{;
> @u
> zf)T09X6)w^@3Kva4_ioaQuvDaW}vzC-h(UiGHRJ+Z-b~^3?aT^)%H%viYH1+PGV
> GI
> z3!!AlS}822UonM$I|$}?-;Hr8K5{ul202yaf9LVYzf6eTvX!l|(O0e`<+{Sj^#hZa
> zXHkr(JvA+->BR*|J0dGw^+YZS7cbi}b^QVSkLPNc$G9Gctg!Q=%|Ut)M`5{DR{$R
> P
> ziK7%9s>@<(+a4(fQb6E#(}n}J*!WIHZtyX&?8XMfR%xq^StAg+CUNIzAi(@yu-3}~
> zWFFo{4e0_AFgT=H7hTWQqC0Jsj-UrDo@>#aZxR4;Zn4!12q<>!aI=_Zl$-W9Pg)}
> S
> zP7Var$L4#JlK?0B^sKur5~#PSTZCxVjzo+0@vzek>vgx#Fj)RaY9{)tUB)oDJbb#_
> z2)Pj)&C*&aeGLW5b+8+25mW;E&Q6JbeVW^JCC%4<WusGf<I?zdC=@-3U9p<c
> sA8o`
> z#RLB1Zs1}E9xhE<h}oBtumg5`1ik+0L7$Qhj)Es~7%~G2m)51Dhr_<UYE$|dumG
> C@
> zd;hsat+{@E(vZrt!c+<D+I&zheIdoqk@vxG9sT(x3^rvX*W*s-_{^knK6ge23%I4e
> zf~4$pcEA_!NS-F|hB-6V(jzulK^Z-QowK<BI9>X350A}S2qBL7{a{nqwT9;_gHH4M
> z$-0#>rER@XxV|R8zC=t83WEb7xpm2CcJo;-Kt~jZAfvp+vpEyt4}(SLB7Ey1UR6>0
> zO6{5xPf(IKIiB_t%;Ydu_%HdY!PhaNbu&i+M-ofy!d|e-#eIaq-p<?^!XA<<zmSxT
> zb&RaA!d=Dk$XN<xpj{UG9ljS?bIVO&KA74F=(FEu;h90dt$%%>Sh{iaH|#VQP{{I
> H
> zyI^PB4{=H1>_#>=mI6;%Sn1;9S<L=9oQ;2(o(}Ct$diw-t4VE2+hJ)U;pqnc%)y#~
> z+|#3Cy}g$>Ezy8#{XP)F57^)I?iGO<Z@E;CygDYFGyMty5{`$|L-{uLZHLJuWqxF+
> z1~qS-$U@%|m|pH_G$rQY>#Z;MHE1^69Qlh=r-5%+=cqgOgRz+ClI8NkCEF!u-D
> >vW
> z2BEi-GdQVRCDCveyGT2;3_Ekxb?PwrP^T_$+T2fiaQ&Rprtl?GPN$<Q>ayH&!vb
> O7
> z6G0O|ix}VsL!%;x@n!9svX{@EEUE7V#EYN-PcVRVcqc4TyyPP{$%X8cUA3u^DR
> Y8&
> zuL!QNxZM)j^bOCdLGkQ9ZmgN_D~5RS@u<VFFI^|sr}(ZdHJ{;-E!QQ&v#VA6w3
> e;{
> z#l_!;z+Pg81vA=Zir`f4!n8eG*KORag#NG|9$%vU6>%8F=$0OTyhlo9-!xo_!RytU
> zt48@OuzfDf-v!i;<;nGvdrwK6QebkkTps+2xOe1KRww|^06e(Abcej<58n1-wMh
> Bz
> zKLFhmBVJw70D^<?wv79@^KplM0J;bNe;4?Sdr1JHUP^*Gy;@Cg1#lGMv`untc
> {EP3
> z6fSAZNi?TKSPrwpKGg<Jf2zfr>#%#TxA_jZWkATA#y#A+LP;l7t7rJiTdK<Em6!iC
> z)y|I;1E#kvgY}6M4`F<(US+sPp2K~o6ANq7IRqfUCc_T##27;LM632wGAavonVeT
> S
> zjxAf7rBW(B{I^s?08*kV;9B0{x$YRd!vg<j6NX)6Mx8lsT(?4xz>hZoZ80i758!cf
> zT2CnsnN9+q+>@2c>(f}PlUYa>t3mEEw|6*YDNqj^@dE98ujMyxfw_fa3x`3$&F~
> xy
> z#Zv2$H}Icqu*e9M@PMM^w&ZShcyi0}zGK#gLB+`v>viS>8!yL$r9qeP>NtPT(Nr
> bQ
> zLf%TT=ak+_J+J8y;fCst*QczX@#EuPw)O-Q+mMuDOGFruAk%9NXrM?YyX!Gjgs
> qa3
> zkLMPx8OP;Q)X#~eR}V2`U(a`p74Xmj*S7iA5L@vWRbK+p*XqekpKfUzx2e`e^t4Y
> 8
> zN%ma}lIs;=le*w_+I}q9(o$<vG`LGF5YXd#?kJ&gzpj|q*AivaTXztO8Z41v1HM*N
> zL?(Brb>_8;qjFVIgK}fao{~M~<N{^3GAT#I=%QP-PYMmk!7ZQDsiX#lQ1Z0HOoHy
> ^
> z@a%ZjPR!h~Yo{7CO!X9N#J5ISSEC80ZOYB+)V9Io;n#Oc;=Ovi7F<;xJegb$etJ2N
> zYk%io;|VCsixZk68#fpgj#;@er7uPARSzpLgN87u?AmfKzv+N1%@BBfEPfaz{OWj
> x
> z9$+k?zTA11Eq^bqZQfh8<giN{?yr(tCE4TS!R24k48$c($Lo%?=r25N-<0;(wja4)
> z(*?X!ml+`yHRgnbS`Z_pL@ZjI+uJ&s{*YAp{0We%+YZeN55~E)Lcdd~J*mT9lq}WY
> zFujFSIwF*{u!pzm7ON|ttkCj^91N{Cx);^Vxiti8^A2-<bL7_UsC$+YL!<U45no3Z
> zBy&p9#z}@YI64NO=Q{4+BPXjDAMBk6#H1J@5Pk`+$;UO8=DQtPQDsN&r03(0
> KmA&?
> zUYtGd!!^Sobn}dIiz-|H6*6>X%5kyOf3a=~qkf_!gi+;*pP3VlBhU+zg0fP2_il<T
> ztL%JUTbSxv=62T}dx)_zjpN9%lP7H^q{eNS9hTuLMapH4ICc?o+Uu1~LOQ*W#m
> TjV
> zC;w4X84TTxvn)zUCYQ5VV%D~E?xo!Fm%+QB9?sUBI%z`uO7VGWBzoy7Dpfr&
> k1m3%
> zw~fesl9^CsTVT^*Qj<_NFv1jh<M9IL-xH}k&?<@y=nk$xdMda3uZ4K}i3toHEl5aV
> zSW5OzFl<EeO}IVy7BY~LDR5ZvyvW@DV~lhI*w^r~+vl5~54Bzi1}v~i!5+`k=2<o^
> zT?^4)t-{@`-ER3j8QLK{@A0gpulmDLIa98UTf8SD54~qCHi19lIK^#@#j`e~nCID`
> zf<E2S+U($oqYfQS$M0n!-dQyKkSl(CQ`Jbe0Y8TbkqxsU$?YrpXQ9({S=|@Ot~Ey
> U
> z3}oo|c58g~3-uYGe*w{^%OYxQe;V}7{g429MoL8<<j*?TC3LdJxjHMQD-+%6K`T
> Qc
> zNYQug&2fV2HBg!R`9Lq(F24LAQaF6R{cPurThIX8WeU`o%TL<i*u|e97TSXWQO
> JdM
> zkXkPEyA@D(mpAi=#D1aD9p^!5jZNX*zwu+~yuWE=K|mw_*FQHK{$CX2Ph4jp
> !>98o
> z?r`pB3Vij%hHDT|Fl9H4H_QRbnAa!`duw@l)@qOSX={^uNEjE={LsGy)h$ANuy=;
> &
> z#yjs4bPWj9`rYC(s}`#yK-+>pHJb1?)`vGDf<SJJL){e&*M6ggLC1byr?9jadT?;=
> znqmoL0BnlWA~q(UwgJul3j}ke%Y@5H76c2WPxA|#6fE#ArhCMKwz7d4vaYzXp
> opj%
> z3Z`*J19??EByNM{KIed8elTi{6NSMXoOpORXCLlC<eX%QZz#>4N?1&>_q~c4mq
> UKm
> zw*i$du+L2o7v`k<)_P(rIGHPh^_;fhI%}u5FxZ!l`fo@HJKo!sHD=Hroq&mU&I|%{
> zShjA07lYlJfSB5tTgh}Ju8kYD#T|jc$}MK=xsqsB#o-92W-!ZpUjfr>IGg&bPP7Do
> zDi7%qT{^LF@B!+5)onr~5zY%r67@Re)L}4gF2a8;k@whKw<e|%bXQk85X@bn+
> A@+{
> zaB%`K*o{s+dKU1~;XvZmwA=ucpceym+jYVdg>xKZHPZMFn6vg$i3SVJt-_;DV^!i
> n
> z!}6dKR>cY$oYHYd&Mz=n-Qrov?~Ot6n5Awbbqhf-e8O`LmDn8h$=WYdSq0XDA
> j_=(
> zxgutQ1k`BZdC-`Yg_u)d)04hTH5{|uEFjKXx!T+h{Ixzg3ZTf6At=%bHZGvZUsjHP
> zz#ti2c1kP(qP}Gq(09zQ_hkM$7(4dJo9mn;xT6zP2!03!og`rG`9$u%q8cS?7Yw$%
> z6SAQSE^Y42ZbKGD@A1tI-dpI50>(iQ<#7Jl8Cnd30GA0c(9KC&yX7R_v`iegkWPq
> 0
> z&sNxEPB+#2FQ;y9(_sHIx94n|pUGN_@g7?M>0Np4s5$ODA9aDTBHMXz^^xRv
> t?_zJ
> z?r=4h$b(o+`jZ{B9+64C?6G`KpcpQ&pgV0hG%63~um7f>tFS&A*WfTApkB``p)P!I
> zwh2qwmA~%#w%2@H!c$0_U-lYf-~EI?yIV2IEC{k3{=?jQ_5W3d+*Iz=_(6`dB?Uv
> z
> zyHf}kKEv+$4J89+Qnqm#XWLP$Ii(nD{Ff$#U%V|VK=;_OrN{6ko?p1zWYq@qLE
> D<z
> zjZr*l>03qmE}x{;UbQ^pq$A7W6%I|w<=ptoWDsA`(3KFsY(u2z%DXrosM1UiPe>
> hL
> zpo^-!_ZU!|KvIz--q_~r@>HRT*osC<*s#g4#zbuqPZ9vJPOP)#kyWaGRPAML+(c??
> zTL57ujjZ%RBME3VSl5We`Al{?%wK^PyyP?NoiD=k48J07+O5jIJHj8vCtP&ncFv<
> %
> zZrgWFa}~eC-OjOBNDr}x9I{_mg>fz?OUYUdtICv!iUV)9ksa5zTIaI~qRGE=sp67b
> z#7FCZmn@B43D>+6n|v|_a%zRA(&H#ftg~z{gz7%kjmoF+faWpit7!wWkYU0KN
> f%YZ
> zI64z?Ehk0X)2X&}h1Uc>;ZvfjC9}4MMSQ99(L$~kPwRDM-<-OUGvr$`TYH18^Nd
> !{
> z@U<$&1ZB9SKSA*32|`}8LlU{c<P+Ov5kWP?WT~XFG+N%m7$e9gT35?aeXl<
> @A*OXD
> zbRsN`eANe`nRD*CEOJ%VOjbRSuuz9D;SKen-+UR%uPIgIUMa%(n<*YnNci`7xNZ
> 99
> zAJdO&<|jRFk1l&KTvJ@a!VKW){SHPdXoCC&Ls^lR=r=DgLxz-VXNBiUt%5Ub<JU
> dx
> zi26bc`wX@FH`Gj3y1lSwf@+D%QS|&s95F{Z{XjAft=InbkL6vI>G{}2XOFtk4rA?>
> z0V7)U@+Gp$vRvYu$?QaXn-uOdKZ&!(R+)^nDie&iR()cNA!L3*Z|!JlZJUt6*p0`+
> z3ALv6@HmgO96}!ur6UjoltbterWm)f3>iCobd&P^Dx=4Ry4Eu0w1v>G+VVM{<
> NL1^
> zWyzyJtQK=C!p#9i|1}x6FlKt5uj<5byiLCff*ET_j3WPQg1t6C*;Ci<F=ao-s1for
> zwhMJwQ1W{kLLCl5mAk5_K_?Xyfvte>Q>ml}Kr1&?SjH#LaCnEgC;bS2QhQ$2zG
> kkG
> zUZ!cTX{x8NYP$RwlOh`_sTYr%KFh{Tn1@+M!&#r`)mXkNcaPyfZCr3gd|(q<x-V}*
> z8y_t7@sWa_V(`<rFo&h9v@tPt-JQMpvX~E{+R#S>%&|u?hMx`PM0sliu?S_{Va$
> jR
> z%~|gdzr7lQP$HdgV7Kb>Sx?vt9o5!J8_FcTeq!YIeNlm#z^xJ*kjPcW1vhKU7u7~r
> zzKNO<#N^XgE2Wa+biM012dgFnvn%<-WXpchmH~^x<>My2_e}`PQR7<<#}j2_
> Nv{ud
> zm<&>Bm5tG-`jR`6`Ex!8=jo>CJQ<8uaEPSq_Ug+ewSIG#yqD&_C%TdxPqf5$qKbR
> #
> zW))CbG!maz9mDf!cy`Sc{JqU{y_V~99;cyQf%R@HxUzqRjtKrnyeYcs+_Tx%rI_rN
> z?*6ih<)CK|ePW>ZNES`QBsfLQbkIUn*}i^jD7w<k2(y{z;hjkej|UCJCUsE_9sIxZ
> zRbV_KbksOk^AvX{-1{+*jaqhG)bEiN_{x0r-RQn%Qq`pB&rfrzi%P=-&r0Q{A!w)w
> z*&ZRk3;L(^;A8wz^CaUWTC)1GQ3KA5LWmoQzkW97bvcn?t`EI6Z*&DiZt*$!RIZ
> x$
> zu+#*Fd0+tFdQ3$kk}2EA3igZ;YcR>!Nnn*C-2$14y}IJB`tjNy9b4!@<I2j$J?oT&
> zoPkp1==PXwa`<i!vA^3d!S~42T<txjwX{-GN2I*fq3R)*{WSdPCDGRGCf=AAMv_)q
> znFyTm{5VcAFSn+TWxS&Fo?H1ae}LQk>L)P!HHls!DznOe%?lM3hl=5pM?0?4hk
> 1W~
> zu&s@3!QlMol$T2{jlD!GIkK18Hnb-xro@4g*BFzf9v@pBCDy@i4aoKepNmL+R}
> bwY
> z%Br9m>J}v~hKolMvg3R9qp~{8Pve!ZTwqr2#+4D2!?5Y)3bnRn_SV;}fTPbqckf}0
> zT=%1X3Gp5W+n47i^6`$!-Hv$19?iWr4R&L*%YnjN1v@gC*`BZ4333dShArnqf6
> VA(
> z2ODZ%D?e5?G!yh!rRbw<pGpf$3BLiDZTRS~?ykV)@0GsC9c}p{+!}e~altZT$V+b!
> z*-#hnm8HAajR8Ii3DXM94rcdcXnf@<DzlZ0aGMcgvxm4w_XZJI?+k-fz_lL|>Fje*
> z$uzIgkwtpy%E%(z#=}!%$wFH*`GJ$`AUfV&ChWtKu+MF!Z<-IcmTE<zl6+48ZsFx
> M
> zh2Y5N_x+^8&ZdQH>_+c}3=*&!@saTt_MFG-eUoiUheG9F7We<SAss$<D7>?VB
> MQ<}
> z-hKEIaNx)PRh9Es_ZdYwedv}`FYc<^yeQz{){jt>`hSf90vyTEF&^yZ3H-+Qp7CbL
> zvf8#~`uy-Qllsvh)o1gUe><7oKW2Q^cLaSil>j`j$m*NdFpaAqLC!Ml{LlaRjW=xt
> z;B%GabxS%TZ$?f&%2H7L9ZD}UcU+wUdI4dR&(jNl@wtm=0gA~XN{baWR@U
> FGq;A=k
> zSWKt*j1c$RyM2ED7>`I0&Sr0)qz!Y_Z8v6;*7d?Hz$2O6g8LyCFe#A$V)koEroEeH
> zZvilB1HrZeuXt`BZxF}@nLs|<tW|Y5DY0bo<8zw6J=BN}k&a$CZ>eX7Z!;T*5w)>!
> zIm{p2b_5N!2DVTVe+g;^yWY7GCg20D&g$MOR*DcEftg>u!&;@y;GJ;_-~_R*B&
> WJn
> z?&NS?Vqy3}Xx9<sX>GN!(58s@tr<^Y8%)h%zY*(^`(S0;l$T<&WAh++21Z`0(}4a}
> zXkzvi<U4U%5v14aJ5SXL@rGDBZ8+eiwtoS6S+Nte1`K=lO{Z^;99U6%`=;`p!slux
> zOH2B}fg5ZblDoR`z$OBAdvEzUJUo_H+M%^%mrd>h*04y?J+KMJkA4@A$mZK
> RYXmx;
> zV`d}qieoPfmePb=r{}TZDW6ulxZZM6^z6xGQBWV+lyg2k9B35UvLc4LrH;ig*b76
> X
> z{fDYSFsBC$cB->eoIZqs{Q(uX0`LrZMGtUh$^~c#ws9&2dm1F2dq_S{+jRDUz7RLu
> s@&`)&Hy$?zZu>uSyNlixoQ<DYCk$5m-3A|n!{1;R4b1dQFF4=*KeaXjr~m)}
> 
> literal 0
> HcmV?d00001
> 
> diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c
> index 61a0423..53747d4 100644
> --- a/lib/librte_gro/gro_tcp4.c
> +++ b/lib/librte_gro/gro_tcp4.c
> @@ -34,8 +34,6 @@
>  #include <rte_mbuf.h>
>  #include <rte_cycles.h>
>  #include <rte_ethdev.h>
> -#include <rte_ip.h>
> -#include <rte_tcp.h>
> 
>  #include "gro_tcp4.h"
> 
> @@ -72,20 +70,20 @@ gro_tcp4_tbl_create(uint16_t socket_id,
>  	}
>  	tbl->max_item_num = entries_num;
> 
> -	size = sizeof(struct gro_tcp4_key) * entries_num;
> -	tbl->keys = rte_zmalloc_socket(__func__,
> +	size = sizeof(struct gro_tcp4_flow) * entries_num;
> +	tbl->flows = rte_zmalloc_socket(__func__,
>  			size,
>  			RTE_CACHE_LINE_SIZE,
>  			socket_id);
> -	if (tbl->keys == NULL) {
> +	if (tbl->flows == NULL) {
>  		rte_free(tbl->items);
>  		rte_free(tbl);
>  		return NULL;
>  	}
> -	/* INVALID_ARRAY_INDEX indicates empty key */
> +	/* INVALID_ARRAY_INDEX indicates an empty flow */
>  	for (i = 0; i < entries_num; i++)
> -		tbl->keys[i].start_index = INVALID_ARRAY_INDEX;
> -	tbl->max_key_num = entries_num;
> +		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
> +	tbl->max_flow_num = entries_num;
> 
>  	return tbl;
>  }
> @@ -97,116 +95,15 @@ gro_tcp4_tbl_destroy(void *tbl)
> 
>  	if (tcp_tbl) {
>  		rte_free(tcp_tbl->items);
> -		rte_free(tcp_tbl->keys);
> +		rte_free(tcp_tbl->flows);
>  	}
>  	rte_free(tcp_tbl);
>  }
> 
> -/*
> - * merge two TCP/IPv4 packets without updating checksums.
> - * If cmp is larger than 0, append the new packet to the
> - * original packet. Otherwise, pre-pend the new packet to
> - * the original packet.
> - */
> -static inline int
> -merge_two_tcp4_packets(struct gro_tcp4_item *item_src,
> -		struct rte_mbuf *pkt,
> -		uint16_t ip_id,
> -		uint32_t sent_seq,
> -		int cmp)
> -{
> -	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
> -	uint16_t tcp_datalen;
> -
> -	if (cmp > 0) {
> -		pkt_head = item_src->firstseg;
> -		pkt_tail = pkt;
> -	} else {
> -		pkt_head = pkt;
> -		pkt_tail = item_src->firstseg;
> -	}
> -
> -	/* check if the packet length will be beyond the max value */
> -	tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len -
> -		pkt_tail->l3_len - pkt_tail->l4_len;
> -	if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen >
> -			TCP4_MAX_L3_LENGTH)
> -		return 0;
> -
> -	/* remove packet header for the tail packet */
> -	rte_pktmbuf_adj(pkt_tail,
> -			pkt_tail->l2_len +
> -			pkt_tail->l3_len +
> -			pkt_tail->l4_len);
> -
> -	/* chain two packets together */
> -	if (cmp > 0) {
> -		item_src->lastseg->next = pkt;
> -		item_src->lastseg = rte_pktmbuf_lastseg(pkt);
> -		/* update IP ID to the larger value */
> -		item_src->ip_id = ip_id;
> -	} else {
> -		lastseg = rte_pktmbuf_lastseg(pkt);
> -		lastseg->next = item_src->firstseg;
> -		item_src->firstseg = pkt;
> -		/* update sent_seq to the smaller value */
> -		item_src->sent_seq = sent_seq;
> -	}
> -	item_src->nb_merged++;
> -
> -	/* update mbuf metadata for the merged packet */
> -	pkt_head->nb_segs += pkt_tail->nb_segs;
> -	pkt_head->pkt_len += pkt_tail->pkt_len;
> -
> -	return 1;
> -}
> -
> -static inline int
> -check_seq_option(struct gro_tcp4_item *item,
> -		struct tcp_hdr *tcp_hdr,
> -		uint16_t tcp_hl,
> -		uint16_t tcp_dl,
> -		uint16_t ip_id,
> -		uint32_t sent_seq)
> -{
> -	struct rte_mbuf *pkt0 = item->firstseg;
> -	struct ipv4_hdr *ipv4_hdr0;
> -	struct tcp_hdr *tcp_hdr0;
> -	uint16_t tcp_hl0, tcp_dl0;
> -	uint16_t len;
> -
> -	ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) +
> -			pkt0->l2_len);
> -	tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len);
> -	tcp_hl0 = pkt0->l4_len;
> -
> -	/* check if TCP option fields equal. If not, return 0. */
> -	len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr);
> -	if ((tcp_hl != tcp_hl0) ||
> -			((len > 0) && (memcmp(tcp_hdr + 1,
> -					tcp_hdr0 + 1,
> -					len) != 0)))
> -		return 0;
> -
> -	/* check if the two packets are neighbors */
> -	tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0;
> -	if ((sent_seq == (item->sent_seq + tcp_dl0)) &&
> -			(ip_id == (item->ip_id + 1)))
> -		/* append the new packet */
> -		return 1;
> -	else if (((sent_seq + tcp_dl) == item->sent_seq) &&
> -			((ip_id + item->nb_merged) == item->ip_id))
> -		/* pre-pend the new packet */
> -		return -1;
> -	else
> -		return 0;
> -}
> -
>  static inline uint32_t
>  find_an_empty_item(struct gro_tcp4_tbl *tbl)
>  {
> -	uint32_t i;
> -	uint32_t max_item_num = tbl->max_item_num;
> +	uint32_t max_item_num = tbl->max_item_num, i;
> 
>  	for (i = 0; i < max_item_num; i++)
>  		if (tbl->items[i].firstseg == NULL)
> @@ -215,13 +112,12 @@ find_an_empty_item(struct gro_tcp4_tbl *tbl)
>  }
> 
>  static inline uint32_t
> -find_an_empty_key(struct gro_tcp4_tbl *tbl)
> +find_an_empty_flow(struct gro_tcp4_tbl *tbl)
>  {
> -	uint32_t i;
> -	uint32_t max_key_num = tbl->max_key_num;
> +	uint32_t max_flow_num = tbl->max_flow_num, i;
> 
> -	for (i = 0; i < max_key_num; i++)
> -		if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX)
> +	for (i = 0; i < max_flow_num; i++)
> +		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
>  			return i;
>  	return INVALID_ARRAY_INDEX;
>  }
> @@ -229,10 +125,11 @@ find_an_empty_key(struct gro_tcp4_tbl *tbl)
>  static inline uint32_t
>  insert_new_item(struct gro_tcp4_tbl *tbl,
>  		struct rte_mbuf *pkt,
> -		uint16_t ip_id,
> -		uint32_t sent_seq,
> +		uint64_t start_time,
>  		uint32_t prev_idx,
> -		uint64_t start_time)
> +		uint32_t sent_seq,
> +		uint16_t ip_id,
> +		uint8_t is_atomic)
>  {
>  	uint32_t item_idx;
> 
> @@ -247,9 +144,10 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
>  	tbl->items[item_idx].sent_seq = sent_seq;
>  	tbl->items[item_idx].ip_id = ip_id;
>  	tbl->items[item_idx].nb_merged = 1;
> +	tbl->items[item_idx].is_atomic = is_atomic;
>  	tbl->item_num++;
> 
> -	/* if the previous packet exists, chain the new one with it */
> +	/* If the previous packet exists, chain them together. */
>  	if (prev_idx != INVALID_ARRAY_INDEX) {
>  		tbl->items[item_idx].next_pkt_idx =
>  			tbl->items[prev_idx].next_pkt_idx;
> @@ -260,12 +158,13 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
>  }
> 
>  static inline uint32_t
> -delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
> +delete_item(struct gro_tcp4_tbl *tbl,
> +		uint32_t item_idx,
>  		uint32_t prev_item_idx)
>  {
>  	uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
> 
> -	/* set NULL to firstseg to indicate it's an empty item */
> +	/* NULL indicates an empty item. */
>  	tbl->items[item_idx].firstseg = NULL;
>  	tbl->item_num--;
>  	if (prev_item_idx != INVALID_ARRAY_INDEX)
> @@ -275,53 +174,33 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t
> item_idx,
>  }
> 
>  static inline uint32_t
> -insert_new_key(struct gro_tcp4_tbl *tbl,
> -		struct tcp4_key *key_src,
> +insert_new_flow(struct gro_tcp4_tbl *tbl,
> +		struct tcp4_flow_key *src,
>  		uint32_t item_idx)
>  {
> -	struct tcp4_key *key_dst;
> -	uint32_t key_idx;
> +	struct tcp4_flow_key *dst;
> +	uint32_t flow_idx;
> 
> -	key_idx = find_an_empty_key(tbl);
> -	if (key_idx == INVALID_ARRAY_INDEX)
> +	flow_idx = find_an_empty_flow(tbl);
> +	if (flow_idx == INVALID_ARRAY_INDEX)
>  		return INVALID_ARRAY_INDEX;
> 
> -	key_dst = &(tbl->keys[key_idx].key);
> +	dst = &(tbl->flows[flow_idx].key);
> 
> -	ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr));
> -	ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr));
> -	key_dst->ip_src_addr = key_src->ip_src_addr;
> -	key_dst->ip_dst_addr = key_src->ip_dst_addr;
> -	key_dst->recv_ack = key_src->recv_ack;
> -	key_dst->src_port = key_src->src_port;
> -	key_dst->dst_port = key_src->dst_port;
> +	ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr));
> +	ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr));
> +	dst->ip_src_addr = src->ip_src_addr;
> +	dst->ip_dst_addr = src->ip_dst_addr;
> +	dst->recv_ack = src->recv_ack;
> +	dst->src_port = src->src_port;
> +	dst->dst_port = src->dst_port;
> 
> -	/* non-INVALID_ARRAY_INDEX value indicates this key is valid */
> -	tbl->keys[key_idx].start_index = item_idx;
> -	tbl->key_num++;
> +	tbl->flows[flow_idx].start_index = item_idx;
> +	tbl->flow_num++;
> 
> -	return key_idx;
> +	return flow_idx;
>  }
> 
> -static inline int
> -is_same_key(struct tcp4_key k1, struct tcp4_key k2)
> -{
> -	if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0)
> -		return 0;
> -
> -	if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0)
> -		return 0;
> -
> -	return ((k1.ip_src_addr == k2.ip_src_addr) &&
> -			(k1.ip_dst_addr == k2.ip_dst_addr) &&
> -			(k1.recv_ack == k2.recv_ack) &&
> -			(k1.src_port == k2.src_port) &&
> -			(k1.dst_port == k2.dst_port));
> -}
> -
> -/*
> - * update packet length for the flushed packet.
> - */
>  static inline void
>  update_header(struct gro_tcp4_item *item)
>  {
> @@ -343,84 +222,99 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
>  	struct ipv4_hdr *ipv4_hdr;
>  	struct tcp_hdr *tcp_hdr;
>  	uint32_t sent_seq;
> -	uint16_t tcp_dl, ip_id;
> +	uint16_t tcp_dl, ip_id, frag_off, hdr_len;
> +	uint8_t is_atomic;
> 
> -	struct tcp4_key key;
> +	struct tcp4_flow_key key;
>  	uint32_t cur_idx, prev_idx, item_idx;
> -	uint32_t i, max_key_num;
> +	uint32_t i, max_flow_num;
>  	int cmp;
> 
>  	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
>  	ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
>  	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
> +	hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
> 
>  	/*
> -	 * if FIN, SYN, RST, PSH, URG, ECE or
> -	 * CWR is set, return immediately.
> +	 * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
> +	 * or CWR set.
>  	 */
>  	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
>  		return -1;
> -	/* if payload length is 0, return immediately */
> -	tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len -
> -		pkt->l4_len;
> -	if (tcp_dl == 0)
> +	/*
> +	 * Don't process the packet whose payload length is less than or
> +	 * equal to 0.
> +	 */
> +	tcp_dl = pkt->pkt_len - hdr_len;
> +	if (tcp_dl <= 0)
>  		return -1;
> 
> -	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
> +	/*
> +	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
> +	 * whose DF bit is 1, IPv4 ID is ignored.
> +	 */
> +	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
> +	is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
> +	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
>  	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> 
>  	ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr));
>  	ether_addr_copy(&(eth_hdr->d_addr), &(key.eth_daddr));
>  	key.ip_src_addr = ipv4_hdr->src_addr;
>  	key.ip_dst_addr = ipv4_hdr->dst_addr;
> +	key.recv_ack = tcp_hdr->recv_ack;
>  	key.src_port = tcp_hdr->src_port;
>  	key.dst_port = tcp_hdr->dst_port;
> -	key.recv_ack = tcp_hdr->recv_ack;
> 
> -	/* search for a key */
> -	max_key_num = tbl->max_key_num;
> -	for (i = 0; i < max_key_num; i++) {
> -		if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) &&
> -				is_same_key(tbl->keys[i].key, key))
> +	/* Search for a matched flow. */
> +	max_flow_num = tbl->max_flow_num;
> +	for (i = 0; i < max_flow_num; i++) {
> +		if ((tbl->flows[i].start_index != INVALID_ARRAY_INDEX) &&
> +				is_same_tcp4_flow(tbl->flows[i].key, key))
>  			break;
>  	}
> 
> -	/* can't find a key, so insert a new key and a new item. */
> -	if (i == tbl->max_key_num) {
> -		item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq,
> -				INVALID_ARRAY_INDEX, start_time);
> +	/*
> +	 * Fail to find a matched flow. Insert a new flow and store the
> +	 * packet into the flow.
> +	 */
> +	if (i == tbl->max_flow_num) {
> +		item_idx = insert_new_item(tbl, pkt, start_time,
> +				INVALID_ARRAY_INDEX, sent_seq, ip_id,
> +				is_atomic);
>  		if (item_idx == INVALID_ARRAY_INDEX)
>  			return -1;
> -		if (insert_new_key(tbl, &key, item_idx) ==
> +		if (insert_new_flow(tbl, &key, item_idx) ==
>  				INVALID_ARRAY_INDEX) {
> -			/*
> -			 * fail to insert a new key, so
> -			 * delete the inserted item
> -			 */
> +			/* Fail to insert a new flow. */
>  			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
>  			return -1;
>  		}
>  		return 0;
>  	}
> 
> -	/* traverse all packets in the item group to find one to merge */
> -	cur_idx = tbl->keys[i].start_index;
> +	/*
> +	 * Check all packets in the flow and try to find a neighbor for
> +	 * the input packet.
> +	 */
> +	cur_idx = tbl->flows[i].start_index;
>  	prev_idx = cur_idx;
>  	do {
>  		cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
> -				pkt->l4_len, tcp_dl, ip_id, sent_seq);
> +				sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
> +				is_atomic);
>  		if (cmp) {
>  			if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
> -						pkt, ip_id,
> -						sent_seq, cmp))
> +						pkt, cmp, sent_seq, ip_id, 0))
>  				return 1;
>  			/*
> -			 * fail to merge two packets since the packet
> -			 * length will be greater than the max value.
> -			 * So insert the packet into the item group.
> +			 * Fail to merge the two packets, as the packet
> +			 * length is greater than the max value. Store
> +			 * the packet into the flow.
>  			 */
> -			if (insert_new_item(tbl, pkt, ip_id, sent_seq,
> -						prev_idx, start_time) ==
> +			if (insert_new_item(tbl, pkt, start_time, prev_idx,
> +						sent_seq, ip_id,
> +						is_atomic) ==
>  					INVALID_ARRAY_INDEX)
>  				return -1;
>  			return 0;
> @@ -429,12 +323,9 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
>  		cur_idx = tbl->items[cur_idx].next_pkt_idx;
>  	} while (cur_idx != INVALID_ARRAY_INDEX);
> 
> -	/*
> -	 * can't find a packet in the item group to merge,
> -	 * so insert the packet into the item group.
> -	 */
> -	if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx,
> -				start_time) == INVALID_ARRAY_INDEX)
> +	/* Fail to find a neighbor, so store the packet into the flow. */
> +	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
> +				ip_id, is_atomic) == INVALID_ARRAY_INDEX)
>  		return -1;
> 
>  	return 0;
> @@ -446,46 +337,35 @@ gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl
> *tbl,
>  		struct rte_mbuf **out,
>  		uint16_t nb_out)
>  {
> -	uint16_t k = 0;
> +	uint32_t max_flow_num = tbl->max_flow_num;
>  	uint32_t i, j;
> -	uint32_t max_key_num = tbl->max_key_num;
> +	uint16_t k = 0;
> 
> -	for (i = 0; i < max_key_num; i++) {
> -		/* all keys have been checked, return immediately */
> -		if (tbl->key_num == 0)
> +	for (i = 0; i < max_flow_num; i++) {
> +		if (unlikely(tbl->flow_num == 0))
>  			return k;
> 
> -		j = tbl->keys[i].start_index;
> +		j = tbl->flows[i].start_index;
>  		while (j != INVALID_ARRAY_INDEX) {
>  			if (tbl->items[j].start_time <= flush_timestamp) {
>  				out[k++] = tbl->items[j].firstseg;
>  				if (tbl->items[j].nb_merged > 1)
>  					update_header(&(tbl->items[j]));
>  				/*
> -				 * delete the item and get
> -				 * the next packet index
> +				 * Delete the packet and get the next
> +				 * packet in the flow.
>  				 */
> -				j = delete_item(tbl, j,
> -						INVALID_ARRAY_INDEX);
> +				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
> +				tbl->flows[i].start_index = j;
> +				if (j == INVALID_ARRAY_INDEX)
> +					tbl->flow_num--;
> 
> -				/*
> -				 * delete the key as all of
> -				 * packets are flushed
> -				 */
> -				if (j == INVALID_ARRAY_INDEX) {
> -					tbl->keys[i].start_index =
> -						INVALID_ARRAY_INDEX;
> -					tbl->key_num--;
> -				} else
> -					/* update start_index of the key */
> -					tbl->keys[i].start_index = j;
> -
> -				if (k == nb_out)
> +				if (unlikely(k == nb_out))
>  					return k;
>  			} else
>  				/*
> -				 * left packets of this key won't be
> -				 * timeout, so go to check other keys.
> +				 * The left packets in this flow won't be
> +				 * timeout. Go to check other flows.
>  				 */
>  				break;
>  		}
> diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h
> index 0a81716..66d6ce9 100644
> --- a/lib/librte_gro/gro_tcp4.h
> +++ b/lib/librte_gro/gro_tcp4.h
> @@ -33,17 +33,20 @@
>  #ifndef _GRO_TCP4_H_
>  #define _GRO_TCP4_H_
> 
> +#include <rte_ip.h>
> +#include <rte_tcp.h>
> +
>  #define INVALID_ARRAY_INDEX 0xffffffffUL
>  #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
> 
>  /*
> - * the max L3 length of a TCP/IPv4 packet. The L3 length
> - * is the sum of ipv4 header, tcp header and L4 payload.
> + * The max length of a IPv4 packet, which includes the length of the L3
> + * header, the L4 header and the data payload.
>   */
> -#define TCP4_MAX_L3_LENGTH UINT16_MAX
> +#define MAX_IPV4_PKT_LENGTH UINT16_MAX
> 
> -/* criteria of mergeing packets */
> -struct tcp4_key {
> +/* Header fields representing a TCP/IPv4 flow */
> +struct tcp4_flow_key {
>  	struct ether_addr eth_saddr;
>  	struct ether_addr eth_daddr;
>  	uint32_t ip_src_addr;
> @@ -54,77 +57,76 @@ struct tcp4_key {
>  	uint16_t dst_port;
>  };
> 
> -struct gro_tcp4_key {
> -	struct tcp4_key key;
> +struct gro_tcp4_flow {
> +	struct tcp4_flow_key key;
>  	/*
> -	 * the index of the first packet in the item group.
> -	 * If the value is INVALID_ARRAY_INDEX, it means
> -	 * the key is empty.
> +	 * The index of the first packet in the flow.
> +	 * INVALID_ARRAY_INDEX indicates an empty flow.
>  	 */
>  	uint32_t start_index;
>  };
> 
>  struct gro_tcp4_item {
>  	/*
> -	 * first segment of the packet. If the value
> +	 * The first MBUF segment of the packet. If the value
>  	 * is NULL, it means the item is empty.
>  	 */
>  	struct rte_mbuf *firstseg;
> -	/* last segment of the packet */
> +	/* The last MBUF segment of the packet */
>  	struct rte_mbuf *lastseg;
>  	/*
> -	 * the time when the first packet is inserted
> -	 * into the table. If a packet in the table is
> -	 * merged with an incoming packet, this value
> -	 * won't be updated. We set this value only
> -	 * when the first packet is inserted into the
> -	 * table.
> +	 * The time when the first packet is inserted into the table.
> +	 * This value won't be updated, even if the packet is merged
> +	 * with other packets.
>  	 */
>  	uint64_t start_time;
>  	/*
> -	 * we use next_pkt_idx to chain the packets that
> -	 * have same key value but can't be merged together.
> +	 * next_pkt_idx is used to chain the packets that
> +	 * are in the same flow but can't be merged together
> +	 * (e.g. caused by packet reordering).
>  	 */
>  	uint32_t next_pkt_idx;
> -	/* the sequence number of the packet */
> +	/* TCP sequence number of the packet */
>  	uint32_t sent_seq;
> -	/* the IP ID of the packet */
> +	/* IPv4 ID of the packet */
>  	uint16_t ip_id;
> -	/* the number of merged packets */
> +	/* The number of merged packets */
>  	uint16_t nb_merged;
> +	/* Indicate if IPv4 ID can be ignored */
> +	uint8_t is_atomic;
>  };
> 
>  /*
> - * TCP/IPv4 reassembly table structure.
> + * TCP/IPv4 reassembly table structure
>   */
>  struct gro_tcp4_tbl {
>  	/* item array */
>  	struct gro_tcp4_item *items;
> -	/* key array */
> -	struct gro_tcp4_key *keys;
> +	/* flow array */
> +	struct gro_tcp4_flow *flows;
>  	/* current item number */
>  	uint32_t item_num;
> -	/* current key num */
> -	uint32_t key_num;
> +	/* current flow num */
> +	uint32_t flow_num;
>  	/* item array size */
>  	uint32_t max_item_num;
> -	/* key array size */
> -	uint32_t max_key_num;
> +	/* flow array size */
> +	uint32_t max_flow_num;
>  };
> 
>  /**
>   * This function creates a TCP/IPv4 reassembly table.
>   *
>   * @param socket_id
> - *  socket index for allocating TCP/IPv4 reassemble table
> + *  Socket index for allocating the TCP/IPv4 reassemble table
>   * @param max_flow_num
> - *  the maximum number of flows in the TCP/IPv4 GRO table
> + *  The maximum number of flows in the TCP/IPv4 GRO table
>   * @param max_item_per_flow
> - *  the maximum packet number per flow.
> + *  The maximum number of packets per flow
>   *
>   * @return
> - *  if create successfully, return a pointer which points to the
> - *  created TCP/IPv4 GRO table. Otherwise, return NULL.
> + *  - Return the table pointer on success.
> + *  - Return NULL on failure.
>   */
>  void *gro_tcp4_tbl_create(uint16_t socket_id,
>  		uint16_t max_flow_num,
> @@ -134,62 +136,56 @@ void *gro_tcp4_tbl_create(uint16_t socket_id,
>   * This function destroys a TCP/IPv4 reassembly table.
>   *
>   * @param tbl
> - *  a pointer points to the TCP/IPv4 reassembly table.
> + *  Pointer pointing to the TCP/IPv4 reassembly table.
>   */
>  void gro_tcp4_tbl_destroy(void *tbl);
> 
>  /**
> - * This function searches for a packet in the TCP/IPv4 reassembly table
> - * to merge with the inputted one. To merge two packets is to chain them
> - * together and update packet headers. Packets, whose SYN, FIN, RST, PSH
> - * CWR, ECE or URG bit is set, are returned immediately. Packets which
> - * only have packet headers (i.e. without data) are also returned
> - * immediately. Otherwise, the packet is either merged, or inserted into
> - * the table. Besides, if there is no available space to insert the
> - * packet, this function returns immediately too.
> + * This function merges a TCP/IPv4 packet. It doesn't process the packet,
> + * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or doesn't have
> + * payload.
>   *
> - * This function assumes the inputted packet is with correct IPv4 and
> - * TCP checksums. And if two packets are merged, it won't re-calculate
> - * IPv4 and TCP checksums. Besides, if the inputted packet is IP
> - * fragmented, it assumes the packet is complete (with TCP header).
> + * This function doesn't check if the packet has correct checksums and
> + * doesn't re-calculate checksums for the merged packet. Additionally,
> + * it assumes the packets are complete (i.e., MF==0 && frag_off==0),
> + * when IP fragmentation is possible (i.e., DF==0). It returns the
> + * packet, if the packet has invalid parameters (e.g. SYN bit is set)
> + * or there is no available space in the table.
>   *
>   * @param pkt
> - *  packet to reassemble.
> + *  Packet to reassemble
>   * @param tbl
> - *  a pointer that points to a TCP/IPv4 reassembly table.
> + *  Pointer pointing to the TCP/IPv4 reassembly table
>   * @start_time
> - *  the start time that the packet is inserted into the table
> + *  The time when the packet is inserted into the table
>   *
>   * @return
> - *  if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE
> - *  or URG bit is set, or there is no available space in the table to
> - *  insert a new item or a new key, return a negative value. If the
> - *  packet is merged successfully, return an positive value. If the
> - *  packet is inserted into the table, return 0.
> + *  - Return a positive value if the packet is merged.
> + *  - Return zero if the packet isn't merged but stored in the table.
> + *  - Return a negative value for invalid parameters or no available
> + *    space in the table.
>   */
>  int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt,
>  		struct gro_tcp4_tbl *tbl,
>  		uint64_t start_time);
> 
>  /**
> - * This function flushes timeout packets in a TCP/IPv4 reassembly table
> - * to applications, and without updating checksums for merged packets.
> - * The max number of flushed timeout packets is the element number of
> - * the array which is used to keep flushed packets.
> + * This function flushes timeout packets in a TCP/IPv4 reassembly table,
> + * and without updating checksums.
>   *
>   * @param tbl
> - *  a pointer that points to a TCP GRO table.
> + *  TCP/IPv4 reassembly table pointer
>   * @param flush_timestamp
> - *  this function flushes packets which are inserted into the table
> - *  before or at the flush_timestamp.
> + *  Flush packets which are inserted into the table before or at the
> + *  flush_timestamp.
>   * @param out
> - *  pointer array which is used to keep flushed packets.
> + *  Pointer array used to keep flushed packets
>   * @param nb_out
> - *  the element number of out. It's also the max number of timeout
> + *  The element number in 'out'. It also determines the maximum number of
>   *  packets that can be flushed finally.
>   *
>   * @return
> - *  the number of packets that are returned.
> + *  The number of flushed packets
>   */
>  uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
>  		uint64_t flush_timestamp,
> @@ -201,10 +197,131 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct
> gro_tcp4_tbl *tbl,
>   * reassembly table.
>   *
>   * @param tbl
> - *  pointer points to a TCP/IPv4 reassembly table.
> + *  TCP/IPv4 reassembly table pointer
>   *
>   * @return
> - *  the number of packets in the table
> + *  The number of packets in the table
>   */
>  uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
> +
> +/*
> + * Check if two TCP/IPv4 packets belong to the same flow.
> + */
> +static inline int
> +is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
> +{
> +	return (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) &&
> +			is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) &&
> +			(k1.ip_src_addr == k2.ip_src_addr) &&
> +			(k1.ip_dst_addr == k2.ip_dst_addr) &&
> +			(k1.recv_ack == k2.recv_ack) &&
> +			(k1.src_port == k2.src_port) &&
> +			(k1.dst_port == k2.dst_port));
> +}
> +
> +/*
> + * Check if two TCP/IPv4 packets are neighbors.
> + */
> +static inline int
> +check_seq_option(struct gro_tcp4_item *item,
> +		struct tcp_hdr *tcph,
> +		uint32_t sent_seq,
> +		uint16_t ip_id,
> +		uint16_t tcp_hl,
> +		uint16_t tcp_dl,
> +		uint16_t l2_offset,
> +		uint8_t is_atomic)
> +{
> +	struct rte_mbuf *pkt_orig = item->firstseg;
> +	struct ipv4_hdr *iph_orig;
> +	struct tcp_hdr *tcph_orig;
> +	uint16_t len, l4_len_orig;
> +
> +	iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
> +			l2_offset + pkt_orig->l2_len);
> +	tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
> +	l4_len_orig = pkt_orig->l4_len;
> +
> +	/* Check if TCP option fields equal */
> +	len = RTE_MAX(tcp_hl, l4_len_orig) - sizeof(struct tcp_hdr);
> +	if ((tcp_hl != l4_len_orig) || ((len > 0) &&
> +				(memcmp(tcph + 1, tcph_orig + 1,
> +					len) != 0)))
> +		return 0;
> +
> +	/* Don't merge packets whose DF bits are different */
> +	if (unlikely(item->is_atomic ^ is_atomic))
> +		return 0;
> +
> +	/* Check if the two packets are neighbors */
> +	len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
> +		pkt_orig->l3_len - l4_len_orig;
> +	if ((sent_seq == item->sent_seq + len) && (is_atomic ||
> +				(ip_id == item->ip_id + item->nb_merged)))
> +		/* Append the new packet */
> +		return 1;
> +	else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
> +				(ip_id + 1 == item->ip_id)))
> +		/* Pre-pend the new packet */
> +		return -1;
> +
> +	return 0;
> +}
> +
> +/*
> + * Merge two TCP/IPv4 packets without updating checksums.
> + * If cmp is larger than 0, append the new packet to the
> + * original packet. Otherwise, pre-pend the new packet to
> + * the original packet.
> + */
> +static inline int
> +merge_two_tcp4_packets(struct gro_tcp4_item *item,
> +		struct rte_mbuf *pkt,
> +		int cmp,
> +		uint32_t sent_seq,
> +		uint16_t ip_id,
> +		uint16_t l2_offset)
> +{
> +	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
> +	uint16_t hdr_len, l2_len;
> +
> +	if (cmp > 0) {
> +		pkt_head = item->firstseg;
> +		pkt_tail = pkt;
> +	} else {
> +		pkt_head = pkt;
> +		pkt_tail = item->firstseg;
> +	}
> +
> +	/* Check if the IPv4 packet length is greater than the max value */
> +	hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
> +		pkt_head->l4_len;
> +	l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
> +	if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len - hdr_len >
> +			MAX_IPV4_PKT_LENGTH))
> +		return 0;
> +
> +	/* Remove the packet header */
> +	rte_pktmbuf_adj(pkt_tail, hdr_len);
> +
> +	/* Chain two packets together */
> +	if (cmp > 0) {
> +		item->lastseg->next = pkt;
> +		item->lastseg = rte_pktmbuf_lastseg(pkt);
> +	} else {
> +		lastseg = rte_pktmbuf_lastseg(pkt);
> +		lastseg->next = item->firstseg;
> +		item->firstseg = pkt;
> +		/* Update sent_seq and ip_id */
> +		item->sent_seq = sent_seq;
> +		item->ip_id = ip_id;
> +	}
> +	item->nb_merged++;
> +
> +	/* Update MBUF metadata for the merged packet */
> +	pkt_head->nb_segs += pkt_tail->nb_segs;
> +	pkt_head->pkt_len += pkt_tail->pkt_len;
> +
> +	return 1;
> +}
>  #endif
> diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
> index 7853246..b3931a8 100644
> --- a/lib/librte_gro/rte_gro.c
> +++ b/lib/librte_gro/rte_gro.c
> @@ -51,11 +51,14 @@ static gro_tbl_destroy_fn
> tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
>  static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] =
> {
>  			gro_tcp4_tbl_pkt_count, NULL};
> 
> +#define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
> +		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
> +
>  /*
> - * GRO context structure, which is used to merge packets. It keeps
> - * many reassembly tables of desired GRO types. Applications need to
> - * create GRO context objects before using rte_gro_reassemble to
> - * perform GRO.
> + * GRO context structure. It keeps the table structures, which are
> + * used to merge packets, for different GRO types. Before using
> + * rte_gro_reassemble(), applications need to create the GRO context
> + * first.
>   */
>  struct gro_ctx {
>  	/* GRO types to perform */
> @@ -93,7 +96,7 @@ rte_gro_ctx_create(const struct rte_gro_param *param)
>  				param->max_flow_num,
>  				param->max_item_per_flow);
>  		if (gro_ctx->tbls[i] == NULL) {
> -			/* destroy all created tables */
> +			/* Destroy all created tables */
>  			gro_ctx->gro_types = gro_types;
>  			rte_gro_ctx_destroy(gro_ctx);
>  			return NULL;
> @@ -113,8 +116,6 @@ rte_gro_ctx_destroy(void *ctx)
>  	uint64_t gro_type_flag;
>  	uint8_t i;
> 
> -	if (gro_ctx == NULL)
> -		return;
>  	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
>  		gro_type_flag = 1ULL << i;
>  		if ((gro_ctx->gro_types & gro_type_flag) == 0)
> @@ -131,62 +132,54 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
>  		uint16_t nb_pkts,
>  		const struct rte_gro_param *param)
>  {
> -	uint16_t i;
> -	uint16_t nb_after_gro = nb_pkts;
> -	uint32_t item_num;
> -
> -	/* allocate a reassembly table for TCP/IPv4 GRO */
> +	/* Allocate a reassembly table for TCP/IPv4 GRO */
>  	struct gro_tcp4_tbl tcp_tbl;
> -	struct gro_tcp4_key tcp_keys[RTE_GRO_MAX_BURST_ITEM_NUM];
> +	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>  	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] =
> {{0} };
> 
>  	struct rte_mbuf *unprocess_pkts[nb_pkts];
> -	uint16_t unprocess_num = 0;
> +	uint32_t item_num;
>  	int32_t ret;
> -	uint64_t current_time;
> +	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
> 
> -	if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0)
> +	if (unlikely((param->gro_types & RTE_GRO_TCP_IPV4) == 0))
>  		return nb_pkts;
> 
> -	/* get the actual number of packets */
> +	/* Get the maximum number of packets */
>  	item_num = RTE_MIN(nb_pkts, (param->max_flow_num *
> -			param->max_item_per_flow));
> +				param->max_item_per_flow));
>  	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
> 
>  	for (i = 0; i < item_num; i++)
> -		tcp_keys[i].start_index = INVALID_ARRAY_INDEX;
> +		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
> 
> -	tcp_tbl.keys = tcp_keys;
> +	tcp_tbl.flows = tcp_flows;
>  	tcp_tbl.items = tcp_items;
> -	tcp_tbl.key_num = 0;
> +	tcp_tbl.flow_num = 0;
>  	tcp_tbl.item_num = 0;
> -	tcp_tbl.max_key_num = item_num;
> +	tcp_tbl.max_flow_num = item_num;
>  	tcp_tbl.max_item_num = item_num;
> 
> -	current_time = rte_rdtsc();
> -
>  	for (i = 0; i < nb_pkts; i++) {
> -		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
> -					RTE_PTYPE_L4_TCP)) ==
> -				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
> -			ret = gro_tcp4_reassemble(pkts[i],
> -					&tcp_tbl,
> -					current_time);
> +		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
> +			/*
> +			 * The timestamp is ignored, since all packets
> +			 * will be flushed from the tables.
> +			 */
> +			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0);
>  			if (ret > 0)
> -				/* merge successfully */
> +				/* Merge successfully */
>  				nb_after_gro--;
> -			else if (ret < 0) {
> -				unprocess_pkts[unprocess_num++] =
> -					pkts[i];
> -			}
> +			else if (ret < 0)
> +				unprocess_pkts[unprocess_num++] = pkts[i];
>  		} else
>  			unprocess_pkts[unprocess_num++] = pkts[i];
>  	}
> 
> -	/* re-arrange GROed packets */
>  	if (nb_after_gro < nb_pkts) {
> -		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time,
> -				pkts, nb_pkts);
> +		/* Flush all packets from the tables */
> +		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, pkts, nb_pkts);
> +		/* Copy unprocessed packets */
>  		if (unprocess_num > 0) {
>  			memcpy(&pkts[i], unprocess_pkts,
>  					sizeof(struct rte_mbuf *) *
> @@ -202,31 +195,28 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
>  		uint16_t nb_pkts,
>  		void *ctx)
>  {
> -	uint16_t i, unprocess_num = 0;
>  	struct rte_mbuf *unprocess_pkts[nb_pkts];
>  	struct gro_ctx *gro_ctx = ctx;
> +	void *tcp_tbl;
>  	uint64_t current_time;
> +	uint16_t i, unprocess_num = 0;
> 
> -	if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0)
> +	if (unlikely((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0))
>  		return nb_pkts;
> 
> +	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
>  	current_time = rte_rdtsc();
> 
>  	for (i = 0; i < nb_pkts; i++) {
> -		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
> -					RTE_PTYPE_L4_TCP)) ==
> -				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
> -			if (gro_tcp4_reassemble(pkts[i],
> -						gro_ctx->tbls
> -						[RTE_GRO_TCP_IPV4_INDEX],
> +		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
> +			if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
>  						current_time) < 0)
>  				unprocess_pkts[unprocess_num++] = pkts[i];
>  		} else
>  			unprocess_pkts[unprocess_num++] = pkts[i];
>  	}
>  	if (unprocess_num > 0) {
> -		memcpy(pkts, unprocess_pkts,
> -				sizeof(struct rte_mbuf *) *
> +		memcpy(pkts, unprocess_pkts, sizeof(struct rte_mbuf *) *
>  				unprocess_num);
>  	}
> 
> @@ -252,6 +242,7 @@ rte_gro_timeout_flush(void *ctx,
>  				flush_timestamp,
>  				out, max_nb_out);
>  	}
> +
>  	return 0;
>  }
> 
> @@ -262,7 +253,7 @@ rte_gro_get_pkt_count(void *ctx)
>  	gro_tbl_pkt_count_fn pkt_count_fn;
>  	uint64_t item_num = 0;
>  	uint64_t gro_type_flag;
> -	uint8_t i;
> +	uint8_t gro_type_num = RTE_GRO_TYPE_SUPPORT_NUM, i;
> 
>  	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
>  		gro_type_flag = 1ULL << i;
> @@ -270,9 +261,12 @@ rte_gro_get_pkt_count(void *ctx)
>  			continue;
> 
>  		pkt_count_fn = tbl_pkt_count_fn[i];
> -		if (pkt_count_fn == NULL)
> -			continue;
> -		item_num += pkt_count_fn(gro_ctx->tbls[i]);
> +		if (pkt_count_fn) {
> +			item_num += pkt_count_fn(gro_ctx->tbls[i]);
> +			if (--gro_type_num == 0)
> +				break;
> +		}
>  	}
> +
>  	return item_num;
>  }
> diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
> index d57e0c5..36a1e60 100644
> --- a/lib/librte_gro/rte_gro.h
> +++ b/lib/librte_gro/rte_gro.h
> @@ -59,8 +59,8 @@ extern "C" {
>  /**< TCP/IPv4 GRO flag */
> 
>  /**
> - * A structure which is used to create GRO context objects or tell
> - * rte_gro_reassemble_burst() what reassembly rules are demanded.
> + * Structure used to create GRO context objects or used to pass
> + * application-determined parameters to rte_gro_reassemble_burst().
>   */
>  struct rte_gro_param {
>  	uint64_t gro_types;
> @@ -106,26 +106,23 @@ void rte_gro_ctx_destroy(void *ctx);
> 
>  /**
>   * This is one of the main reassembly APIs, which merges numbers of
> - * packets at a time. It assumes that all inputted packets are with
> - * correct checksums. That is, applications should guarantee all
> - * inputted packets are correct. Besides, it doesn't re-calculate
> - * checksums for merged packets. If inputted packets are IP fragmented,
> - * this function assumes them are complete (i.e. with L4 header). After
> - * finishing processing, it returns all GROed packets to applications
> - * immediately.
> + * packets at a time. It doesn't check if input packets have correct
> + * checksums and doesn't re-calculate checksums for merged packets.
> + * It assumes the packets are complete (i.e., MF==0 && frag_off==0),
> + * when IP fragmentation is possible (i.e., DF==0). The GROed packets
> + * are returned as soon as the function finishes.
>   *
>   * @param pkts
> - *  a pointer array which points to the packets to reassemble. Besides,
> - *  it keeps mbuf addresses for the GROed packets.
> + *  Pointer array pointing to the packets to reassemble. Besides, it
> + *  keeps MBUF addresses for the GROed packets.
>   * @param nb_pkts
> - *  the number of packets to reassemble.
> + *  The number of packets to reassemble
>   * @param param
> - *  applications use it to tell rte_gro_reassemble_burst() what rules
> - *  are demanded.
> + *  Application-determined parameters for reassembling packets.
>   *
>   * @return
> - *  the number of packets after been GROed. If no packets are merged,
> - *  the returned value is nb_pkts.
> + *  The number of packets after being GROed. If no packets are merged,
> + *  the return value is equal to nb_pkts.
>   */
>  uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts,
>  		uint16_t nb_pkts,
> @@ -135,32 +132,28 @@ uint16_t rte_gro_reassemble_burst(struct rte_mbuf
> **pkts,
>   * @warning
>   * @b EXPERIMENTAL: this API may change without prior notice
>   *
> - * Reassembly function, which tries to merge inputted packets with
> - * the packets in the reassembly tables of a given GRO context. This
> - * function assumes all inputted packets are with correct checksums.
> - * And it won't update checksums if two packets are merged. Besides,
> - * if inputted packets are IP fragmented, this function assumes they
> - * are complete packets (i.e. with L4 header).
> + * Reassembly function, which tries to merge input packets with the
> + * existing packets in the reassembly tables of a given GRO context.
> + * It doesn't check if input packets have correct checksums and doesn't
> + * re-calculate checksums for merged packets. Additionally, it assumes
> + * the packets are complete (i.e., MF==0 && frag_off==0), when IP
> + * fragmentation is possible (i.e., DF==0).
>   *
> - * If the inputted packets don't have data or are with unsupported GRO
> - * types etc., they won't be processed and are returned to applications.
> - * Otherwise, the inputted packets are either merged or inserted into
> - * the table. If applications want get packets in the table, they need
> - * to call flush API.
> + * If the input packets have invalid parameters (e.g. no data payload,
> + * unsupported GRO types), they are returned to applications. Otherwise,
> + * they are either merged or inserted into the table. Applications need
> + * to flush packets from the tables via the flush API, if they want to get the
> + * GROed packets.
>   *
>   * @param pkts
> - *  packet to reassemble. Besides, after this function finishes, it
> - *  keeps the unprocessed packets (e.g. without data or unsupported
> - *  GRO types).
> + *  Packets to reassemble. It's also used to store the unprocessed packets.
>   * @param nb_pkts
> - *  the number of packets to reassemble.
> + *  The number of packets to reassemble
>   * @param ctx
> - *  a pointer points to a GRO context object.
> + *  GRO context object pointer
>   *
>   * @return
> - *  return the number of unprocessed packets (e.g. without data or
> - *  unsupported GRO types). If all packets are processed (merged or
> - *  inserted into the table), return 0.
> + *  The number of unprocessed packets.
>   */
>  uint16_t rte_gro_reassemble(struct rte_mbuf **pkts,
>  		uint16_t nb_pkts,
> @@ -170,29 +163,28 @@ uint16_t rte_gro_reassemble(struct rte_mbuf
> **pkts,
>   * @warning
>   * @b EXPERIMENTAL: this API may change without prior notice
>   *
> - * This function flushes the timeout packets from reassembly tables of
> - * desired GRO types. The max number of flushed timeout packets is the
> - * element number of the array which is used to keep the flushed packets.
> + * This function flushes the timeout packets from the reassembly tables
> + * of desired GRO types. The max number of flushed packets is the
> + * element number of 'out'.
>   *
> - * Besides, this function won't re-calculate checksums for merged
> - * packets in the tables. That is, the returned packets may be with
> - * wrong checksums.
> + * Additionally, the flushed packets may have incorrect checksums, since
> + * this function doesn't re-calculate checksums for merged packets.
>   *
>   * @param ctx
> - *  a pointer points to a GRO context object.
> + *  GRO context object pointer.
>   * @param timeout_cycles
> - *  max TTL for packets in reassembly tables, measured in nanosecond.
> + *  The max TTL for packets in reassembly tables, measured in nanoseconds.
>   * @param gro_types
> - *  this function only flushes packets which belong to the GRO types
> - *  specified by gro_types.
> + *  This function flushes packets whose GRO types are specified by
> + *  gro_types.
>   * @param out
> - *  a pointer array that is used to keep flushed timeout packets.
> + *  Pointer array used to keep flushed packets.
>   * @param max_nb_out
> - *  the element number of out. It's also the max number of timeout
> + *  The element number of 'out'. It's also the max number of timeout
>   *  packets that can be flushed finally.
>   *
>   * @return
> - *  the number of flushed packets. If no packets are flushed, return 0.
> + *  The number of flushed packets.
>   */
>  uint16_t rte_gro_timeout_flush(void *ctx,
>  		uint64_t timeout_cycles,
> @@ -208,10 +200,10 @@ uint16_t rte_gro_timeout_flush(void *ctx,
>   * of a given GRO context.
>   *
>   * @param ctx
> - *  pointer points to a GRO context object.
> + *  GRO context object pointer.
>   *
>   * @return
> - *  the number of packets in all reassembly tables.
> + *  The number of packets in the tables.
>   */
>  uint64_t rte_gro_get_pkt_count(void *ctx);
> 
> --
> 2.7.4

Reviewed-by: Junjie Chen<junjie.j.chen@intel.com>

Thanks

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 2/2] gro: support VxLAN GRO
  2017-12-22  7:25     ` [PATCH v3 2/2] gro: support VxLAN GRO Jiayu Hu
  2017-12-22  8:17       ` Chen, Junjie J
@ 2017-12-29  3:53       ` Chen, Junjie J
  1 sibling, 0 replies; 31+ messages in thread
From: Chen, Junjie J @ 2017-12-29  3:53 UTC (permalink / raw)
  To: Hu, Jiayu, dev
  Cc: Tan, Jianfeng, Ananyev, Konstantin, stephen, Yigit, Ferruh, Yao, Lei A


> -----Original Message-----
> From: Hu, Jiayu
> Sent: Friday, December 22, 2017 3:26 PM
> To: dev@dpdk.org
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Chen, Junjie J
> <junjie.j.chen@intel.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; stephen@networkplumber.org; Yigit,
> Ferruh <ferruh.yigit@intel.com>; Yao, Lei A <lei.a.yao@intel.com>; Hu, Jiayu
> <jiayu.hu@intel.com>
> Subject: [PATCH v3 2/2] gro: support VxLAN GRO
> 
> This patch adds a framework that allows GRO on tunneled packets.
> Furthermore, it leverages that framework to provide GRO support for
> VxLAN-encapsulated packets. Supported VxLAN packets must have an outer
> IPv4 header, and contain an inner TCP/IPv4 packet.
> 
> VxLAN GRO doesn't check if input packets have correct checksums and doesn't
> update checksums for output packets. Additionally, it assumes the packets are
> complete (i.e., MF==0 && frag_off==0), when IP fragmentation is possible (i.e.,
> DF==0).
> 
> Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
> ---
>  .../prog_guide/generic_receive_offload_lib.rst     |  31 +-
>  lib/librte_gro/Makefile                            |   1 +
>  lib/librte_gro/gro_vxlan_tcp4.c                    | 515
> +++++++++++++++++++++
>  lib/librte_gro/gro_vxlan_tcp4.h                    | 184 ++++++++
>  lib/librte_gro/rte_gro.c                           | 129 +++++-
>  lib/librte_gro/rte_gro.h                           |   5 +-
>  6 files changed, 837 insertions(+), 28 deletions(-)  create mode 100644
> lib/librte_gro/gro_vxlan_tcp4.c  create mode 100644
> lib/librte_gro/gro_vxlan_tcp4.h
> 
> diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst
> b/doc/guides/prog_guide/generic_receive_offload_lib.rst
> index c2d7a41..078bec0 100644
> --- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
> +++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
> @@ -57,7 +57,9 @@ assumes the packets are complete (i.e., MF==0 &&
> frag_off==0), when IP  fragmentation is possible (i.e., DF==0). Additionally, it
> complies RFC
>  6864 to process the IPv4 ID field.
> 
> -Currently, the GRO library provides GRO supports for TCP/IPv4 packets.
> +Currently, the GRO library provides GRO support for TCP/IPv4 packets
> +and VxLAN packets which contain an outer IPv4 header and an inner
> +TCP/IPv4 packet.
> 
>  Two Sets of API
>  ---------------
> @@ -108,7 +110,8 @@ Reassembly Algorithm
> 
>  The reassembly algorithm is used for reassembling packets. In the GRO
> library, different GRO types can use different algorithms. In this -section, we
> will introduce an algorithm, which is used by TCP/IPv4 GRO.
> +section, we will introduce an algorithm, which is used by TCP/IPv4 GRO
> +and VxLAN GRO.
> 
>  Challenges
>  ~~~~~~~~~~
> @@ -185,6 +188,30 @@ Header fields deciding if two packets are neighbors
> include:
>  - IPv4 ID. The IPv4 ID fields of the packets, whose DF bit is 0, should
>    be increased by 1.
> 
> +VxLAN GRO
> +---------
> +
> +The table structure used by VxLAN GRO, which is in charge of processing
> +VxLAN packets with an outer IPv4 header and inner TCP/IPv4 packet, is
> +similar to that of TCP/IPv4 GRO. However, the header fields used
> +to define a VxLAN flow include:
> +
> +- outer source and destination: Ethernet and IP address, UDP port
> +
> +- VxLAN header (VNI and flag)
> +
> +- inner source and destination: Ethernet and IP address, TCP port
> +
> +Header fields deciding if packets are neighbors include:
> +
> +- outer IPv4 ID. The IPv4 ID fields of the packets, whose DF bit in the
> +  outer IPv4 header is 0, should be increased by 1.
> +
> +- inner TCP sequence number
> +
> +- inner IPv4 ID. The IPv4 ID fields of the packets, whose DF bit in the
> +  inner IPv4 header is 0, should be increased by 1.
> +
>  .. note::
>          We comply RFC 6864 to process the IPv4 ID field. Specifically,
>          we check IPv4 ID fields for the packets whose DF bit is 0 and diff
> --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile index
> eb423cc..0110455 100644
> --- a/lib/librte_gro/Makefile
> +++ b/lib/librte_gro/Makefile
> @@ -45,6 +45,7 @@ LIBABIVER := 1
>  # source files
>  SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
>  SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c
> +SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_vxlan_tcp4.c
> 
>  # install this header file
>  SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h diff --git
> a/lib/librte_gro/gro_vxlan_tcp4.c b/lib/librte_gro/gro_vxlan_tcp4.c new file
> mode 100644 index 0000000..6567779
> --- /dev/null
> +++ b/lib/librte_gro/gro_vxlan_tcp4.c
> @@ -0,0 +1,515 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2017 Intel Corporation. All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +#include <rte_malloc.h>
> +#include <rte_mbuf.h>
> +#include <rte_cycles.h>
> +#include <rte_ethdev.h>
> +#include <rte_udp.h>
> +
> +#include "gro_vxlan_tcp4.h"
> +
> +void *
> +gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
> +		uint16_t max_flow_num,
> +		uint16_t max_item_per_flow)
> +{
> +	struct gro_vxlan_tcp4_tbl *tbl;
> +	size_t size;
> +	uint32_t entries_num, i;
> +
> +	entries_num = max_flow_num * max_item_per_flow;
> +	entries_num = RTE_MIN(entries_num,
> GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM);
> +
> +	if (entries_num == 0)
> +		return NULL;
> +
> +	tbl = rte_zmalloc_socket(__func__,
> +			sizeof(struct gro_vxlan_tcp4_tbl),
> +			RTE_CACHE_LINE_SIZE,
> +			socket_id);
> +	if (tbl == NULL)
> +		return NULL;
> +
> +	size = sizeof(struct gro_vxlan_tcp4_item) * entries_num;
> +	tbl->items = rte_zmalloc_socket(__func__,
> +			size,
> +			RTE_CACHE_LINE_SIZE,
> +			socket_id);
> +	if (tbl->items == NULL) {
> +		rte_free(tbl);
> +		return NULL;
> +	}
> +	tbl->max_item_num = entries_num;
> +
> +	size = sizeof(struct gro_vxlan_tcp4_flow) * entries_num;
> +	tbl->flows = rte_zmalloc_socket(__func__,
> +			size,
> +			RTE_CACHE_LINE_SIZE,
> +			socket_id);
> +	if (tbl->flows == NULL) {
> +		rte_free(tbl->items);
> +		rte_free(tbl);
> +		return NULL;
> +	}
> +
> +	for (i = 0; i < entries_num; i++)
> +		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
> +	tbl->max_flow_num = entries_num;
> +
> +	return tbl;
> +}
> +
> +void
> +gro_vxlan_tcp4_tbl_destroy(void *tbl)
> +{
> +	struct gro_vxlan_tcp4_tbl *vxlan_tbl = tbl;
> +
> +	if (vxlan_tbl) {
> +		rte_free(vxlan_tbl->items);
> +		rte_free(vxlan_tbl->flows);
> +	}
> +	rte_free(vxlan_tbl);
> +}
> +
> +static inline uint32_t
> +find_an_empty_item(struct gro_vxlan_tcp4_tbl *tbl) {
> +	uint32_t max_item_num = tbl->max_item_num, i;
> +
> +	for (i = 0; i < max_item_num; i++)
> +		if (tbl->items[i].inner_item.firstseg == NULL)
> +			return i;
> +	return INVALID_ARRAY_INDEX;
> +}
> +
> +static inline uint32_t
> +find_an_empty_flow(struct gro_vxlan_tcp4_tbl *tbl) {
> +	uint32_t max_flow_num = tbl->max_flow_num, i;
> +
> +	for (i = 0; i < max_flow_num; i++)
> +		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
> +			return i;
> +	return INVALID_ARRAY_INDEX;
> +}
> +
> +static inline uint32_t
> +insert_new_item(struct gro_vxlan_tcp4_tbl *tbl,
> +		struct rte_mbuf *pkt,
> +		uint64_t start_time,
> +		uint32_t prev_idx,
> +		uint32_t sent_seq,
> +		uint16_t outer_ip_id,
> +		uint16_t ip_id,
> +		uint8_t outer_is_atomic,
> +		uint8_t is_atomic)
> +{
> +	uint32_t item_idx;
> +
> +	item_idx = find_an_empty_item(tbl);
> +	if (item_idx == INVALID_ARRAY_INDEX)
> +		return INVALID_ARRAY_INDEX;
> +
> +	tbl->items[item_idx].inner_item.firstseg = pkt;
> +	tbl->items[item_idx].inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
> +	tbl->items[item_idx].inner_item.start_time = start_time;
> +	tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
> +	tbl->items[item_idx].inner_item.sent_seq = sent_seq;
> +	tbl->items[item_idx].inner_item.ip_id = ip_id;
> +	tbl->items[item_idx].inner_item.nb_merged = 1;
> +	tbl->items[item_idx].inner_item.is_atomic = is_atomic;
> +	tbl->items[item_idx].outer_ip_id = outer_ip_id;
> +	tbl->items[item_idx].outer_is_atomic = outer_is_atomic;
> +	tbl->item_num++;
> +
> +	/* If the previous packet exists, chain the new one with it. */
> +	if (prev_idx != INVALID_ARRAY_INDEX) {
> +		tbl->items[item_idx].inner_item.next_pkt_idx =
> +			tbl->items[prev_idx].inner_item.next_pkt_idx;
> +		tbl->items[prev_idx].inner_item.next_pkt_idx = item_idx;
> +	}
> +
> +	return item_idx;
> +}
> +
> +static inline uint32_t
> +delete_item(struct gro_vxlan_tcp4_tbl *tbl,
> +		uint32_t item_idx,
> +		uint32_t prev_item_idx)
> +{
> +	uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
> +
> +	/* NULL indicates an empty item. */
> +	tbl->items[item_idx].inner_item.firstseg = NULL;
> +	tbl->item_num--;
> +	if (prev_item_idx != INVALID_ARRAY_INDEX)
> +		tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
> +
> +	return next_idx;
> +}
> +
> +static inline uint32_t
> +insert_new_flow(struct gro_vxlan_tcp4_tbl *tbl,
> +		struct vxlan_tcp4_flow_key *src,
> +		uint32_t item_idx)
> +{
> +	struct vxlan_tcp4_flow_key *dst;
> +	uint32_t flow_idx;
> +
> +	flow_idx = find_an_empty_flow(tbl);
> +	if (flow_idx == INVALID_ARRAY_INDEX)
> +		return INVALID_ARRAY_INDEX;
> +
> +	dst = &(tbl->flows[flow_idx].key);
> +
> +	ether_addr_copy(&(src->inner_key.eth_saddr),
> +			&(dst->inner_key.eth_saddr));
> +	ether_addr_copy(&(src->inner_key.eth_daddr),
> +			&(dst->inner_key.eth_daddr));
> +	dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
> +	dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
> +	dst->inner_key.recv_ack = src->inner_key.recv_ack;
> +	dst->inner_key.src_port = src->inner_key.src_port;
> +	dst->inner_key.dst_port = src->inner_key.dst_port;
> +
> +	dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
> +	dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
> +	ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
> +	ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
> +	dst->outer_ip_src_addr = src->outer_ip_src_addr;
> +	dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
> +	dst->outer_src_port = src->outer_src_port;
> +	dst->outer_dst_port = src->outer_dst_port;
> +
> +	tbl->flows[flow_idx].start_index = item_idx;
> +	tbl->flow_num++;
> +
> +	return flow_idx;
> +}
> +
> +static inline int
> +is_same_vxlan_tcp4_flow(struct vxlan_tcp4_flow_key k1,
> +		struct vxlan_tcp4_flow_key k2)
> +{
> +	return (is_same_ether_addr(&k1.outer_eth_saddr, &k2.outer_eth_saddr)
> &&
> +			is_same_ether_addr(&k1.outer_eth_daddr,
> +				&k2.outer_eth_daddr) &&
> +			(k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
> +			(k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
> +			(k1.outer_src_port == k2.outer_src_port) &&
> +			(k1.outer_dst_port == k2.outer_dst_port) &&
> +			(k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
> +			(k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
> +			is_same_tcp4_flow(k1.inner_key, k2.inner_key)); }
> +
> +static inline int
> +check_vxlan_seq_option(struct gro_vxlan_tcp4_item *item,
> +		struct tcp_hdr *tcp_hdr,
> +		uint32_t sent_seq,
> +		uint16_t outer_ip_id,
> +		uint16_t ip_id,
> +		uint16_t tcp_hl,
> +		uint16_t tcp_dl,
> +		uint8_t outer_is_atomic,
> +		uint8_t is_atomic)
> +{
> +	struct rte_mbuf *pkt = item->inner_item.firstseg;
> +	int cmp;
> +	uint16_t l2_offset;
> +
> +	/* Don't merge packets whose outer DF bits are different. */
> +	if (unlikely(item->outer_is_atomic ^ outer_is_atomic))
> +		return 0;
> +
> +	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
> +	cmp = check_seq_option(&item->inner_item, tcp_hdr, sent_seq, ip_id,
> +			tcp_hl, tcp_dl, l2_offset, is_atomic);
> +	if ((cmp == 1) && (outer_is_atomic ||
> +				(outer_ip_id == item->outer_ip_id +
> +				 item->inner_item.nb_merged)))
> +		/* Append the packet. */
> +		return 1;
> +	else if ((cmp == -1) && (outer_is_atomic ||
> +				(outer_ip_id + 1 == item->outer_ip_id)))
> +		/* Prepend the packet. */
> +		return -1;
> +
> +	return 0;
> +}
> +
> +static inline int
> +merge_two_vxlan_tcp4_packets(struct gro_vxlan_tcp4_item *item,
> +		struct rte_mbuf *pkt,
> +		int cmp,
> +		uint32_t sent_seq,
> +		uint16_t outer_ip_id,
> +		uint16_t ip_id)
> +{
> +	if (merge_two_tcp4_packets(&item->inner_item, pkt, cmp, sent_seq,
> +				ip_id, pkt->outer_l2_len +
> +				pkt->outer_l3_len)) {
> +		item->outer_ip_id = cmp < 0 ? outer_ip_id : item->outer_ip_id;
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static inline void
> +update_vxlan_header(struct gro_vxlan_tcp4_item *item) {
> +	struct ipv4_hdr *ipv4_hdr;
> +	struct udp_hdr *udp_hdr;
> +	struct rte_mbuf *pkt = item->inner_item.firstseg;
> +	uint16_t len;
> +
> +	/* Update the outer IPv4 header. */
> +	len = pkt->pkt_len - pkt->outer_l2_len;
> +	ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
> +			pkt->outer_l2_len);
> +	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
> +
> +	/* Update the outer UDP header. */
> +	len -= pkt->outer_l3_len;
> +	udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + pkt->outer_l3_len);
> +	udp_hdr->dgram_len = rte_cpu_to_be_16(len);
> +
> +	/* Update the inner IPv4 header. */
> +	len -= pkt->l2_len;
> +	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
> +	ipv4_hdr->total_length = rte_cpu_to_be_16(len); }
> +
> +int32_t
> +gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
> +		struct gro_vxlan_tcp4_tbl *tbl,
> +		uint64_t start_time)
> +{
> +	struct ether_hdr *outer_eth_hdr, *eth_hdr;
> +	struct ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
> +	struct tcp_hdr *tcp_hdr;
> +	struct udp_hdr *udp_hdr;
> +	struct vxlan_hdr *vxlan_hdr;
> +	uint32_t sent_seq;
> +	uint16_t tcp_dl, frag_off, outer_ip_id, ip_id;
> +	uint8_t outer_is_atomic, is_atomic;
> +
> +	struct vxlan_tcp4_flow_key key;
> +	uint32_t cur_idx, prev_idx, item_idx;
> +	uint32_t i, max_flow_num;
> +	int cmp;
> +	uint16_t hdr_len;
> +
> +	outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
> +	outer_ipv4_hdr = (struct ipv4_hdr *)((char *)outer_eth_hdr +
> +			pkt->outer_l2_len);
> +	udp_hdr = (struct udp_hdr *)((char *)outer_ipv4_hdr +
> +			pkt->outer_l3_len);
> +	vxlan_hdr = (struct vxlan_hdr *)((char *)udp_hdr +
> +			sizeof(struct udp_hdr));
> +	eth_hdr = (struct ether_hdr *)((char *)vxlan_hdr +
> +			sizeof(struct vxlan_hdr));
> +	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
> +	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
> +
> +	/*
> +	 * Don't process the packet which has FIN, SYN, RST, PSH, URG,
> +	 * ECE or CWR set.
> +	 */
> +	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
> +		return -1;
> +
> +	hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
> +		pkt->l3_len + pkt->l4_len;
> +	/*
> +	 * Don't process the packet whose payload length is less than or
> +	 * equal to 0.
> +	 */
> +	tcp_dl = pkt->pkt_len - hdr_len;
> +	if (tcp_dl <= 0)
> +		return -1;
> +
> +	/*
> +	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
> +	 * whose DF bit is 1, IPv4 ID is ignored.
> +	 */
> +	frag_off = rte_be_to_cpu_16(outer_ipv4_hdr->fragment_offset);
> +	outer_is_atomic = (frag_off & IPV4_HDR_DF_FLAG) ==
> IPV4_HDR_DF_FLAG;
> +	outer_ip_id = outer_is_atomic ? 0 :
> +		rte_be_to_cpu_16(outer_ipv4_hdr->packet_id);
> +	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
> +	is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
> +	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
> +
> +	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> +
> +	ether_addr_copy(&(eth_hdr->s_addr), &(key.inner_key.eth_saddr));
> +	ether_addr_copy(&(eth_hdr->d_addr), &(key.inner_key.eth_daddr));
> +	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
> +	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
> +	key.inner_key.recv_ack = tcp_hdr->recv_ack;
> +	key.inner_key.src_port = tcp_hdr->src_port;
> +	key.inner_key.dst_port = tcp_hdr->dst_port;
> +
> +	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
> +	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;
> +	ether_addr_copy(&(outer_eth_hdr->s_addr), &(key.outer_eth_saddr));
> +	ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
> +	key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
> +	key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
> +	key.outer_src_port = udp_hdr->src_port;
> +	key.outer_dst_port = udp_hdr->dst_port;
> +
> +	/* Search for a matched flow. */
> +	max_flow_num = tbl->max_flow_num;
> +	for (i = 0; i < max_flow_num; i++) {
> +		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX &&
> +				is_same_vxlan_tcp4_flow(tbl->flows[i].key,
> +					key))
> +			break;
> +	}
> +
> +	/*
> +	 * Can't find a matched flow. Insert a new flow and store the
> +	 * packet into the flow.
> +	 */
> +	if (i == tbl->max_flow_num) {
> +		item_idx = insert_new_item(tbl, pkt, start_time,
> +				INVALID_ARRAY_INDEX, sent_seq, outer_ip_id,
> +				ip_id, outer_is_atomic, is_atomic);
> +		if (item_idx == INVALID_ARRAY_INDEX)
> +			return -1;
> +		if (insert_new_flow(tbl, &key, item_idx) ==
> +				INVALID_ARRAY_INDEX) {
> +			/*
> +			 * Fail to insert a new flow, so
> +			 * delete the inserted packet.
> +			 */
> +			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
> +			return -1;
> +		}
> +		return 0;
> +	}
> +
> +	/* Check all packets in the flow and try to find a neighbor. */
> +	cur_idx = tbl->flows[i].start_index;
> +	prev_idx = cur_idx;
> +	do {
> +		cmp = check_vxlan_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
> +				sent_seq, outer_ip_id, ip_id, pkt->l4_len,
> +				tcp_dl, outer_is_atomic, is_atomic);
> +		if (cmp) {
> +			if (merge_two_vxlan_tcp4_packets(&(tbl->items[cur_idx]),
> +						pkt, cmp, sent_seq,
> +						outer_ip_id, ip_id))
> +				return 1;
> +			/*
> +			 * Can't merge two packets, as the packet
> +			 * length will be greater than the max value.
> +			 * Insert the packet into the flow.
> +			 */
> +			if (insert_new_item(tbl, pkt, start_time, prev_idx,
> +						sent_seq, outer_ip_id,
> +						ip_id, outer_is_atomic,
> +						is_atomic) ==
> +					INVALID_ARRAY_INDEX)
> +				return -1;
> +			return 0;
> +		}
> +		prev_idx = cur_idx;
> +		cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx;
> +	} while (cur_idx != INVALID_ARRAY_INDEX);
> +
> +	/* Can't find neighbor. Insert the packet into the flow. */
> +	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
> +				outer_ip_id, ip_id, outer_is_atomic,
> +				is_atomic) == INVALID_ARRAY_INDEX)
> +		return -1;
> +
> +	return 0;
> +}
> +
> +uint16_t
> +gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
> +		uint64_t flush_timestamp,
> +		struct rte_mbuf **out,
> +		uint16_t nb_out)
> +{
> +	uint16_t k = 0;
> +	uint32_t i, j;
> +	uint32_t max_flow_num = tbl->max_flow_num;
> +
> +	for (i = 0; i < max_flow_num; i++) {
> +		if (unlikely(tbl->flow_num == 0))
> +			return k;
> +
> +		j = tbl->flows[i].start_index;
> +		while (j != INVALID_ARRAY_INDEX) {
> +			if (tbl->items[j].inner_item.start_time <=
> +					flush_timestamp) {
> +				out[k++] = tbl->items[j].inner_item.firstseg;
> +				if (tbl->items[j].inner_item.nb_merged > 1)
> +					update_vxlan_header(&(tbl->items[j]));
> +				/*
> +				 * Delete the item and get the next packet
> +				 * index.
> +				 */
> +				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
> +				tbl->flows[i].start_index = j;
> +				if (j == INVALID_ARRAY_INDEX)
> +					tbl->flow_num--;
> +
> +				if (unlikely(k == nb_out))
> +					return k;
> +			} else
> +				/*
> +				 * The left packets in the flow won't be
> +				 * timeout. Go to check other flows.
> +				 */
> +				break;
> +		}
> +	}
> +	return k;
> +}
> +
> +uint32_t
> +gro_vxlan_tcp4_tbl_pkt_count(void *tbl) {
> +	struct gro_vxlan_tcp4_tbl *gro_tbl = tbl;
> +
> +	if (gro_tbl)
> +		return gro_tbl->item_num;
> +
> +	return 0;
> +}
> diff --git a/lib/librte_gro/gro_vxlan_tcp4.h b/lib/librte_gro/gro_vxlan_tcp4.h
> new file mode 100644 index 0000000..66baf73
> --- /dev/null
> +++ b/lib/librte_gro/gro_vxlan_tcp4.h
> @@ -0,0 +1,184 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2017 Intel Corporation. All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +#ifndef _GRO_VXLAN_TCP4_H_
> +#define _GRO_VXLAN_TCP4_H_
> +
> +#include "gro_tcp4.h"
> +
> +#define GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
> +
> +/* Header fields representing a VxLAN flow */ struct
> +vxlan_tcp4_flow_key {
> +	struct tcp4_flow_key inner_key;
> +	struct vxlan_hdr vxlan_hdr;
> +
> +	struct ether_addr outer_eth_saddr;
> +	struct ether_addr outer_eth_daddr;
> +
> +	uint32_t outer_ip_src_addr;
> +	uint32_t outer_ip_dst_addr;
> +
> +	/* Outer UDP ports */
> +	uint16_t outer_src_port;
> +	uint16_t outer_dst_port;
> +
> +};
> +
> +struct gro_vxlan_tcp4_flow {
> +	struct vxlan_tcp4_flow_key key;
> +	/*
> +	 * The index of the first packet in the flow. INVALID_ARRAY_INDEX
> +	 * indicates an empty flow.
> +	 */
> +	uint32_t start_index;
> +};
> +
> +struct gro_vxlan_tcp4_item {
> +	struct gro_tcp4_item inner_item;
> +	/* IPv4 ID in the outer IPv4 header */
> +	uint16_t outer_ip_id;
> +	/* Indicate if outer IPv4 ID can be ignored */
> +	uint8_t outer_is_atomic;
> +};
> +
> +/*
> + * VxLAN (with an outer IPv4 header and an inner TCP/IPv4 packet)
> + * reassembly table structure
> + */
> +struct gro_vxlan_tcp4_tbl {
> +	/* item array */
> +	struct gro_vxlan_tcp4_item *items;
> +	/* flow array */
> +	struct gro_vxlan_tcp4_flow *flows;
> +	/* current item number */
> +	uint32_t item_num;
> +	/* current flow number */
> +	uint32_t flow_num;
> +	/* the maximum item number */
> +	uint32_t max_item_num;
> +	/* the maximum flow number */
> +	uint32_t max_flow_num;
> +};
> +
> +/**
> + * This function creates a VxLAN reassembly table for VxLAN packets
> + * which have an outer IPv4 header and an inner TCP/IPv4 packet.
> + *
> + * @param socket_id
> + *  Socket index for allocating the table
> + * @param max_flow_num
> + *  The maximum number of flows in the table
> + * @param max_item_per_flow
> + *  The maximum number of packets per flow
> + *
> + * @return
> + *  - Return the table pointer on success.
> + *  - Return NULL on failure.
> + */
> +void *gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
> +		uint16_t max_flow_num,
> +		uint16_t max_item_per_flow);
> +
> +/**
> + * This function destroys a VxLAN reassembly table.
> + *
> + * @param tbl
> + *  Pointer pointing to the VxLAN reassembly table  */ void
> +gro_vxlan_tcp4_tbl_destroy(void *tbl);
> +
> +/**
> + * This function merges a VxLAN packet which has an outer IPv4 header
> +and
> + * an inner TCP/IPv4 packet. It doesn't process the packet, whose TCP
> + * header has SYN, FIN, RST, PSH, CWR, ECE or URG bit set, or which
> + * doesn't have payload.
> + *
> + * This function doesn't check if the packet has correct checksums and
> + * doesn't re-calculate checksums for the merged packet. Additionally,
> + * it assumes the packets are complete (i.e., MF==0 && frag_off==0),
> +when
> + * IP fragmentation is possible (i.e., DF==0). It returns the packet,
> +if
> + * the packet has invalid parameters (e.g. SYN bit is set) or there is
> +no
> + * available space in the table.
> + *
> + * @param pkt
> + *  Packet to reassemble
> + * @param tbl
> + *  Pointer pointing to the VxLAN reassembly table
> + * @start_time
> + *  The time when the packet is inserted into the table
> + *
> + * @return
> + *  - Return a positive value if the packet is merged.
> + *  - Return zero if the packet isn't merged but stored in the table.
> + *  - Return a negative value for invalid parameters or no available
> + *    space in the table.
> + */
> +int32_t gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
> +		struct gro_vxlan_tcp4_tbl *tbl,
> +		uint64_t start_time);
> +
> +/**
> + * This function flushes timeout packets in the VxLAN reassembly table,
> + * and without updating checksums.
> + *
> + * @param tbl
> + *  Pointer pointing to a VxLAN GRO table
> + * @param flush_timestamp
> + *  This function flushes packets which are inserted into the table
> + *  before or at the flush_timestamp.
> + * @param out
> + *  Pointer array used to keep flushed packets
> + * @param nb_out
> + *  The element number in 'out'. It also determines the maximum number
> +of
> + *  packets that can be flushed finally.
> + *
> + * @return
> + *  The number of flushed packets
> + */
> +uint16_t gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
> +		uint64_t flush_timestamp,
> +		struct rte_mbuf **out,
> +		uint16_t nb_out);
> +
> +/**
> + * This function returns the number of the packets in a VxLAN
> + * reassembly table.
> + *
> + * @param tbl
> + *  Pointer pointing to the VxLAN reassembly table
> + *
> + * @return
> + *  The number of packets in the table
> + */
> +uint32_t gro_vxlan_tcp4_tbl_pkt_count(void *tbl); #endif
> diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c index
> b3931a8..5a26893 100644
> --- a/lib/librte_gro/rte_gro.c
> +++ b/lib/librte_gro/rte_gro.c
> @@ -37,6 +37,7 @@
> 
>  #include "rte_gro.h"
>  #include "gro_tcp4.h"
> +#include "gro_vxlan_tcp4.h"
> 
>  typedef void *(*gro_tbl_create_fn)(uint16_t socket_id,
>  		uint16_t max_flow_num,
> @@ -45,15 +46,28 @@ typedef void (*gro_tbl_destroy_fn)(void *tbl);
> typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl);
> 
>  static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = {
> -		gro_tcp4_tbl_create, NULL};
> +		gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, NULL};
>  static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
> -			gro_tcp4_tbl_destroy, NULL};
> +			gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy,
> +			NULL};
>  static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] =
> {
> -			gro_tcp4_tbl_pkt_count, NULL};
> +			gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count,
> +			NULL};
> 
>  #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
>  		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
> 
> +#define IS_IPV4_VXLAN_TCP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) &&
> \
> +		((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \
> +		((ptype & RTE_PTYPE_TUNNEL_VXLAN) == \
> +		 RTE_PTYPE_TUNNEL_VXLAN) && \
> +		 ((ptype & RTE_PTYPE_INNER_L4_TCP) == \
> +		  RTE_PTYPE_INNER_L4_TCP) && \
> +		  (((ptype & RTE_PTYPE_INNER_L3_MASK) & \
> +		    (RTE_PTYPE_INNER_L3_IPV4 | \
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT | \
> +		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0))
> +
>  /*
>   * GRO context structure. It keeps the table structures, which are
>   * used to merge packets, for different GRO types. Before using @@ -137,12
> +151,20 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
>  	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>  	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] =
> {{0} };
> 
> +	/* Allocate a reassembly table for VXLAN GRO */
> +	struct gro_vxlan_tcp4_tbl vxlan_tbl;
> +	struct gro_vxlan_tcp4_flow
> vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> +	struct gro_vxlan_tcp4_item
> vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {
> +		{{0}, 0, 0} };
> +
>  	struct rte_mbuf *unprocess_pkts[nb_pkts];
>  	uint32_t item_num;
>  	int32_t ret;
>  	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
> +	uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0;
> 
> -	if (unlikely((param->gro_types & RTE_GRO_TCP_IPV4) == 0))
> +	if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
> +					RTE_GRO_TCP_IPV4)) == 0))
>  		return nb_pkts;
> 
>  	/* Get the maximum number of packets */ @@ -150,22 +172,47 @@
> rte_gro_reassemble_burst(struct rte_mbuf **pkts,
>  				param->max_item_per_flow));
>  	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
> 
> -	for (i = 0; i < item_num; i++)
> -		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
> +	if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
> +		for (i = 0; i < item_num; i++)
> +			vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
> +
> +		vxlan_tbl.flows = vxlan_flows;
> +		vxlan_tbl.items = vxlan_items;
> +		vxlan_tbl.flow_num = 0;
> +		vxlan_tbl.item_num = 0;
> +		vxlan_tbl.max_flow_num = item_num;
> +		vxlan_tbl.max_item_num = item_num;
> +		do_vxlan_gro = 1;
> +	}
> 
> -	tcp_tbl.flows = tcp_flows;
> -	tcp_tbl.items = tcp_items;
> -	tcp_tbl.flow_num = 0;
> -	tcp_tbl.item_num = 0;
> -	tcp_tbl.max_flow_num = item_num;
> -	tcp_tbl.max_item_num = item_num;
> +	if (param->gro_types & RTE_GRO_TCP_IPV4) {
> +		for (i = 0; i < item_num; i++)
> +			tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
> +
> +		tcp_tbl.flows = tcp_flows;
> +		tcp_tbl.items = tcp_items;
> +		tcp_tbl.flow_num = 0;
> +		tcp_tbl.item_num = 0;
> +		tcp_tbl.max_flow_num = item_num;
> +		tcp_tbl.max_item_num = item_num;
> +		do_tcp4_gro = 1;
> +	}
> 
>  	for (i = 0; i < nb_pkts; i++) {
> -		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
> -			/*
> -			 * The timestamp is ignored, since all packets
> -			 * will be flushed from the tables.
> -			 */
> +		/*
> +		 * The timestamp is ignored, since all packets
> +		 * will be flushed from the tables.
> +		 */
> +		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
> +				do_vxlan_gro) {
> +			ret = gro_vxlan_tcp4_reassemble(pkts[i], &vxlan_tbl, 0);
> +			if (ret > 0)
> +				/* Merge successfully */
> +				nb_after_gro--;
> +			else if (ret < 0)
> +				unprocess_pkts[unprocess_num++] = pkts[i];
> +		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
> +				do_tcp4_gro) {
>  			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0);
>  			if (ret > 0)
>  				/* Merge successfully */
> @@ -177,8 +224,16 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
>  	}
> 
>  	if (nb_after_gro < nb_pkts) {
> +		i = 0;
>  		/* Flush all packets from the tables */
> -		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, pkts, nb_pkts);
> +		if (do_vxlan_gro) {
> +			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
> +					0, pkts, nb_pkts);
> +		}
> +		if (do_tcp4_gro) {
> +			i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
> +					&pkts[i], nb_pkts - i);
> +		}
>  		/* Copy unprocessed packets */
>  		if (unprocess_num > 0) {
>  			memcpy(&pkts[i], unprocess_pkts,
> @@ -197,18 +252,33 @@ rte_gro_reassemble(struct rte_mbuf **pkts,  {
>  	struct rte_mbuf *unprocess_pkts[nb_pkts];
>  	struct gro_ctx *gro_ctx = ctx;
> -	void *tcp_tbl;
> +	void *tcp_tbl, *vxlan_tbl;
>  	uint64_t current_time;
>  	uint16_t i, unprocess_num = 0;
> +	uint8_t do_tcp4_gro, do_vxlan_gro;
> 
> -	if (unlikely((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0))
> +	if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
> +					RTE_GRO_TCP_IPV4)) == 0))
>  		return nb_pkts;
> 
>  	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
> +	vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
> +
> +	do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
> +		RTE_GRO_TCP_IPV4;
> +	do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4)
> ==
> +		RTE_GRO_IPV4_VXLAN_TCP_IPV4;
> +
>  	current_time = rte_rdtsc();
> 
>  	for (i = 0; i < nb_pkts; i++) {
> -		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
> +		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
> +				do_vxlan_gro) {
> +			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
> +						current_time) < 0)
> +				unprocess_pkts[unprocess_num++] = pkts[i];
> +		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
> +				do_tcp4_gro) {
>  			if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
>  						current_time) < 0)
>  				unprocess_pkts[unprocess_num++] = pkts[i]; @@ -232,18
> +302,27 @@ rte_gro_timeout_flush(void *ctx,  {
>  	struct gro_ctx *gro_ctx = ctx;
>  	uint64_t flush_timestamp;
> +	uint16_t num = 0;
> 
>  	gro_types = gro_types & gro_ctx->gro_types;
>  	flush_timestamp = rte_rdtsc() - timeout_cycles;
> 
> -	if (gro_types & RTE_GRO_TCP_IPV4) {
> -		return gro_tcp4_tbl_timeout_flush(
> +	if (gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
> +		num = gro_vxlan_tcp4_tbl_timeout_flush(gro_ctx->tbls[
> +				RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX],
> +				flush_timestamp, out, max_nb_out);
> +		max_nb_out -= num;
> +	}
> +
> +	/* If no available space in 'out', stop flushing. */
> +	if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
> +		num += gro_tcp4_tbl_timeout_flush(
>  				gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX],
>  				flush_timestamp,
> -				out, max_nb_out);
> +				&out[num], max_nb_out);
>  	}
> 
> -	return 0;
> +	return num;
>  }
> 
>  uint64_t
> diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h index
> 36a1e60..5ed72d7 100644
> --- a/lib/librte_gro/rte_gro.h
> +++ b/lib/librte_gro/rte_gro.h
> @@ -51,12 +51,15 @@ extern "C" {
>   */
>  #define RTE_GRO_TYPE_MAX_NUM 64
>  /**< the max number of supported GRO types */ -#define
> RTE_GRO_TYPE_SUPPORT_NUM 1
> +#define RTE_GRO_TYPE_SUPPORT_NUM 2
>  /**< the number of currently supported GRO types */
> 
>  #define RTE_GRO_TCP_IPV4_INDEX 0
>  #define RTE_GRO_TCP_IPV4 (1ULL << RTE_GRO_TCP_IPV4_INDEX)  /**<
> TCP/IPv4 GRO flag */
> +#define RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX 1 #define
> +RTE_GRO_IPV4_VXLAN_TCP_IPV4 (1ULL <<
> RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX)
> +/**< VxLAN GRO flag. */
> 
>  /**
>   * Structure used to create GRO context objects or used to pass
> --
> 2.7.4

Reviewed-by: Junjie Chen <junjie.j.chen@intel.com>

Thanks

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/2] gro: code cleanup
  2017-12-22  7:25     ` [PATCH v3 1/2] gro: code cleanup Jiayu Hu
  2017-12-29  3:53       ` Chen, Junjie J
@ 2018-01-02 11:26       ` Bruce Richardson
  2018-01-03  1:07         ` Tan, Jianfeng
  1 sibling, 1 reply; 31+ messages in thread
From: Bruce Richardson @ 2018-01-02 11:26 UTC (permalink / raw)
  To: Jiayu Hu
  Cc: dev, jianfeng.tan, junjie.j.chen, konstantin.ananyev, stephen,
	ferruh.yigit, lei.a.yao

On Fri, Dec 22, 2017 at 03:25:43PM +0800, Jiayu Hu wrote:
> - Remove needless check and variants
> - For better understanding, update the programmer guide and rename
>   internal functions and variants
> - For supporting tunneled gro, move common internal functions from
>   gro_tcp4.c to gro_tcp4.h
> - Comply RFC 6864 to process the IPv4 ID field
> 
> Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
> ---
>  .../prog_guide/generic_receive_offload_lib.rst     | 246 ++++++++-------
>  doc/guides/prog_guide/img/gro-key-algorithm.png    | Bin 0 -> 28231 bytes

Rather than binary PNG images, please use SVG files (note, real SVG, not
an SVG file with a binary blob pasted into it).

Thanks,
/Bruce

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/2] gro: code cleanup
  2018-01-02 11:26       ` Bruce Richardson
@ 2018-01-03  1:07         ` Tan, Jianfeng
  2018-01-03  1:27           ` Stephen Hemminger
  0 siblings, 1 reply; 31+ messages in thread
From: Tan, Jianfeng @ 2018-01-03  1:07 UTC (permalink / raw)
  To: Richardson, Bruce, Hu, Jiayu
  Cc: dev, Chen, Junjie J, Ananyev, Konstantin, stephen, Yigit, Ferruh,
	Yao, Lei A



> -----Original Message-----
> From: Richardson, Bruce
> Sent: Tuesday, January 2, 2018 7:26 PM
> To: Hu, Jiayu
> Cc: dev@dpdk.org; Tan, Jianfeng; Chen, Junjie J; Ananyev, Konstantin;
> stephen@networkplumber.org; Yigit, Ferruh; Yao, Lei A
> Subject: Re: [dpdk-dev] [PATCH v3 1/2] gro: code cleanup
> 
> On Fri, Dec 22, 2017 at 03:25:43PM +0800, Jiayu Hu wrote:
> > - Remove needless check and variants
> > - For better understanding, update the programmer guide and rename
> >   internal functions and variants
> > - For supporting tunneled gro, move common internal functions from
> >   gro_tcp4.c to gro_tcp4.h
> > - Comply RFC 6864 to process the IPv4 ID field
> >
> > Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
> > ---
> >  .../prog_guide/generic_receive_offload_lib.rst     | 246 ++++++++-------
> >  doc/guides/prog_guide/img/gro-key-algorithm.png    | Bin 0 -> 28231
> bytes
> 
> Rather than binary PNG images, please use SVG files (note, real SVG, not
> an SVG file with a binary blob pasted into it).

Based on my limited experience, there is no shortcut for this; the picture has to be re-drawn with a tool like Visio.

Thanks,
Jianfeng

> 
> Thanks,
> /Bruce

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/2] gro: code cleanup
  2018-01-03  1:07         ` Tan, Jianfeng
@ 2018-01-03  1:27           ` Stephen Hemminger
  0 siblings, 0 replies; 31+ messages in thread
From: Stephen Hemminger @ 2018-01-03  1:27 UTC (permalink / raw)
  To: Tan, Jianfeng
  Cc: Richardson, Bruce, Hu, Jiayu, dev, Chen, Junjie J, Ananyev,
	Konstantin, Yigit, Ferruh, Yao, Lei A

On Wed, 3 Jan 2018 01:07:37 +0000
"Tan, Jianfeng" <jianfeng.tan@intel.com> wrote:

> > -----Original Message-----
> > From: Richardson, Bruce
> > Sent: Tuesday, January 2, 2018 7:26 PM
> > To: Hu, Jiayu
> > Cc: dev@dpdk.org; Tan, Jianfeng; Chen, Junjie J; Ananyev, Konstantin;
> > stephen@networkplumber.org; Yigit, Ferruh; Yao, Lei A
> > Subject: Re: [dpdk-dev] [PATCH v3 1/2] gro: code cleanup
> > 
> > On Fri, Dec 22, 2017 at 03:25:43PM +0800, Jiayu Hu wrote:  
> > > - Remove needless check and variants
> > > - For better understanding, update the programmer guide and rename
> > >   internal functions and variants
> > > - For supporting tunneled gro, move common internal functions from
> > >   gro_tcp4.c to gro_tcp4.h
> > > - Comply RFC 6864 to process the IPv4 ID field
> > >
> > > Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
> > > ---
> > >  .../prog_guide/generic_receive_offload_lib.rst     | 246 ++++++++-------
> > >  doc/guides/prog_guide/img/gro-key-algorithm.png    | Bin 0 -> 28231  
> > bytes
> > 
> > Rather than binary PNG images, please use SVG files (note, real SVG, not
> > an SVG file with a binary blob pasted into it).  
> 
> Based on my limited experience, there is no shortcut for this, but re-draw the picture with tools like visio.

Inkscape is open source and produces svg files.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v4 0/2] Support VxLAN GRO
  2017-12-22  7:25   ` [PATCH v3 0/2] Support " Jiayu Hu
                       ` (2 preceding siblings ...)
  2017-12-29  3:52     ` [PATCH v3 0/2] Support " Chen, Junjie J
@ 2018-01-05  6:12     ` Jiayu Hu
  2018-01-05  6:12       ` [PATCH v4 1/2] gro: code cleanup Jiayu Hu
                         ` (2 more replies)
  3 siblings, 3 replies; 31+ messages in thread
From: Jiayu Hu @ 2018-01-05  6:12 UTC (permalink / raw)
  To: dev
  Cc: bruce.richardson, junjie.j.chen, jianfeng.tan, stephen,
	ferruh.yigit, konstantin.ananyev, lei.a.yao, Jiayu Hu

VxLAN is one of the most widely used tunneled protocols. Providing GRO
support for VxLAN-encapsulated packets can benefit many per-packet based
applications, like Open vSwitch.

This patchset is to support VxLAN GRO. The first patch cleans up current
gro codes for the sake of supporting tunneled GRO. The second patch
supports GRO on the VxLAN packets which have an outer IPv4 header and an
inner TCP/IPv4 packet.

Change log
===========
v4:
- replace gro-key-algorithm.png with gro-key-algorithm.svg
- reduce key comparison times in gro_tcp4_reassemble() and
  gro_vxlan_tcp4_reassemble()
- reduce comparison times in rte_gro_get_pkt_count()
v3:
- remove needless check
- add "likely()" and "unlikely()" to optimize branch prediction
- fix a bug in merge_two_tcp4_packets(): for VxLAN packets, check if
  the outer IPv4 packet length is less than or equal to UINT16_MAX,
  rather than the inner IPv4 packet length.
- fix a bug in rte_gro.h: change RTE_GRO_TYPE_SUPPORT_NUM to 2
- Avoid inserting timestamp in rte_gro_reassemble_burst(), since all
  packets in the tables will be flushed.
- fix typos
v2:
- comply with RFC 6864 to process IP ID fields. Specifically, we require the
  IP ID fields of neighbor packets whose DF bit is 0 to be increased by
  1. We don't check IP ID for the packets whose DF bit is 1.
  Additionally, packets whose DF bits are different cannot be merged.
- update the programmer guide and function comments

Jiayu Hu (2):
  gro: code cleanup
  gro: support VxLAN GRO

 .../prog_guide/generic_receive_offload_lib.rst     | 269 +++++++----
 doc/guides/prog_guide/img/gro-key-algorithm.svg    | 223 +++++++++
 lib/librte_gro/Makefile                            |   1 +
 lib/librte_gro/gro_tcp4.c                          | 339 +++++---------
 lib/librte_gro/gro_tcp4.h                          | 253 +++++++---
 lib/librte_gro/gro_vxlan_tcp4.c                    | 521 +++++++++++++++++++++
 lib/librte_gro/gro_vxlan_tcp4.h                    | 184 ++++++++
 lib/librte_gro/rte_gro.c                           | 203 +++++---
 lib/librte_gro/rte_gro.h                           |  97 ++--
 9 files changed, 1575 insertions(+), 515 deletions(-)
 create mode 100644 doc/guides/prog_guide/img/gro-key-algorithm.svg
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.c
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.h

-- 
2.7.4

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v4 1/2] gro: code cleanup
  2018-01-05  6:12     ` [PATCH v4 " Jiayu Hu
@ 2018-01-05  6:12       ` Jiayu Hu
  2018-01-08  1:15         ` Yao, Lei A
  2018-01-10  0:09         ` Thomas Monjalon
  2018-01-05  6:12       ` [PATCH v4 2/2] gro: support VxLAN GRO Jiayu Hu
  2018-01-10 14:03       ` [PATCH v5 0/3] Support " Jiayu Hu
  2 siblings, 2 replies; 31+ messages in thread
From: Jiayu Hu @ 2018-01-05  6:12 UTC (permalink / raw)
  To: dev
  Cc: bruce.richardson, junjie.j.chen, jianfeng.tan, stephen,
	ferruh.yigit, konstantin.ananyev, lei.a.yao, Jiayu Hu

- Remove needless check and variants
- For better understanding, update the programmer guide and rename
  internal functions and variants
- For supporting tunneled gro, move common internal functions from
  gro_tcp4.c to gro_tcp4.h
- Comply RFC 6864 to process the IPv4 ID field

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
Reviewed-by: Junjie Chen <junjie.j.chen@intel.com>
---
 .../prog_guide/generic_receive_offload_lib.rst     | 246 ++++++++-------
 doc/guides/prog_guide/img/gro-key-algorithm.svg    | 223 ++++++++++++++
 lib/librte_gro/gro_tcp4.c                          | 339 +++++++--------------
 lib/librte_gro/gro_tcp4.h                          | 253 ++++++++++-----
 lib/librte_gro/rte_gro.c                           | 102 +++----
 lib/librte_gro/rte_gro.h                           |  92 +++---
 6 files changed, 750 insertions(+), 505 deletions(-)
 create mode 100644 doc/guides/prog_guide/img/gro-key-algorithm.svg

diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst b/doc/guides/prog_guide/generic_receive_offload_lib.rst
index 22e50ec..c2d7a41 100644
--- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
+++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
@@ -32,128 +32,162 @@ Generic Receive Offload Library
 ===============================
 
 Generic Receive Offload (GRO) is a widely used SW-based offloading
-technique to reduce per-packet processing overhead. It gains performance
-by reassembling small packets into large ones. To enable more flexibility
-to applications, DPDK implements GRO as a standalone library. Applications
-explicitly use the GRO library to merge small packets into large ones.
-
-The GRO library assumes all input packets have correct checksums. In
-addition, the GRO library doesn't re-calculate checksums for merged
-packets. If input packets are IP fragmented, the GRO library assumes
-they are complete packets (i.e. with L4 headers).
-
-Currently, the GRO library implements TCP/IPv4 packet reassembly.
-
-Reassembly Modes
-----------------
-
-The GRO library provides two reassembly modes: lightweight and
-heavyweight mode. If applications want to merge packets in a simple way,
-they can use the lightweight mode API. If applications want more
-fine-grained controls, they can choose the heavyweight mode API.
-
-Lightweight Mode
-~~~~~~~~~~~~~~~~
-
-The ``rte_gro_reassemble_burst()`` function is used for reassembly in
-lightweight mode. It tries to merge N input packets at a time, where
-N should be less than or equal to ``RTE_GRO_MAX_BURST_ITEM_NUM``.
-
-In each invocation, ``rte_gro_reassemble_burst()`` allocates temporary
-reassembly tables for the desired GRO types. Note that the reassembly
-table is a table structure used to reassemble packets and different GRO
-types (e.g. TCP/IPv4 GRO and TCP/IPv6 GRO) have different reassembly table
-structures. The ``rte_gro_reassemble_burst()`` function uses the reassembly
-tables to merge the N input packets.
-
-For applications, performing GRO in lightweight mode is simple. They
-just need to invoke ``rte_gro_reassemble_burst()``. Applications can get
-GROed packets as soon as ``rte_gro_reassemble_burst()`` returns.
-
-Heavyweight Mode
-~~~~~~~~~~~~~~~~
-
-The ``rte_gro_reassemble()`` function is used for reassembly in heavyweight
-mode. Compared with the lightweight mode, performing GRO in heavyweight mode
-is relatively complicated.
-
-Before performing GRO, applications need to create a GRO context object
-by calling ``rte_gro_ctx_create()``. A GRO context object holds the
-reassembly tables of desired GRO types. Note that all update/lookup
-operations on the context object are not thread safe. So if different
-processes or threads want to access the same context object simultaneously,
-some external syncing mechanisms must be used.
-
-Once the GRO context is created, applications can then use the
-``rte_gro_reassemble()`` function to merge packets. In each invocation,
-``rte_gro_reassemble()`` tries to merge input packets with the packets
-in the reassembly tables. If an input packet is an unsupported GRO type,
-or other errors happen (e.g. SYN bit is set), ``rte_gro_reassemble()``
-returns the packet to applications. Otherwise, the input packet is either
-merged or inserted into a reassembly table.
-
-When applications want to get GRO processed packets, they need to use
-``rte_gro_timeout_flush()`` to flush them from the tables manually.
+technique to reduce per-packet processing overheads. By reassembling
+small packets into larger ones, GRO enables applications to process
+fewer large packets directly, thus reducing the number of packets to
+be processed. To benefit DPDK-based applications, like Open vSwitch,
+DPDK also provides own GRO implementation. In DPDK, GRO is implemented
+as a standalone library. Applications explicitly use the GRO library to
+reassemble packets.
+
+Overview
+--------
+
+In the GRO library, there are many GRO types which are defined by packet
+types. Each GRO type is in charge of processing one kind of packet. For
+example, TCP/IPv4 GRO processes TCP/IPv4 packets.
+
+Each GRO type has a reassembly function, which defines its own algorithm
+and table structure to reassemble packets. We assign input packets to the
+corresponding GRO functions by MBUF->packet_type.
+
+The GRO library doesn't check if input packets have correct checksums and
+doesn't re-calculate checksums for merged packets. The GRO library
+assumes the packets are complete (i.e., MF==0 && frag_off==0), when IP
+fragmentation is possible (i.e., DF==0). Additionally, it complies with
+RFC 6864 to process the IPv4 ID field.
 
-TCP/IPv4 GRO
-------------
+Currently, the GRO library provides GRO support for TCP/IPv4 packets.
+
+Two Sets of API
+---------------
+
+For different usage scenarios, the GRO library provides two sets of API.
+One is called the lightweight mode API, which enables applications to
+merge a small number of packets rapidly; the other is called the
+heavyweight mode API, which provides fine-grained controls to
+applications and supports merging a large number of packets.
+
+Lightweight Mode API
+~~~~~~~~~~~~~~~~~~~~
+
+The lightweight mode only has one function ``rte_gro_reassemble_burst()``,
+which processes N packets at a time. Using the lightweight mode API to
+merge packets is very simple. Calling ``rte_gro_reassemble_burst()`` is
+enough. The GROed packets are returned to applications as soon as it
+returns.
+
+In ``rte_gro_reassemble_burst()``, table structures of different GRO
+types are allocated in the stack. This design simplifies applications'
+operations. However, limited by the stack size, the maximum number of
+packets that ``rte_gro_reassemble_burst()`` can process in an invocation
+should be less than or equal to ``RTE_GRO_MAX_BURST_ITEM_NUM``.
+
+Heavyweight Mode API
+~~~~~~~~~~~~~~~~~~~~
+
+Compared with the lightweight mode, using the heavyweight mode API is
+relatively complex. Firstly, applications need to create a GRO context
+by ``rte_gro_ctx_create()``. ``rte_gro_ctx_create()`` allocates table
+structures in the heap and stores their pointers in the GRO context.
+Secondly, applications use ``rte_gro_reassemble()`` to merge packets.
+If input packets have invalid parameters, ``rte_gro_reassemble()``
+returns them to applications. For example, packets of unsupported GRO
+types or TCP SYN packets are returned. Otherwise, the input packets are
+either merged with the existing packets in the tables or inserted into
+the tables. Finally, applications use ``rte_gro_timeout_flush()`` to
+flush packets from the tables, when they want to get the GROed packets.
+
+Note that all update/lookup operations on the GRO context are not thread
+safe. So if different processes or threads want to access the same
+context object simultaneously, some external syncing mechanisms must be
+used.
+
+Reassembly Algorithm
+--------------------
+
+The reassembly algorithm is used for reassembling packets. In the GRO
+library, different GRO types can use different algorithms. In this
+section, we will introduce an algorithm, which is used by TCP/IPv4 GRO.
 
-TCP/IPv4 GRO supports merging small TCP/IPv4 packets into large ones,
-using a table structure called the TCP/IPv4 reassembly table.
+Challenges
+~~~~~~~~~~
 
-TCP/IPv4 Reassembly Table
-~~~~~~~~~~~~~~~~~~~~~~~~~
+The reassembly algorithm determines the efficiency of GRO. There are two
+challenges in the algorithm design:
 
-A TCP/IPv4 reassembly table includes a "key" array and an "item" array.
-The key array keeps the criteria to merge packets and the item array
-keeps the packet information.
+- a high cost algorithm/implementation would cause packet dropping in a
+  high speed network.
 
-Each key in the key array points to an item group, which consists of
-packets which have the same criteria values but can't be merged. A key
-in the key array includes two parts:
+- packet reordering makes it hard to merge packets. For example, Linux
+  GRO fails to merge packets when it encounters packet reordering.
 
-* ``criteria``: the criteria to merge packets. If two packets can be
-  merged, they must have the same criteria values.
+The above two challenges require that our algorithm be:
 
-* ``start_index``: the item array index of the first packet in the item
-  group.
+- lightweight enough to scale to fast networking speeds
 
-Each element in the item array keeps the information of a packet. An item
-in the item array mainly includes three parts:
+- capable of handling packet reordering
 
-* ``firstseg``: the mbuf address of the first segment of the packet.
+In DPDK GRO, we use a key-based algorithm to address the two challenges.
 
-* ``lastseg``: the mbuf address of the last segment of the packet.
+Key-based Reassembly Algorithm
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+:numref:`figure_gro-key-algorithm` illustrates the procedure of the
+key-based algorithm. Packets are classified into "flows" by some header
+fields (we call them the "key"). To process an input packet, the algorithm
+searches for a matching "flow" (i.e., the same key value) for the
+packet first, then checks all packets in the "flow" and tries to find a
+"neighbor" for it. If a "neighbor" is found, the two packets are merged
+together. If no "neighbor" is found, the packet is stored into its "flow".
+If no matching "flow" is found, a new "flow" is inserted and the packet
+is stored into that "flow".
+
+.. note::
+        Packets in the same "flow" that can't be merged are always caused
+        by packet reordering.
+
+The key-based algorithm has two characteristics:
+
+- classifying packets into "flows" to accelerate packet aggregation is
+  simple (addresses challenge 1).
+
+- storing out-of-order packets makes it possible to merge them later
+  (addresses challenge 2).
+
+.. _figure_gro-key-algorithm:
+
+.. figure:: img/gro-key-algorithm.*
+   :align: center
+
+   Key-based Reassembly Algorithm
+
+TCP/IPv4 GRO
+------------
 
-* ``next_pkt_index``: the item array index of the next packet in the same
-  item group. TCP/IPv4 GRO uses ``next_pkt_index`` to chain the packets
-  that have the same criteria value but can't be merged together.
+The table structure used by TCP/IPv4 GRO contains two arrays: flow array
+and item array. The flow array keeps flow information, and the item array
+keeps packet information.
 
-Procedure to Reassemble a Packet
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Header fields used to define a TCP/IPv4 flow include:
 
-To reassemble an incoming packet needs three steps:
+- source and destination: Ethernet and IP address, TCP port
 
-#. Check if the packet should be processed. Packets with one of the
-   following properties aren't processed and are returned immediately:
+- TCP acknowledge number
 
-   * FIN, SYN, RST, URG, PSH, ECE or CWR bit is set.
+TCP/IPv4 packets whose FIN, SYN, RST, URG, PSH, ECE or CWR bit is set
+won't be processed.
 
-   * L4 payload length is 0.
+Header fields deciding if two packets are neighbors include:
 
-#.  Traverse the key array to find a key which has the same criteria
-    value with the incoming packet. If found, go to the next step.
-    Otherwise, insert a new key and a new item for the packet.
+- TCP sequence number
 
-#. Locate the first packet in the item group via ``start_index``. Then
-   traverse all packets in the item group via ``next_pkt_index``. If a
-   packet is found which can be merged with the incoming one, merge them
-   together. If one isn't found, insert the packet into this item group.
-   Note that to merge two packets is to link them together via mbuf's
-   ``next`` field.
+- IPv4 ID. For packets whose DF bit is 0, the IPv4 ID fields should
+  increase by 1.
 
-When packets are flushed from the reassembly table, TCP/IPv4 GRO updates
-packet header fields for the merged packets. Note that before reassembling
-the packet, TCP/IPv4 GRO doesn't check if the checksums of packets are
-correct. Also, TCP/IPv4 GRO doesn't re-calculate checksums for merged
-packets.
+.. note::
+        We comply with RFC 6864 to process the IPv4 ID field. Specifically,
+        we check IPv4 ID fields for the packets whose DF bit is 0 and
+        ignore IPv4 ID fields for the packets whose DF bit is 1.
+        Additionally, packets which have different values of the DF bit
+        can't be merged.
diff --git a/doc/guides/prog_guide/img/gro-key-algorithm.svg b/doc/guides/prog_guide/img/gro-key-algorithm.svg
new file mode 100644
index 0000000..94e42f5
--- /dev/null
+++ b/doc/guides/prog_guide/img/gro-key-algorithm.svg
@@ -0,0 +1,223 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
+<!-- Generated by Microsoft Visio 11.0, SVG Export, v1.0 gro-key-algorithm.svg Page-1 -->
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ev="http://www.w3.org/2001/xml-events"
+		xmlns:v="http://schemas.microsoft.com/visio/2003/SVGExtensions/" width="6.06163in" height="2.66319in"
+		viewBox="0 0 436.438 191.75" xml:space="preserve" color-interpolation-filters="sRGB" class="st10">
+	<v:documentProperties v:langID="1033" v:viewMarkup="false"/>
+
+	<style type="text/css">
+	<![CDATA[
+		.st1 {fill:url(#grad30-4);stroke:#404040;stroke-linecap:round;stroke-linejoin:round;stroke-width:0.25}
+		.st2 {fill:#000000;font-family:Calibri;font-size:1.00001em}
+		.st3 {font-size:1em;font-weight:bold}
+		.st4 {fill:#000000;font-family:Calibri;font-size:1.00001em;font-weight:bold}
+		.st5 {font-size:1em;font-weight:normal}
+		.st6 {marker-end:url(#mrkr5-38);stroke:#404040;stroke-linecap:round;stroke-linejoin:round;stroke-width:1}
+		.st7 {fill:#404040;fill-opacity:1;stroke:#404040;stroke-opacity:1;stroke-width:0.28409090909091}
+		.st8 {fill:none;stroke:none;stroke-linecap:round;stroke-linejoin:round;stroke-width:0.25}
+		.st9 {fill:#000000;font-family:Calibri;font-size:0.833336em}
+		.st10 {fill:none;fill-rule:evenodd;font-size:12px;overflow:visible;stroke-linecap:square;stroke-miterlimit:3}
+	]]>
+	</style>
+
+	<defs id="Patterns_And_Gradients">
+		<linearGradient id="grad30-4" v:fillPattern="30" v:foreground="#c6d09f" v:background="#d1dab4" x1="0" y1="1" x2="0" y2="0">
+			<stop offset="0" style="stop-color:#c6d09f;stop-opacity:1"/>
+			<stop offset="1" style="stop-color:#d1dab4;stop-opacity:1"/>
+		</linearGradient>
+		<linearGradient id="grad30-35" v:fillPattern="30" v:foreground="#f0f0f0" v:background="#ffffff" x1="0" y1="1" x2="0" y2="0">
+			<stop offset="0" style="stop-color:#f0f0f0;stop-opacity:1"/>
+			<stop offset="1" style="stop-color:#ffffff;stop-opacity:1"/>
+		</linearGradient>
+	</defs>
+	<defs id="Markers">
+		<g id="lend5">
+			<path d="M 2 1 L 0 0 L 1.98117 -0.993387 C 1.67173 -0.364515 1.67301 0.372641 1.98465 1.00043 " style="stroke:none"/>
+		</g>
+		<marker id="mrkr5-38" class="st7" v:arrowType="5" v:arrowSize="2" v:setback="6.16" refX="-6.16" orient="auto"
+				markerUnits="strokeWidth" overflow="visible">
+			<use xlink:href="#lend5" transform="scale(-3.52,-3.52) "/>
+		</marker>
+	</defs>
+	<g v:mID="0" v:index="1" v:groupContext="foregroundPage">
+		<title>Page-1</title>
+		<v:pageProperties v:drawingScale="1" v:pageScale="1" v:drawingUnits="0" v:shadowOffsetX="9" v:shadowOffsetY="-9"/>
+		<v:layer v:name="Connector" v:index="0"/>
+		<g id="shape1-1" v:mID="1" v:groupContext="shape" transform="translate(0.25,-117.25)">
+			<title>Rounded rectangle</title>
+			<desc>Categorize into an existed “flow”</desc>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="90" cy="173.75" width="180" height="36"/>
+			<path d="M171 191.75 A9.00007 9.00007 -180 0 0 180 182.75 L180 164.75 A9.00007 9.00007 -180 0 0 171 155.75 L9 155.75
+						 A9.00007 9.00007 -180 0 0 -0 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L171 191.75 Z"
+					class="st1"/>
+			<text x="8.91" y="177.35" class="st2" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Categorize into an <tspan
+						class="st3">existed</tspan><tspan class="st3" v:langID="2052"> </tspan>“<tspan class="st3">flow</tspan>”</text>		</g>
+		<g id="shape2-9" v:mID="2" v:groupContext="shape" transform="translate(0.25,-58.75)">
+			<title>Rounded rectangle.2</title>
+			<desc>Search for a “neighbor”</desc>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="90" cy="173.75" width="180" height="36"/>
+			<path d="M171 191.75 A9.00007 9.00007 -180 0 0 180 182.75 L180 164.75 A9.00007 9.00007 -180 0 0 171 155.75 L9 155.75
+						 A9.00007 9.00007 -180 0 0 -0 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L171 191.75 Z"
+					class="st1"/>
+			<text x="32.19" y="177.35" class="st2" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Search for a “<tspan
+						class="st3">neighbor</tspan>”</text>		</g>
+		<g id="shape3-14" v:mID="3" v:groupContext="shape" transform="translate(225.813,-117.25)">
+			<title>Rounded rectangle.3</title>
+			<desc>Insert a new “flow” and store the packet</desc>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="105.188" cy="173.75" width="210.38" height="36"/>
+			<path d="M201.37 191.75 A9.00007 9.00007 -180 0 0 210.37 182.75 L210.37 164.75 A9.00007 9.00007 -180 0 0 201.37 155.75
+						 L9 155.75 A9.00007 9.00007 -180 0 0 -0 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L201.37 191.75
+						 Z" class="st1"/>
+			<text x="5.45" y="177.35" class="st2" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Insert a <tspan
+						class="st3">new </tspan>“<tspan class="st3">flow</tspan>” and <tspan class="st3">store </tspan>the packet</text>		</g>
+		<g id="shape4-21" v:mID="4" v:groupContext="shape" transform="translate(225.25,-58.75)">
+			<title>Rounded rectangle.4</title>
+			<desc>Store the packet</desc>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="83.25" cy="173.75" width="166.5" height="36"/>
+			<path d="M157.5 191.75 A9.00007 9.00007 -180 0 0 166.5 182.75 L166.5 164.75 A9.00007 9.00007 -180 0 0 157.5 155.75 L9
+						 155.75 A9.00007 9.00007 -180 0 0 -0 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L157.5 191.75 Z"
+					class="st1"/>
+			<text x="42.81" y="177.35" class="st4" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Store <tspan
+						class="st5">the packet</tspan></text>		</g>
+		<g id="shape5-26" v:mID="5" v:groupContext="shape" transform="translate(0.25,-0.25)">
+			<title>Rounded rectangle.5</title>
+			<desc>Merge the packet</desc>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="90" cy="173.75" width="180" height="36"/>
+			<path d="M171 191.75 A9.00007 9.00007 -180 0 0 180 182.75 L180 164.75 A9.00007 9.00007 -180 0 0 171 155.75 L9 155.75
+						 A9.00007 9.00007 -180 0 0 -0 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L171 191.75 Z"
+					class="st1"/>
+			<text x="46.59" y="177.35" class="st4" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Merge <tspan
+						class="st5">the packet</tspan></text>		</g>
+		<g id="shape6-31" v:mID="6" v:groupContext="shape" v:layerMember="0" transform="translate(81.25,-175.75)">
+			<title>Dynamic connector</title>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<path d="M9 191.75 L9 208.09" class="st6"/>
+		</g>
+		<g id="shape7-39" v:mID="7" v:groupContext="shape" v:layerMember="0" transform="translate(81.25,-117.25)">
+			<title>Dynamic connector.7</title>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<path d="M9 191.75 L9 208.09" class="st6"/>
+		</g>
+		<g id="shape8-45" v:mID="8" v:groupContext="shape" v:layerMember="0" transform="translate(81.25,-58.75)">
+			<title>Dynamic connector.8</title>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<path d="M9 191.75 L9 208.09" class="st6"/>
+		</g>
+		<g id="shape9-51" v:mID="9" v:groupContext="shape" v:layerMember="0" transform="translate(180.25,-126.25)">
+			<title>Dynamic connector.9</title>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<path d="M0 182.75 L39.4 182.75" class="st6"/>
+		</g>
+		<g id="shape10-57" v:mID="10" v:groupContext="shape" v:layerMember="0" transform="translate(180.25,-67.75)">
+			<title>Dynamic connector.10</title>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<path d="M0 182.75 L38.84 182.75" class="st6"/>
+		</g>
+		<g id="shape11-63" v:mID="11" v:groupContext="shape" transform="translate(65.5,-173.5)">
+			<title>Sheet.11</title>
+			<desc>packet</desc>
+			<v:userDefs>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="24.75" cy="182.75" width="49.5" height="18"/>
+			<rect x="0" y="173.75" width="49.5" height="18" class="st8"/>
+			<text x="8.46" y="186.35" class="st2" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>packet</text>		</g>
+		<g id="shape14-66" v:mID="14" v:groupContext="shape" transform="translate(98.125,-98.125)">
+			<title>Sheet.14</title>
+			<desc>find a “flow”</desc>
+			<v:userDefs>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="32.0625" cy="183.875" width="64.13" height="15.75"/>
+			<rect x="0" y="176" width="64.125" height="15.75" class="st8"/>
+			<text x="6.41" y="186.88" class="st9" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>find a “flow”</text>		</g>
+		<g id="shape15-69" v:mID="15" v:groupContext="shape" transform="translate(99.25,-39.625)">
+			<title>Sheet.15</title>
+			<desc>find a “neighbor”</desc>
+			<v:userDefs>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="40.5" cy="183.875" width="81" height="15.75"/>
+			<rect x="0" y="176" width="81" height="15.75" class="st8"/>
+			<text x="5.48" y="186.88" class="st9" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>find a “neighbor”</text>		</g>
+		<g id="shape13-72" v:mID="13" v:groupContext="shape" transform="translate(181.375,-79)">
+			<title>Sheet.13</title>
+			<desc>not find</desc>
+			<v:userDefs>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="21.375" cy="183.875" width="42.75" height="15.75"/>
+			<rect x="0" y="176" width="42.75" height="15.75" class="st8"/>
+			<text x="5.38" y="186.88" class="st9" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>not find</text>		</g>
+		<g id="shape12-75" v:mID="12" v:groupContext="shape" transform="translate(181.375,-137.5)">
+			<title>Sheet.12</title>
+			<desc>not find</desc>
+			<v:userDefs>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="21.375" cy="183.875" width="42.75" height="15.75"/>
+			<rect x="0" y="176" width="42.75" height="15.75" class="st8"/>
+			<text x="5.38" y="186.88" class="st9" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>not find</text>		</g>
+	</g>
+</svg>
diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c
index 03e5ccf..27af23e 100644
--- a/lib/librte_gro/gro_tcp4.c
+++ b/lib/librte_gro/gro_tcp4.c
@@ -6,8 +6,6 @@
 #include <rte_mbuf.h>
 #include <rte_cycles.h>
 #include <rte_ethdev.h>
-#include <rte_ip.h>
-#include <rte_tcp.h>
 
 #include "gro_tcp4.h"
 
@@ -44,20 +42,20 @@ gro_tcp4_tbl_create(uint16_t socket_id,
 	}
 	tbl->max_item_num = entries_num;
 
-	size = sizeof(struct gro_tcp4_key) * entries_num;
-	tbl->keys = rte_zmalloc_socket(__func__,
+	size = sizeof(struct gro_tcp4_flow) * entries_num;
+	tbl->flows = rte_zmalloc_socket(__func__,
 			size,
 			RTE_CACHE_LINE_SIZE,
 			socket_id);
-	if (tbl->keys == NULL) {
+	if (tbl->flows == NULL) {
 		rte_free(tbl->items);
 		rte_free(tbl);
 		return NULL;
 	}
-	/* INVALID_ARRAY_INDEX indicates empty key */
+	/* INVALID_ARRAY_INDEX indicates an empty flow */
 	for (i = 0; i < entries_num; i++)
-		tbl->keys[i].start_index = INVALID_ARRAY_INDEX;
-	tbl->max_key_num = entries_num;
+		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+	tbl->max_flow_num = entries_num;
 
 	return tbl;
 }
@@ -69,116 +67,15 @@ gro_tcp4_tbl_destroy(void *tbl)
 
 	if (tcp_tbl) {
 		rte_free(tcp_tbl->items);
-		rte_free(tcp_tbl->keys);
+		rte_free(tcp_tbl->flows);
 	}
 	rte_free(tcp_tbl);
 }
 
-/*
- * merge two TCP/IPv4 packets without updating checksums.
- * If cmp is larger than 0, append the new packet to the
- * original packet. Otherwise, pre-pend the new packet to
- * the original packet.
- */
-static inline int
-merge_two_tcp4_packets(struct gro_tcp4_item *item_src,
-		struct rte_mbuf *pkt,
-		uint16_t ip_id,
-		uint32_t sent_seq,
-		int cmp)
-{
-	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
-	uint16_t tcp_datalen;
-
-	if (cmp > 0) {
-		pkt_head = item_src->firstseg;
-		pkt_tail = pkt;
-	} else {
-		pkt_head = pkt;
-		pkt_tail = item_src->firstseg;
-	}
-
-	/* check if the packet length will be beyond the max value */
-	tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len -
-		pkt_tail->l3_len - pkt_tail->l4_len;
-	if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen >
-			TCP4_MAX_L3_LENGTH)
-		return 0;
-
-	/* remove packet header for the tail packet */
-	rte_pktmbuf_adj(pkt_tail,
-			pkt_tail->l2_len +
-			pkt_tail->l3_len +
-			pkt_tail->l4_len);
-
-	/* chain two packets together */
-	if (cmp > 0) {
-		item_src->lastseg->next = pkt;
-		item_src->lastseg = rte_pktmbuf_lastseg(pkt);
-		/* update IP ID to the larger value */
-		item_src->ip_id = ip_id;
-	} else {
-		lastseg = rte_pktmbuf_lastseg(pkt);
-		lastseg->next = item_src->firstseg;
-		item_src->firstseg = pkt;
-		/* update sent_seq to the smaller value */
-		item_src->sent_seq = sent_seq;
-	}
-	item_src->nb_merged++;
-
-	/* update mbuf metadata for the merged packet */
-	pkt_head->nb_segs += pkt_tail->nb_segs;
-	pkt_head->pkt_len += pkt_tail->pkt_len;
-
-	return 1;
-}
-
-static inline int
-check_seq_option(struct gro_tcp4_item *item,
-		struct tcp_hdr *tcp_hdr,
-		uint16_t tcp_hl,
-		uint16_t tcp_dl,
-		uint16_t ip_id,
-		uint32_t sent_seq)
-{
-	struct rte_mbuf *pkt0 = item->firstseg;
-	struct ipv4_hdr *ipv4_hdr0;
-	struct tcp_hdr *tcp_hdr0;
-	uint16_t tcp_hl0, tcp_dl0;
-	uint16_t len;
-
-	ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) +
-			pkt0->l2_len);
-	tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len);
-	tcp_hl0 = pkt0->l4_len;
-
-	/* check if TCP option fields equal. If not, return 0. */
-	len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr);
-	if ((tcp_hl != tcp_hl0) ||
-			((len > 0) && (memcmp(tcp_hdr + 1,
-					tcp_hdr0 + 1,
-					len) != 0)))
-		return 0;
-
-	/* check if the two packets are neighbors */
-	tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0;
-	if ((sent_seq == (item->sent_seq + tcp_dl0)) &&
-			(ip_id == (item->ip_id + 1)))
-		/* append the new packet */
-		return 1;
-	else if (((sent_seq + tcp_dl) == item->sent_seq) &&
-			((ip_id + item->nb_merged) == item->ip_id))
-		/* pre-pend the new packet */
-		return -1;
-	else
-		return 0;
-}
-
 static inline uint32_t
 find_an_empty_item(struct gro_tcp4_tbl *tbl)
 {
-	uint32_t i;
-	uint32_t max_item_num = tbl->max_item_num;
+	uint32_t max_item_num = tbl->max_item_num, i;
 
 	for (i = 0; i < max_item_num; i++)
 		if (tbl->items[i].firstseg == NULL)
@@ -187,13 +84,12 @@ find_an_empty_item(struct gro_tcp4_tbl *tbl)
 }
 
 static inline uint32_t
-find_an_empty_key(struct gro_tcp4_tbl *tbl)
+find_an_empty_flow(struct gro_tcp4_tbl *tbl)
 {
-	uint32_t i;
-	uint32_t max_key_num = tbl->max_key_num;
+	uint32_t max_flow_num = tbl->max_flow_num, i;
 
-	for (i = 0; i < max_key_num; i++)
-		if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX)
+	for (i = 0; i < max_flow_num; i++)
+		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
 			return i;
 	return INVALID_ARRAY_INDEX;
 }
@@ -201,10 +97,11 @@ find_an_empty_key(struct gro_tcp4_tbl *tbl)
 static inline uint32_t
 insert_new_item(struct gro_tcp4_tbl *tbl,
 		struct rte_mbuf *pkt,
-		uint16_t ip_id,
-		uint32_t sent_seq,
+		uint64_t start_time,
 		uint32_t prev_idx,
-		uint64_t start_time)
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint8_t is_atomic)
 {
 	uint32_t item_idx;
 
@@ -219,9 +116,10 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
 	tbl->items[item_idx].sent_seq = sent_seq;
 	tbl->items[item_idx].ip_id = ip_id;
 	tbl->items[item_idx].nb_merged = 1;
+	tbl->items[item_idx].is_atomic = is_atomic;
 	tbl->item_num++;
 
-	/* if the previous packet exists, chain the new one with it */
+	/* If the previous packet exists, chain them together. */
 	if (prev_idx != INVALID_ARRAY_INDEX) {
 		tbl->items[item_idx].next_pkt_idx =
 			tbl->items[prev_idx].next_pkt_idx;
@@ -232,12 +130,13 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
 }
 
 static inline uint32_t
-delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
+delete_item(struct gro_tcp4_tbl *tbl,
+		uint32_t item_idx,
 		uint32_t prev_item_idx)
 {
 	uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
 
-	/* set NULL to firstseg to indicate it's an empty item */
+	/* NULL indicates an empty item. */
 	tbl->items[item_idx].firstseg = NULL;
 	tbl->item_num--;
 	if (prev_item_idx != INVALID_ARRAY_INDEX)
@@ -247,53 +146,33 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
 }
 
 static inline uint32_t
-insert_new_key(struct gro_tcp4_tbl *tbl,
-		struct tcp4_key *key_src,
+insert_new_flow(struct gro_tcp4_tbl *tbl,
+		struct tcp4_flow_key *src,
 		uint32_t item_idx)
 {
-	struct tcp4_key *key_dst;
-	uint32_t key_idx;
+	struct tcp4_flow_key *dst;
+	uint32_t flow_idx;
 
-	key_idx = find_an_empty_key(tbl);
-	if (key_idx == INVALID_ARRAY_INDEX)
+	flow_idx = find_an_empty_flow(tbl);
+	if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
 		return INVALID_ARRAY_INDEX;
 
-	key_dst = &(tbl->keys[key_idx].key);
+	dst = &(tbl->flows[flow_idx].key);
 
-	ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr));
-	ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr));
-	key_dst->ip_src_addr = key_src->ip_src_addr;
-	key_dst->ip_dst_addr = key_src->ip_dst_addr;
-	key_dst->recv_ack = key_src->recv_ack;
-	key_dst->src_port = key_src->src_port;
-	key_dst->dst_port = key_src->dst_port;
+	ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr));
+	ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr));
+	dst->ip_src_addr = src->ip_src_addr;
+	dst->ip_dst_addr = src->ip_dst_addr;
+	dst->recv_ack = src->recv_ack;
+	dst->src_port = src->src_port;
+	dst->dst_port = src->dst_port;
 
-	/* non-INVALID_ARRAY_INDEX value indicates this key is valid */
-	tbl->keys[key_idx].start_index = item_idx;
-	tbl->key_num++;
+	tbl->flows[flow_idx].start_index = item_idx;
+	tbl->flow_num++;
 
-	return key_idx;
+	return flow_idx;
 }
 
-static inline int
-is_same_key(struct tcp4_key k1, struct tcp4_key k2)
-{
-	if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0)
-		return 0;
-
-	if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0)
-		return 0;
-
-	return ((k1.ip_src_addr == k2.ip_src_addr) &&
-			(k1.ip_dst_addr == k2.ip_dst_addr) &&
-			(k1.recv_ack == k2.recv_ack) &&
-			(k1.src_port == k2.src_port) &&
-			(k1.dst_port == k2.dst_port));
-}
-
-/*
- * update packet length for the flushed packet.
- */
 static inline void
 update_header(struct gro_tcp4_item *item)
 {
@@ -315,84 +194,106 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	struct ipv4_hdr *ipv4_hdr;
 	struct tcp_hdr *tcp_hdr;
 	uint32_t sent_seq;
-	uint16_t tcp_dl, ip_id;
+	uint16_t tcp_dl, ip_id, frag_off, hdr_len;
+	uint8_t is_atomic;
 
-	struct tcp4_key key;
+	struct tcp4_flow_key key;
 	uint32_t cur_idx, prev_idx, item_idx;
-	uint32_t i, max_key_num;
+	uint32_t i, max_flow_num, left_flow_num;
 	int cmp;
+	uint8_t find;
 
 	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
 	ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
 	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+	hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
 
 	/*
-	 * if FIN, SYN, RST, PSH, URG, ECE or
-	 * CWR is set, return immediately.
+	 * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
+	 * or CWR set.
 	 */
 	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
 		return -1;
-	/* if payload length is 0, return immediately */
-	tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len -
-		pkt->l4_len;
-	if (tcp_dl == 0)
+	/*
+	 * Don't process the packet whose payload length is less than or
+	 * equal to 0.
+	 */
+	tcp_dl = pkt->pkt_len - hdr_len;
+	if (tcp_dl <= 0)
 		return -1;
 
-	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+	/*
+	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
+	 * whose DF bit is 1, IPv4 ID is ignored.
+	 */
+	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+	is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
+	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
 	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
 
 	ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr));
 	ether_addr_copy(&(eth_hdr->d_addr), &(key.eth_daddr));
 	key.ip_src_addr = ipv4_hdr->src_addr;
 	key.ip_dst_addr = ipv4_hdr->dst_addr;
+	key.recv_ack = tcp_hdr->recv_ack;
 	key.src_port = tcp_hdr->src_port;
 	key.dst_port = tcp_hdr->dst_port;
-	key.recv_ack = tcp_hdr->recv_ack;
 
-	/* search for a key */
-	max_key_num = tbl->max_key_num;
-	for (i = 0; i < max_key_num; i++) {
-		if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) &&
-				is_same_key(tbl->keys[i].key, key))
-			break;
+	/* Search for a matched flow. */
+	max_flow_num = tbl->max_flow_num;
+	left_flow_num = tbl->flow_num;
+	find = 0;
+	for (i = 0; i < max_flow_num && left_flow_num; i++) {
+		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) {
+			if (is_same_tcp4_flow(tbl->flows[i].key, key)) {
+				find = 1;
+				break;
+			}
+			left_flow_num--;
+		}
 	}
 
-	/* can't find a key, so insert a new key and a new item. */
-	if (i == tbl->max_key_num) {
-		item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq,
-				INVALID_ARRAY_INDEX, start_time);
+	/*
+	 * Fail to find a matched flow. Insert a new flow and store the
+	 * packet into the flow.
+	 */
+	if (find == 0) {
+		item_idx = insert_new_item(tbl, pkt, start_time,
+				INVALID_ARRAY_INDEX, sent_seq, ip_id,
+				is_atomic);
 		if (item_idx == INVALID_ARRAY_INDEX)
 			return -1;
-		if (insert_new_key(tbl, &key, item_idx) ==
+		if (insert_new_flow(tbl, &key, item_idx) ==
 				INVALID_ARRAY_INDEX) {
-			/*
-			 * fail to insert a new key, so
-			 * delete the inserted item
-			 */
+			/* Fail to insert a new flow. */
 			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
 			return -1;
 		}
 		return 0;
 	}
 
-	/* traverse all packets in the item group to find one to merge */
-	cur_idx = tbl->keys[i].start_index;
+	/*
+	 * Check all packets in the flow and try to find a neighbor for
+	 * the input packet.
+	 */
+	cur_idx = tbl->flows[i].start_index;
 	prev_idx = cur_idx;
 	do {
 		cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
-				pkt->l4_len, tcp_dl, ip_id, sent_seq);
+				sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
+				is_atomic);
 		if (cmp) {
 			if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
-						pkt, ip_id,
-						sent_seq, cmp))
+						pkt, cmp, sent_seq, ip_id, 0))
 				return 1;
 			/*
-			 * fail to merge two packets since the packet
-			 * length will be greater than the max value.
-			 * So insert the packet into the item group.
+			 * Fail to merge the two packets, as the packet
+			 * length is greater than the max value. Store
+			 * the packet into the flow.
 			 */
-			if (insert_new_item(tbl, pkt, ip_id, sent_seq,
-						prev_idx, start_time) ==
+			if (insert_new_item(tbl, pkt, start_time, prev_idx,
+						sent_seq, ip_id,
+						is_atomic) ==
 					INVALID_ARRAY_INDEX)
 				return -1;
 			return 0;
@@ -401,12 +302,9 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		cur_idx = tbl->items[cur_idx].next_pkt_idx;
 	} while (cur_idx != INVALID_ARRAY_INDEX);
 
-	/*
-	 * can't find a packet in the item group to merge,
-	 * so insert the packet into the item group.
-	 */
-	if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx,
-				start_time) == INVALID_ARRAY_INDEX)
+	/* Fail to find a neighbor, so store the packet into the flow. */
+	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
+				ip_id, is_atomic) == INVALID_ARRAY_INDEX)
 		return -1;
 
 	return 0;
@@ -418,46 +316,35 @@ gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
 		struct rte_mbuf **out,
 		uint16_t nb_out)
 {
-	uint16_t k = 0;
+	uint32_t max_flow_num = tbl->max_flow_num;
 	uint32_t i, j;
-	uint32_t max_key_num = tbl->max_key_num;
+	uint16_t k = 0;
 
-	for (i = 0; i < max_key_num; i++) {
-		/* all keys have been checked, return immediately */
-		if (tbl->key_num == 0)
+	for (i = 0; i < max_flow_num; i++) {
+		if (unlikely(tbl->flow_num == 0))
 			return k;
 
-		j = tbl->keys[i].start_index;
+		j = tbl->flows[i].start_index;
 		while (j != INVALID_ARRAY_INDEX) {
 			if (tbl->items[j].start_time <= flush_timestamp) {
 				out[k++] = tbl->items[j].firstseg;
 				if (tbl->items[j].nb_merged > 1)
 					update_header(&(tbl->items[j]));
 				/*
-				 * delete the item and get
-				 * the next packet index
+				 * Delete the packet and get the next
+				 * packet in the flow.
 				 */
-				j = delete_item(tbl, j,
-						INVALID_ARRAY_INDEX);
+				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+				tbl->flows[i].start_index = j;
+				if (j == INVALID_ARRAY_INDEX)
+					tbl->flow_num--;
 
-				/*
-				 * delete the key as all of
-				 * packets are flushed
-				 */
-				if (j == INVALID_ARRAY_INDEX) {
-					tbl->keys[i].start_index =
-						INVALID_ARRAY_INDEX;
-					tbl->key_num--;
-				} else
-					/* update start_index of the key */
-					tbl->keys[i].start_index = j;
-
-				if (k == nb_out)
+				if (unlikely(k == nb_out))
 					return k;
 			} else
 				/*
-				 * left packets of this key won't be
-				 * timeout, so go to check other keys.
+				 * The left packets in this flow won't be
+				 * timeout. Go to check other flows.
 				 */
 				break;
 		}
diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h
index d129523..c2b66a8 100644
--- a/lib/librte_gro/gro_tcp4.h
+++ b/lib/librte_gro/gro_tcp4.h
@@ -5,17 +5,20 @@
 #ifndef _GRO_TCP4_H_
 #define _GRO_TCP4_H_
 
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
 #define INVALID_ARRAY_INDEX 0xffffffffUL
 #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
 
 /*
- * the max L3 length of a TCP/IPv4 packet. The L3 length
- * is the sum of ipv4 header, tcp header and L4 payload.
+ * The max length of a IPv4 packet, which includes the length of the L3
+ * header, the L4 header and the data payload.
  */
-#define TCP4_MAX_L3_LENGTH UINT16_MAX
+#define MAX_IPV4_PKT_LENGTH UINT16_MAX
 
-/* criteria of mergeing packets */
-struct tcp4_key {
+/* Header fields representing a TCP/IPv4 flow */
+struct tcp4_flow_key {
 	struct ether_addr eth_saddr;
 	struct ether_addr eth_daddr;
 	uint32_t ip_src_addr;
@@ -26,77 +29,76 @@ struct tcp4_key {
 	uint16_t dst_port;
 };
 
-struct gro_tcp4_key {
-	struct tcp4_key key;
+struct gro_tcp4_flow {
+	struct tcp4_flow_key key;
 	/*
-	 * the index of the first packet in the item group.
-	 * If the value is INVALID_ARRAY_INDEX, it means
-	 * the key is empty.
+	 * The index of the first packet in the flow.
+	 * INVALID_ARRAY_INDEX indicates an empty flow.
 	 */
 	uint32_t start_index;
 };
 
 struct gro_tcp4_item {
 	/*
-	 * first segment of the packet. If the value
+	 * The first MBUF segment of the packet. If the value
 	 * is NULL, it means the item is empty.
 	 */
 	struct rte_mbuf *firstseg;
-	/* last segment of the packet */
+	/* The last MBUF segment of the packet */
 	struct rte_mbuf *lastseg;
 	/*
-	 * the time when the first packet is inserted
-	 * into the table. If a packet in the table is
-	 * merged with an incoming packet, this value
-	 * won't be updated. We set this value only
-	 * when the first packet is inserted into the
-	 * table.
+	 * The time when the first packet is inserted into the table.
+	 * This value won't be updated, even if the packet is merged
+	 * with other packets.
 	 */
 	uint64_t start_time;
 	/*
-	 * we use next_pkt_idx to chain the packets that
-	 * have same key value but can't be merged together.
+	 * next_pkt_idx is used to chain the packets that
+	 * are in the same flow but can't be merged together
+	 * (e.g. caused by packet reordering).
 	 */
 	uint32_t next_pkt_idx;
-	/* the sequence number of the packet */
+	/* TCP sequence number of the packet */
 	uint32_t sent_seq;
-	/* the IP ID of the packet */
+	/* IPv4 ID of the packet */
 	uint16_t ip_id;
-	/* the number of merged packets */
+	/* The number of merged packets */
 	uint16_t nb_merged;
+	/* Indicate if IPv4 ID can be ignored */
+	uint8_t is_atomic;
 };
 
 /*
- * TCP/IPv4 reassembly table structure.
+ * TCP/IPv4 reassembly table structure
  */
 struct gro_tcp4_tbl {
 	/* item array */
 	struct gro_tcp4_item *items;
-	/* key array */
-	struct gro_tcp4_key *keys;
+	/* flow array */
+	struct gro_tcp4_flow *flows;
 	/* current item number */
 	uint32_t item_num;
-	/* current key num */
-	uint32_t key_num;
+	/* current flow num */
+	uint32_t flow_num;
 	/* item array size */
 	uint32_t max_item_num;
-	/* key array size */
-	uint32_t max_key_num;
+	/* flow array size */
+	uint32_t max_flow_num;
 };
 
 /**
  * This function creates a TCP/IPv4 reassembly table.
  *
  * @param socket_id
- *  socket index for allocating TCP/IPv4 reassemble table
+ *  Socket index for allocating the TCP/IPv4 reassemble table
  * @param max_flow_num
- *  the maximum number of flows in the TCP/IPv4 GRO table
+ *  The maximum number of flows in the TCP/IPv4 GRO table
  * @param max_item_per_flow
- *  the maximum packet number per flow.
+ *  The maximum number of packets per flow
  *
  * @return
- *  if create successfully, return a pointer which points to the
- *  created TCP/IPv4 GRO table. Otherwise, return NULL.
+ *  - Return the table pointer on success.
+ *  - Return NULL on failure.
  */
 void *gro_tcp4_tbl_create(uint16_t socket_id,
 		uint16_t max_flow_num,
@@ -106,62 +108,56 @@ void *gro_tcp4_tbl_create(uint16_t socket_id,
  * This function destroys a TCP/IPv4 reassembly table.
  *
  * @param tbl
- *  a pointer points to the TCP/IPv4 reassembly table.
+ *  Pointer pointing to the TCP/IPv4 reassembly table.
  */
 void gro_tcp4_tbl_destroy(void *tbl);
 
 /**
- * This function searches for a packet in the TCP/IPv4 reassembly table
- * to merge with the inputted one. To merge two packets is to chain them
- * together and update packet headers. Packets, whose SYN, FIN, RST, PSH
- * CWR, ECE or URG bit is set, are returned immediately. Packets which
- * only have packet headers (i.e. without data) are also returned
- * immediately. Otherwise, the packet is either merged, or inserted into
- * the table. Besides, if there is no available space to insert the
- * packet, this function returns immediately too.
+ * This function merges a TCP/IPv4 packet. It doesn't process the packet,
+ * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or doesn't have
+ * payload.
  *
- * This function assumes the inputted packet is with correct IPv4 and
- * TCP checksums. And if two packets are merged, it won't re-calculate
- * IPv4 and TCP checksums. Besides, if the inputted packet is IP
- * fragmented, it assumes the packet is complete (with TCP header).
+ * This function doesn't check if the packet has correct checksums and
+ * doesn't re-calculate checksums for the merged packet. Additionally,
+ * it assumes the packets are complete (i.e., MF==0 && frag_off==0),
+ * when IP fragmentation is possible (i.e., DF==0). It returns the
+ * packet, if the packet has invalid parameters (e.g. SYN bit is set)
+ * or there is no available space in the table.
  *
  * @param pkt
- *  packet to reassemble.
+ *  Packet to reassemble
  * @param tbl
- *  a pointer that points to a TCP/IPv4 reassembly table.
+ *  Pointer pointing to the TCP/IPv4 reassembly table
  * @start_time
- *  the start time that the packet is inserted into the table
+ *  The time when the packet is inserted into the table
  *
  * @return
- *  if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE
- *  or URG bit is set, or there is no available space in the table to
- *  insert a new item or a new key, return a negative value. If the
- *  packet is merged successfully, return an positive value. If the
- *  packet is inserted into the table, return 0.
+ *  - Return a positive value if the packet is merged.
+ *  - Return zero if the packet isn't merged but stored in the table.
+ *  - Return a negative value for invalid parameters or no available
+ *    space in the table.
  */
 int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		struct gro_tcp4_tbl *tbl,
 		uint64_t start_time);
 
 /**
- * This function flushes timeout packets in a TCP/IPv4 reassembly table
- * to applications, and without updating checksums for merged packets.
- * The max number of flushed timeout packets is the element number of
- * the array which is used to keep flushed packets.
+ * This function flushes timeout packets in a TCP/IPv4 reassembly table,
+ * and without updating checksums.
  *
  * @param tbl
- *  a pointer that points to a TCP GRO table.
+ *  TCP/IPv4 reassembly table pointer
  * @param flush_timestamp
- *  this function flushes packets which are inserted into the table
- *  before or at the flush_timestamp.
+ *  Flush packets which are inserted into the table before or at the
+ *  flush_timestamp.
  * @param out
- *  pointer array which is used to keep flushed packets.
+ *  Pointer array used to keep flushed packets
  * @param nb_out
- *  the element number of out. It's also the max number of timeout
+ *  The element number in 'out'. It also determines the maximum number of
  *  packets that can be flushed finally.
  *
  * @return
- *  the number of packets that are returned.
+ *  The number of flushed packets
  */
 uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
 		uint64_t flush_timestamp,
@@ -173,10 +169,131 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
  * reassembly table.
  *
  * @param tbl
- *  pointer points to a TCP/IPv4 reassembly table.
+ *  TCP/IPv4 reassembly table pointer
  *
  * @return
- *  the number of packets in the table
+ *  The number of packets in the table
  */
 uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
+
+/*
+ * Check if two TCP/IPv4 packets belong to the same flow.
+ */
+static inline int
+is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
+{
+	return (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) &&
+			is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) &&
+			(k1.ip_src_addr == k2.ip_src_addr) &&
+			(k1.ip_dst_addr == k2.ip_dst_addr) &&
+			(k1.recv_ack == k2.recv_ack) &&
+			(k1.src_port == k2.src_port) &&
+			(k1.dst_port == k2.dst_port));
+}
+
+/*
+ * Check if two TCP/IPv4 packets are neighbors.
+ */
+static inline int
+check_seq_option(struct gro_tcp4_item *item,
+		struct tcp_hdr *tcph,
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint16_t tcp_hl,
+		uint16_t tcp_dl,
+		uint16_t l2_offset,
+		uint8_t is_atomic)
+{
+	struct rte_mbuf *pkt_orig = item->firstseg;
+	struct ipv4_hdr *iph_orig;
+	struct tcp_hdr *tcph_orig;
+	uint16_t len, l4_len_orig;
+
+	iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
+			l2_offset + pkt_orig->l2_len);
+	tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
+	l4_len_orig = pkt_orig->l4_len;
+
+	/* Check if TCP option fields equal */
+	len = RTE_MAX(tcp_hl, l4_len_orig) - sizeof(struct tcp_hdr);
+	if ((tcp_hl != l4_len_orig) || ((len > 0) &&
+				(memcmp(tcph + 1, tcph_orig + 1,
+					len) != 0)))
+		return 0;
+
+	/* Don't merge packets whose DF bits are different */
+	if (unlikely(item->is_atomic ^ is_atomic))
+		return 0;
+
+	/* Check if the two packets are neighbors */
+	len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
+		pkt_orig->l3_len - l4_len_orig;
+	if ((sent_seq == item->sent_seq + len) && (is_atomic ||
+				(ip_id == item->ip_id + item->nb_merged)))
+		/* Append the new packet */
+		return 1;
+	else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
+				(ip_id + 1 == item->ip_id)))
+		/* Pre-pend the new packet */
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Merge two TCP/IPv4 packets without updating checksums.
+ * If cmp is larger than 0, append the new packet to the
+ * original packet. Otherwise, pre-pend the new packet to
+ * the original packet.
+ */
+static inline int
+merge_two_tcp4_packets(struct gro_tcp4_item *item,
+		struct rte_mbuf *pkt,
+		int cmp,
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint16_t l2_offset)
+{
+	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
+	uint16_t hdr_len, l2_len;
+
+	if (cmp > 0) {
+		pkt_head = item->firstseg;
+		pkt_tail = pkt;
+	} else {
+		pkt_head = pkt;
+		pkt_tail = item->firstseg;
+	}
+
+	/* Check if the IPv4 packet length is greater than the max value */
+	hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
+		pkt_head->l4_len;
+	l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
+	if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len - hdr_len >
+			MAX_IPV4_PKT_LENGTH))
+		return 0;
+
+	/* Remove the packet header */
+	rte_pktmbuf_adj(pkt_tail, hdr_len);
+
+	/* Chain two packets together */
+	if (cmp > 0) {
+		item->lastseg->next = pkt;
+		item->lastseg = rte_pktmbuf_lastseg(pkt);
+	} else {
+		lastseg = rte_pktmbuf_lastseg(pkt);
+		lastseg->next = item->firstseg;
+		item->firstseg = pkt;
+		/* Update sent_seq and ip_id */
+		item->sent_seq = sent_seq;
+		item->ip_id = ip_id;
+	}
+	item->nb_merged++;
+
+	/* Update MBUF metadata for the merged packet */
+	pkt_head->nb_segs += pkt_tail->nb_segs;
+	pkt_head->pkt_len += pkt_tail->pkt_len;
+
+	return 1;
+}
 #endif
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index d6b8cd1..7176c0e 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -23,11 +23,14 @@ static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
 static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
 			gro_tcp4_tbl_pkt_count, NULL};
 
+#define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
+		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
+
 /*
- * GRO context structure, which is used to merge packets. It keeps
- * many reassembly tables of desired GRO types. Applications need to
- * create GRO context objects before using rte_gro_reassemble to
- * perform GRO.
+ * GRO context structure. It keeps the table structures, which are
+ * used to merge packets, for different GRO types. Before using
+ * rte_gro_reassemble(), applications need to create the GRO context
+ * first.
  */
 struct gro_ctx {
 	/* GRO types to perform */
@@ -65,7 +68,7 @@ rte_gro_ctx_create(const struct rte_gro_param *param)
 				param->max_flow_num,
 				param->max_item_per_flow);
 		if (gro_ctx->tbls[i] == NULL) {
-			/* destroy all created tables */
+			/* Destroy all created tables */
 			gro_ctx->gro_types = gro_types;
 			rte_gro_ctx_destroy(gro_ctx);
 			return NULL;
@@ -85,8 +88,6 @@ rte_gro_ctx_destroy(void *ctx)
 	uint64_t gro_type_flag;
 	uint8_t i;
 
-	if (gro_ctx == NULL)
-		return;
 	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
 		gro_type_flag = 1ULL << i;
 		if ((gro_ctx->gro_types & gro_type_flag) == 0)
@@ -103,62 +104,54 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
 		const struct rte_gro_param *param)
 {
-	uint16_t i;
-	uint16_t nb_after_gro = nb_pkts;
-	uint32_t item_num;
-
-	/* allocate a reassembly table for TCP/IPv4 GRO */
+	/* Allocate a reassembly table for TCP/IPv4 GRO */
 	struct gro_tcp4_tbl tcp_tbl;
-	struct gro_tcp4_key tcp_keys[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
 	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
 
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
-	uint16_t unprocess_num = 0;
+	uint32_t item_num;
 	int32_t ret;
-	uint64_t current_time;
+	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
 
-	if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0)
+	if (unlikely((param->gro_types & RTE_GRO_TCP_IPV4) == 0))
 		return nb_pkts;
 
-	/* get the actual number of packets */
+	/* Get the maximum number of packets */
 	item_num = RTE_MIN(nb_pkts, (param->max_flow_num *
-			param->max_item_per_flow));
+				param->max_item_per_flow));
 	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
 
 	for (i = 0; i < item_num; i++)
-		tcp_keys[i].start_index = INVALID_ARRAY_INDEX;
+		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
 
-	tcp_tbl.keys = tcp_keys;
+	tcp_tbl.flows = tcp_flows;
 	tcp_tbl.items = tcp_items;
-	tcp_tbl.key_num = 0;
+	tcp_tbl.flow_num = 0;
 	tcp_tbl.item_num = 0;
-	tcp_tbl.max_key_num = item_num;
+	tcp_tbl.max_flow_num = item_num;
 	tcp_tbl.max_item_num = item_num;
 
-	current_time = rte_rdtsc();
-
 	for (i = 0; i < nb_pkts; i++) {
-		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
-					RTE_PTYPE_L4_TCP)) ==
-				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
-			ret = gro_tcp4_reassemble(pkts[i],
-					&tcp_tbl,
-					current_time);
+		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+			/*
+			 * The timestamp is ignored, since all packets
+			 * will be flushed from the tables.
+			 */
+			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0);
 			if (ret > 0)
-				/* merge successfully */
+				/* Merge successfully */
 				nb_after_gro--;
-			else if (ret < 0) {
-				unprocess_pkts[unprocess_num++] =
-					pkts[i];
-			}
+			else if (ret < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else
 			unprocess_pkts[unprocess_num++] = pkts[i];
 	}
 
-	/* re-arrange GROed packets */
 	if (nb_after_gro < nb_pkts) {
-		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time,
-				pkts, nb_pkts);
+		/* Flush all packets from the tables */
+		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, pkts, nb_pkts);
+		/* Copy unprocessed packets */
 		if (unprocess_num > 0) {
 			memcpy(&pkts[i], unprocess_pkts,
 					sizeof(struct rte_mbuf *) *
@@ -174,31 +167,28 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
 		void *ctx)
 {
-	uint16_t i, unprocess_num = 0;
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	struct gro_ctx *gro_ctx = ctx;
+	void *tcp_tbl;
 	uint64_t current_time;
+	uint16_t i, unprocess_num = 0;
 
-	if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0)
+	if (unlikely((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0))
 		return nb_pkts;
 
+	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
-					RTE_PTYPE_L4_TCP)) ==
-				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
-			if (gro_tcp4_reassemble(pkts[i],
-						gro_ctx->tbls
-						[RTE_GRO_TCP_IPV4_INDEX],
+		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+			if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
 						current_time) < 0)
 				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else
 			unprocess_pkts[unprocess_num++] = pkts[i];
 	}
 	if (unprocess_num > 0) {
-		memcpy(pkts, unprocess_pkts,
-				sizeof(struct rte_mbuf *) *
+		memcpy(pkts, unprocess_pkts, sizeof(struct rte_mbuf *) *
 				unprocess_num);
 	}
 
@@ -224,6 +214,7 @@ rte_gro_timeout_flush(void *ctx,
 				flush_timestamp,
 				out, max_nb_out);
 	}
+
 	return 0;
 }
 
@@ -232,19 +223,20 @@ rte_gro_get_pkt_count(void *ctx)
 {
 	struct gro_ctx *gro_ctx = ctx;
 	gro_tbl_pkt_count_fn pkt_count_fn;
+	uint64_t gro_types = gro_ctx->gro_types, flag;
 	uint64_t item_num = 0;
-	uint64_t gro_type_flag;
 	uint8_t i;
 
-	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
-		gro_type_flag = 1ULL << i;
-		if ((gro_ctx->gro_types & gro_type_flag) == 0)
+	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM && gro_types; i++) {
+		flag = 1ULL << i;
+		if ((gro_types & flag) == 0)
 			continue;
 
+		gro_types ^= flag;
 		pkt_count_fn = tbl_pkt_count_fn[i];
-		if (pkt_count_fn == NULL)
-			continue;
-		item_num += pkt_count_fn(gro_ctx->tbls[i]);
+		if (pkt_count_fn)
+			item_num += pkt_count_fn(gro_ctx->tbls[i]);
 	}
+
 	return item_num;
 }
diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
index 81a2eac..7979a59 100644
--- a/lib/librte_gro/rte_gro.h
+++ b/lib/librte_gro/rte_gro.h
@@ -31,8 +31,8 @@ extern "C" {
 /**< TCP/IPv4 GRO flag */
 
 /**
- * A structure which is used to create GRO context objects or tell
- * rte_gro_reassemble_burst() what reassembly rules are demanded.
+ * Structure used to create GRO context objects or used to pass
+ * application-determined parameters to rte_gro_reassemble_burst().
  */
 struct rte_gro_param {
 	uint64_t gro_types;
@@ -78,26 +78,23 @@ void rte_gro_ctx_destroy(void *ctx);
 
 /**
  * This is one of the main reassembly APIs, which merges numbers of
- * packets at a time. It assumes that all inputted packets are with
- * correct checksums. That is, applications should guarantee all
- * inputted packets are correct. Besides, it doesn't re-calculate
- * checksums for merged packets. If inputted packets are IP fragmented,
- * this function assumes them are complete (i.e. with L4 header). After
- * finishing processing, it returns all GROed packets to applications
- * immediately.
+ * packets at a time. It doesn't check if input packets have correct
+ * checksums and doesn't re-calculate checksums for merged packets.
+ * It assumes the packets are complete (i.e., MF==0 && frag_off==0),
+ * when IP fragmentation is possible (i.e., DF==0). The GROed packets
+ * are returned as soon as the function finishes.
  *
  * @param pkts
- *  a pointer array which points to the packets to reassemble. Besides,
- *  it keeps mbuf addresses for the GROed packets.
+ *  Pointer array pointing to the packets to reassemble. Besides, it
+ *  keeps MBUF addresses for the GROed packets.
  * @param nb_pkts
- *  the number of packets to reassemble.
+ *  The number of packets to reassemble
  * @param param
- *  applications use it to tell rte_gro_reassemble_burst() what rules
- *  are demanded.
+ *  Application-determined parameters for reassembling packets.
  *
  * @return
- *  the number of packets after been GROed. If no packets are merged,
- *  the returned value is nb_pkts.
+ *  The number of packets after being GROed. If no packets are merged,
+ *  the return value equals nb_pkts.
  */
 uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
@@ -107,32 +104,28 @@ uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts,
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
  *
- * Reassembly function, which tries to merge inputted packets with
- * the packets in the reassembly tables of a given GRO context. This
- * function assumes all inputted packets are with correct checksums.
- * And it won't update checksums if two packets are merged. Besides,
- * if inputted packets are IP fragmented, this function assumes they
- * are complete packets (i.e. with L4 header).
+ * Reassembly function, which tries to merge input packets with the
+ * existing packets in the reassembly tables of a given GRO context.
+ * It doesn't check if input packets have correct checksums and doesn't
+ * re-calculate checksums for merged packets. Additionally, it assumes
+ * the packets are complete (i.e., MF==0 && frag_off==0), when IP
+ * fragmentation is possible (i.e., DF==0).
  *
- * If the inputted packets don't have data or are with unsupported GRO
- * types etc., they won't be processed and are returned to applications.
- * Otherwise, the inputted packets are either merged or inserted into
- * the table. If applications want get packets in the table, they need
- * to call flush API.
+ * If the input packets have invalid parameters (e.g. no data payload,
+ * unsupported GRO types), they are returned to applications. Otherwise,
+ * they are either merged or inserted into the table. Applications need
+ * to flush packets from the tables by flush API, if they want to get the
+ * GROed packets.
  *
  * @param pkts
- *  packet to reassemble. Besides, after this function finishes, it
- *  keeps the unprocessed packets (e.g. without data or unsupported
- *  GRO types).
+ *  Packets to reassemble. It's also used to store the unprocessed packets.
  * @param nb_pkts
- *  the number of packets to reassemble.
+ *  The number of packets to reassemble
  * @param ctx
- *  a pointer points to a GRO context object.
+ *  GRO context object pointer
  *
  * @return
- *  return the number of unprocessed packets (e.g. without data or
- *  unsupported GRO types). If all packets are processed (merged or
- *  inserted into the table), return 0.
+ *  The number of unprocessed packets.
  */
 uint16_t rte_gro_reassemble(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
@@ -142,29 +135,28 @@ uint16_t rte_gro_reassemble(struct rte_mbuf **pkts,
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
  *
- * This function flushes the timeout packets from reassembly tables of
- * desired GRO types. The max number of flushed timeout packets is the
- * element number of the array which is used to keep the flushed packets.
+ * This function flushes the timeout packets from the reassembly tables
+ * of desired GRO types. The max number of flushed packets is the
+ * element number of 'out'.
  *
- * Besides, this function won't re-calculate checksums for merged
- * packets in the tables. That is, the returned packets may be with
- * wrong checksums.
+ * Additionally, the flushed packets may have incorrect checksums, since
+ * this function doesn't re-calculate checksums for merged packets.
  *
  * @param ctx
- *  a pointer points to a GRO context object.
+ *  GRO context object pointer.
  * @param timeout_cycles
- *  max TTL for packets in reassembly tables, measured in nanosecond.
+ *  The max TTL for packets in reassembly tables, measured in nanosecond.
  * @param gro_types
- *  this function only flushes packets which belong to the GRO types
- *  specified by gro_types.
+ *  This function flushes packets whose GRO types are specified by
+ *  gro_types.
  * @param out
- *  a pointer array that is used to keep flushed timeout packets.
+ *  Pointer array used to keep flushed packets.
  * @param max_nb_out
- *  the element number of out. It's also the max number of timeout
+ *  The element number of 'out'. It's also the max number of timeout
  *  packets that can be flushed finally.
  *
  * @return
- *  the number of flushed packets. If no packets are flushed, return 0.
+ *  The number of flushed packets.
  */
 uint16_t rte_gro_timeout_flush(void *ctx,
 		uint64_t timeout_cycles,
@@ -180,10 +172,10 @@ uint16_t rte_gro_timeout_flush(void *ctx,
  * of a given GRO context.
  *
  * @param ctx
- *  pointer points to a GRO context object.
+ *  GRO context object pointer.
  *
  * @return
- *  the number of packets in all reassembly tables.
+ *  The number of packets in the tables.
  */
 uint64_t rte_gro_get_pkt_count(void *ctx);
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v4 2/2] gro: support VxLAN GRO
  2018-01-05  6:12     ` [PATCH v4 " Jiayu Hu
  2018-01-05  6:12       ` [PATCH v4 1/2] gro: code cleanup Jiayu Hu
@ 2018-01-05  6:12       ` Jiayu Hu
  2018-01-10 14:03       ` [PATCH v5 0/3] Support " Jiayu Hu
  2 siblings, 0 replies; 31+ messages in thread
From: Jiayu Hu @ 2018-01-05  6:12 UTC (permalink / raw)
  To: dev
  Cc: bruce.richardson, junjie.j.chen, jianfeng.tan, stephen,
	ferruh.yigit, konstantin.ananyev, lei.a.yao, Jiayu Hu

This patch adds a framework that allows GRO on tunneled packets.
Furthermore, it leverages that framework to provide GRO support for
VxLAN-encapsulated packets. Supported VxLAN packets must have an outer
IPv4 header, and contain an inner TCP/IPv4 packet.

VxLAN GRO doesn't check if input packets have correct checksums and
doesn't update checksums for output packets. Additionally, it assumes
the packets are complete (i.e., MF==0 && frag_off==0), when IP
fragmentation is possible (i.e., DF==0).

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
Reviewed-by: Junjie Chen <junjie.j.chen@intel.com>
---
 .../prog_guide/generic_receive_offload_lib.rst     |  31 +-
 lib/librte_gro/Makefile                            |   1 +
 lib/librte_gro/gro_vxlan_tcp4.c                    | 521 +++++++++++++++++++++
 lib/librte_gro/gro_vxlan_tcp4.h                    | 184 ++++++++
 lib/librte_gro/rte_gro.c                           | 129 ++++-
 lib/librte_gro/rte_gro.h                           |   5 +-
 6 files changed, 843 insertions(+), 28 deletions(-)
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.c
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.h

diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst b/doc/guides/prog_guide/generic_receive_offload_lib.rst
index c2d7a41..078bec0 100644
--- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
+++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
@@ -57,7 +57,9 @@ assumes the packets are complete (i.e., MF==0 && frag_off==0), when IP
 fragmentation is possible (i.e., DF==0). Additionally, it complies RFC
 6864 to process the IPv4 ID field.
 
-Currently, the GRO library provides GRO supports for TCP/IPv4 packets.
+Currently, the GRO library provides GRO support for TCP/IPv4 packets and
+VxLAN packets which contain an outer IPv4 header and an inner TCP/IPv4
+packet.
 
 Two Sets of API
 ---------------
@@ -108,7 +110,8 @@ Reassembly Algorithm
 
 The reassembly algorithm is used for reassembling packets. In the GRO
 library, different GRO types can use different algorithms. In this
-section, we will introduce an algorithm, which is used by TCP/IPv4 GRO.
+section, we will introduce an algorithm, which is used by TCP/IPv4 GRO
+and VxLAN GRO.
 
 Challenges
 ~~~~~~~~~~
@@ -185,6 +188,30 @@ Header fields deciding if two packets are neighbors include:
 - IPv4 ID. The IPv4 ID fields of the packets, whose DF bit is 0, should
   be increased by 1.
 
+VxLAN GRO
+---------
+
+The table structure used by VxLAN GRO, which is in charge of processing
+VxLAN packets with an outer IPv4 header and inner TCP/IPv4 packet, is
+similar to that of TCP/IPv4 GRO. However, the header fields used
+to define a VxLAN flow include:
+
+- outer source and destination: Ethernet and IP address, UDP port
+
+- VxLAN header (VNI and flag)
+
+- inner source and destination: Ethernet and IP address, TCP port
+
+Header fields deciding if packets are neighbors include:
+
+- outer IPv4 ID. The IPv4 ID fields of the packets, whose DF bit in the
+  outer IPv4 header is 0, should be increased by 1.
+
+- inner TCP sequence number
+
+- inner IPv4 ID. The IPv4 ID fields of the packets, whose DF bit in the
+  inner IPv4 header is 0, should be increased by 1.
+
 .. note::
         We comply RFC 6864 to process the IPv4 ID field. Specifically,
         we check IPv4 ID fields for the packets whose DF bit is 0 and
diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile
index 63df236..bec248f 100644
--- a/lib/librte_gro/Makefile
+++ b/lib/librte_gro/Makefile
@@ -17,6 +17,7 @@ LIBABIVER := 1
 # source files
 SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
 SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c
+SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_vxlan_tcp4.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h
diff --git a/lib/librte_gro/gro_vxlan_tcp4.c b/lib/librte_gro/gro_vxlan_tcp4.c
new file mode 100644
index 0000000..26262f6
--- /dev/null
+++ b/lib/librte_gro/gro_vxlan_tcp4.c
@@ -0,0 +1,521 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_udp.h>
+
+#include "gro_vxlan_tcp4.h"
+
+void *
+gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow)
+{
+	struct gro_vxlan_tcp4_tbl *tbl;
+	size_t size;
+	uint32_t entries_num, i;
+
+	entries_num = max_flow_num * max_item_per_flow;
+	entries_num = RTE_MIN(entries_num, GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM);
+
+	if (entries_num == 0)
+		return NULL;
+
+	tbl = rte_zmalloc_socket(__func__,
+			sizeof(struct gro_vxlan_tcp4_tbl),
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl == NULL)
+		return NULL;
+
+	size = sizeof(struct gro_vxlan_tcp4_item) * entries_num;
+	tbl->items = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->items == NULL) {
+		rte_free(tbl);
+		return NULL;
+	}
+	tbl->max_item_num = entries_num;
+
+	size = sizeof(struct gro_vxlan_tcp4_flow) * entries_num;
+	tbl->flows = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->flows == NULL) {
+		rte_free(tbl->items);
+		rte_free(tbl);
+		return NULL;
+	}
+
+	for (i = 0; i < entries_num; i++)
+		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+	tbl->max_flow_num = entries_num;
+
+	return tbl;
+}
+
+void
+gro_vxlan_tcp4_tbl_destroy(void *tbl)
+{
+	struct gro_vxlan_tcp4_tbl *vxlan_tbl = tbl;
+
+	if (vxlan_tbl) {
+		rte_free(vxlan_tbl->items);
+		rte_free(vxlan_tbl->flows);
+	}
+	rte_free(vxlan_tbl);
+}
+
+static inline uint32_t
+find_an_empty_item(struct gro_vxlan_tcp4_tbl *tbl)
+{
+	uint32_t max_item_num = tbl->max_item_num, i;
+
+	for (i = 0; i < max_item_num; i++)
+		if (tbl->items[i].inner_item.firstseg == NULL)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+find_an_empty_flow(struct gro_vxlan_tcp4_tbl *tbl)
+{
+	uint32_t max_flow_num = tbl->max_flow_num, i;
+
+	for (i = 0; i < max_flow_num; i++)
+		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+insert_new_item(struct gro_vxlan_tcp4_tbl *tbl,
+		struct rte_mbuf *pkt,
+		uint64_t start_time,
+		uint32_t prev_idx,
+		uint32_t sent_seq,
+		uint16_t outer_ip_id,
+		uint16_t ip_id,
+		uint8_t outer_is_atomic,
+		uint8_t is_atomic)
+{
+	uint32_t item_idx;
+
+	item_idx = find_an_empty_item(tbl);
+	if (item_idx == INVALID_ARRAY_INDEX)
+		return INVALID_ARRAY_INDEX;
+
+	tbl->items[item_idx].inner_item.firstseg = pkt;
+	tbl->items[item_idx].inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
+	tbl->items[item_idx].inner_item.start_time = start_time;
+	tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
+	tbl->items[item_idx].inner_item.sent_seq = sent_seq;
+	tbl->items[item_idx].inner_item.ip_id = ip_id;
+	tbl->items[item_idx].inner_item.nb_merged = 1;
+	tbl->items[item_idx].inner_item.is_atomic = is_atomic;
+	tbl->items[item_idx].outer_ip_id = outer_ip_id;
+	tbl->items[item_idx].outer_is_atomic = outer_is_atomic;
+	tbl->item_num++;
+
+	/* If the previous packet exists, chain the new one with it. */
+	if (prev_idx != INVALID_ARRAY_INDEX) {
+		tbl->items[item_idx].inner_item.next_pkt_idx =
+			tbl->items[prev_idx].inner_item.next_pkt_idx;
+		tbl->items[prev_idx].inner_item.next_pkt_idx = item_idx;
+	}
+
+	return item_idx;
+}
+
+static inline uint32_t
+delete_item(struct gro_vxlan_tcp4_tbl *tbl,
+		uint32_t item_idx,
+		uint32_t prev_item_idx)
+{
+	uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
+
+	/* NULL indicates an empty item. */
+	tbl->items[item_idx].inner_item.firstseg = NULL;
+	tbl->item_num--;
+	if (prev_item_idx != INVALID_ARRAY_INDEX)
+		tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
+
+	return next_idx;
+}
+
+static inline uint32_t
+insert_new_flow(struct gro_vxlan_tcp4_tbl *tbl,
+		struct vxlan_tcp4_flow_key *src,
+		uint32_t item_idx)
+{
+	struct vxlan_tcp4_flow_key *dst;
+	uint32_t flow_idx;
+
+	flow_idx = find_an_empty_flow(tbl);
+	if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
+		return INVALID_ARRAY_INDEX;
+
+	dst = &(tbl->flows[flow_idx].key);
+
+	ether_addr_copy(&(src->inner_key.eth_saddr),
+			&(dst->inner_key.eth_saddr));
+	ether_addr_copy(&(src->inner_key.eth_daddr),
+			&(dst->inner_key.eth_daddr));
+	dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
+	dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
+	dst->inner_key.recv_ack = src->inner_key.recv_ack;
+	dst->inner_key.src_port = src->inner_key.src_port;
+	dst->inner_key.dst_port = src->inner_key.dst_port;
+
+	dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
+	dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
+	ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
+	ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
+	dst->outer_ip_src_addr = src->outer_ip_src_addr;
+	dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
+	dst->outer_src_port = src->outer_src_port;
+	dst->outer_dst_port = src->outer_dst_port;
+
+	tbl->flows[flow_idx].start_index = item_idx;
+	tbl->flow_num++;
+
+	return flow_idx;
+}
+
+static inline int
+is_same_vxlan_tcp4_flow(struct vxlan_tcp4_flow_key k1,
+		struct vxlan_tcp4_flow_key k2)
+{
+	return (is_same_ether_addr(&k1.outer_eth_saddr, &k2.outer_eth_saddr) &&
+			is_same_ether_addr(&k1.outer_eth_daddr,
+				&k2.outer_eth_daddr) &&
+			(k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
+			(k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
+			(k1.outer_src_port == k2.outer_src_port) &&
+			(k1.outer_dst_port == k2.outer_dst_port) &&
+			(k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
+			(k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
+			is_same_tcp4_flow(k1.inner_key, k2.inner_key));
+}
+
+static inline int
+check_vxlan_seq_option(struct gro_vxlan_tcp4_item *item,
+		struct tcp_hdr *tcp_hdr,
+		uint32_t sent_seq,
+		uint16_t outer_ip_id,
+		uint16_t ip_id,
+		uint16_t tcp_hl,
+		uint16_t tcp_dl,
+		uint8_t outer_is_atomic,
+		uint8_t is_atomic)
+{
+	struct rte_mbuf *pkt = item->inner_item.firstseg;
+	int cmp;
+	uint16_t l2_offset;
+
+	/* Don't merge packets whose outer DF bits are different. */
+	if (unlikely(item->outer_is_atomic ^ outer_is_atomic))
+		return 0;
+
+	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
+	cmp = check_seq_option(&item->inner_item, tcp_hdr, sent_seq, ip_id,
+			tcp_hl, tcp_dl, l2_offset, is_atomic);
+	if ((cmp == 1) && (outer_is_atomic ||
+				(outer_ip_id == item->outer_ip_id +
+				 item->inner_item.nb_merged)))
+		/* Append the packet. */
+		return 1;
+	else if ((cmp == -1) && (outer_is_atomic ||
+				(outer_ip_id + 1 == item->outer_ip_id)))
+		/* Prepend the packet. */
+		return -1;
+
+	return 0;
+}
+
+static inline int
+merge_two_vxlan_tcp4_packets(struct gro_vxlan_tcp4_item *item,
+		struct rte_mbuf *pkt,
+		int cmp,
+		uint32_t sent_seq,
+		uint16_t outer_ip_id,
+		uint16_t ip_id)
+{
+	if (merge_two_tcp4_packets(&item->inner_item, pkt, cmp, sent_seq,
+				ip_id, pkt->outer_l2_len +
+				pkt->outer_l3_len)) {
+		item->outer_ip_id = cmp < 0 ? outer_ip_id : item->outer_ip_id;
+		return 1;
+	}
+
+	return 0;
+}
+
+static inline void
+update_vxlan_header(struct gro_vxlan_tcp4_item *item)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct udp_hdr *udp_hdr;
+	struct rte_mbuf *pkt = item->inner_item.firstseg;
+	uint16_t len;
+
+	/* Update the outer IPv4 header. */
+	len = pkt->pkt_len - pkt->outer_l2_len;
+	ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			pkt->outer_l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
+
+	/* Update the outer UDP header. */
+	len -= pkt->outer_l3_len;
+	udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + pkt->outer_l3_len);
+	udp_hdr->dgram_len = rte_cpu_to_be_16(len);
+
+	/* Update the inner IPv4 header. */
+	len -= pkt->l2_len;
+	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
+}
+
+int32_t
+gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
+		struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t start_time)
+{
+	struct ether_hdr *outer_eth_hdr, *eth_hdr;
+	struct ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
+	struct tcp_hdr *tcp_hdr;
+	struct udp_hdr *udp_hdr;
+	struct vxlan_hdr *vxlan_hdr;
+	uint32_t sent_seq;
+	uint16_t tcp_dl, frag_off, outer_ip_id, ip_id;
+	uint8_t outer_is_atomic, is_atomic;
+
+	struct vxlan_tcp4_flow_key key;
+	uint32_t cur_idx, prev_idx, item_idx;
+	uint32_t i, max_flow_num, left_flow_num;
+	int cmp;
+	uint16_t hdr_len;
+	uint8_t find;
+
+	outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+	outer_ipv4_hdr = (struct ipv4_hdr *)((char *)outer_eth_hdr +
+			pkt->outer_l2_len);
+	udp_hdr = (struct udp_hdr *)((char *)outer_ipv4_hdr +
+			pkt->outer_l3_len);
+	vxlan_hdr = (struct vxlan_hdr *)((char *)udp_hdr +
+			sizeof(struct udp_hdr));
+	eth_hdr = (struct ether_hdr *)((char *)vxlan_hdr +
+			sizeof(struct vxlan_hdr));
+	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
+	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+
+	/*
+	 * Don't process the packet which has FIN, SYN, RST, PSH, URG,
+	 * ECE or CWR set.
+	 */
+	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
+		return -1;
+
+	hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
+		pkt->l3_len + pkt->l4_len;
+	/*
+	 * Don't process the packet whose payload length is less than or
+	 * equal to 0.
+	 */
+	tcp_dl = pkt->pkt_len - hdr_len;
+	if (tcp_dl <= 0)
+		return -1;
+
+	/*
+	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
+	 * whose DF bit is 1, IPv4 ID is ignored.
+	 */
+	frag_off = rte_be_to_cpu_16(outer_ipv4_hdr->fragment_offset);
+	outer_is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
+	outer_ip_id = outer_is_atomic ? 0 :
+		rte_be_to_cpu_16(outer_ipv4_hdr->packet_id);
+	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+	is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
+	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
+
+	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
+
+	ether_addr_copy(&(eth_hdr->s_addr), &(key.inner_key.eth_saddr));
+	ether_addr_copy(&(eth_hdr->d_addr), &(key.inner_key.eth_daddr));
+	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
+	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
+	key.inner_key.recv_ack = tcp_hdr->recv_ack;
+	key.inner_key.src_port = tcp_hdr->src_port;
+	key.inner_key.dst_port = tcp_hdr->dst_port;
+
+	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
+	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;
+	ether_addr_copy(&(outer_eth_hdr->s_addr), &(key.outer_eth_saddr));
+	ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
+	key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
+	key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
+	key.outer_src_port = udp_hdr->src_port;
+	key.outer_dst_port = udp_hdr->dst_port;
+
+	/* Search for a matched flow. */
+	max_flow_num = tbl->max_flow_num;
+	left_flow_num = tbl->flow_num;
+	find = 0;
+	for (i = 0; i < max_flow_num && left_flow_num; i++) {
+		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) {
+			if (is_same_vxlan_tcp4_flow(tbl->flows[i].key, key)) {
+				find = 1;
+				break;
+			}
+			left_flow_num--;
+		}
+	}
+
+	/*
+	 * Can't find a matched flow. Insert a new flow and store the
+	 * packet into the flow.
+	 */
+	if (find == 0) {
+		item_idx = insert_new_item(tbl, pkt, start_time,
+				INVALID_ARRAY_INDEX, sent_seq, outer_ip_id,
+				ip_id, outer_is_atomic, is_atomic);
+		if (item_idx == INVALID_ARRAY_INDEX)
+			return -1;
+		if (insert_new_flow(tbl, &key, item_idx) ==
+				INVALID_ARRAY_INDEX) {
+			/*
+			 * Fail to insert a new flow, so
+			 * delete the inserted packet.
+			 */
+			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
+			return -1;
+		}
+		return 0;
+	}
+
+	/* Check all packets in the flow and try to find a neighbor. */
+	cur_idx = tbl->flows[i].start_index;
+	prev_idx = cur_idx;
+	do {
+		cmp = check_vxlan_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
+				sent_seq, outer_ip_id, ip_id, pkt->l4_len,
+				tcp_dl, outer_is_atomic, is_atomic);
+		if (cmp) {
+			if (merge_two_vxlan_tcp4_packets(&(tbl->items[cur_idx]),
+						pkt, cmp, sent_seq,
+						outer_ip_id, ip_id))
+				return 1;
+			/*
+			 * Can't merge two packets, as the packet
+			 * length will be greater than the max value.
+			 * Insert the packet into the flow.
+			 */
+			if (insert_new_item(tbl, pkt, start_time, prev_idx,
+						sent_seq, outer_ip_id,
+						ip_id, outer_is_atomic,
+						is_atomic) ==
+					INVALID_ARRAY_INDEX)
+				return -1;
+			return 0;
+		}
+		prev_idx = cur_idx;
+		cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx;
+	} while (cur_idx != INVALID_ARRAY_INDEX);
+
+	/* Can't find neighbor. Insert the packet into the flow. */
+	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
+				outer_ip_id, ip_id, outer_is_atomic,
+				is_atomic) == INVALID_ARRAY_INDEX)
+		return -1;
+
+	return 0;
+}
+
+uint16_t
+gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out)
+{
+	uint16_t k = 0;
+	uint32_t i, j;
+	uint32_t max_flow_num = tbl->max_flow_num;
+
+	for (i = 0; i < max_flow_num; i++) {
+		if (unlikely(tbl->flow_num == 0))
+			return k;
+
+		j = tbl->flows[i].start_index;
+		while (j != INVALID_ARRAY_INDEX) {
+			if (tbl->items[j].inner_item.start_time <=
+					flush_timestamp) {
+				out[k++] = tbl->items[j].inner_item.firstseg;
+				if (tbl->items[j].inner_item.nb_merged > 1)
+					update_vxlan_header(&(tbl->items[j]));
+				/*
+				 * Delete the item and get the next packet
+				 * index.
+				 */
+				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+				tbl->flows[i].start_index = j;
+				if (j == INVALID_ARRAY_INDEX)
+					tbl->flow_num--;
+
+				if (unlikely(k == nb_out))
+					return k;
+			} else
+				/*
+				 * The left packets in the flow won't be
+				 * timeout. Go to check other flows.
+				 */
+				break;
+		}
+	}
+	return k;
+}
+
+uint32_t
+gro_vxlan_tcp4_tbl_pkt_count(void *tbl)
+{
+	struct gro_vxlan_tcp4_tbl *gro_tbl = tbl;
+
+	if (gro_tbl)
+		return gro_tbl->item_num;
+
+	return 0;
+}
diff --git a/lib/librte_gro/gro_vxlan_tcp4.h b/lib/librte_gro/gro_vxlan_tcp4.h
new file mode 100644
index 0000000..66baf73
--- /dev/null
+++ b/lib/librte_gro/gro_vxlan_tcp4.h
@@ -0,0 +1,184 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _GRO_VXLAN_TCP4_H_
+#define _GRO_VXLAN_TCP4_H_
+
+#include "gro_tcp4.h"
+
+#define GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
+
+/* Header fields representing a VxLAN flow */
+struct vxlan_tcp4_flow_key {
+	struct tcp4_flow_key inner_key;
+	struct vxlan_hdr vxlan_hdr;
+
+	struct ether_addr outer_eth_saddr;
+	struct ether_addr outer_eth_daddr;
+
+	uint32_t outer_ip_src_addr;
+	uint32_t outer_ip_dst_addr;
+
+	/* Outer UDP ports */
+	uint16_t outer_src_port;
+	uint16_t outer_dst_port;
+
+};
+
+struct gro_vxlan_tcp4_flow {
+	struct vxlan_tcp4_flow_key key;
+	/*
+	 * The index of the first packet in the flow. INVALID_ARRAY_INDEX
+	 * indicates an empty flow.
+	 */
+	uint32_t start_index;
+};
+
+struct gro_vxlan_tcp4_item {
+	struct gro_tcp4_item inner_item;
+	/* IPv4 ID in the outer IPv4 header */
+	uint16_t outer_ip_id;
+	/* Indicate if outer IPv4 ID can be ignored */
+	uint8_t outer_is_atomic;
+};
+
+/*
+ * VxLAN (with an outer IPv4 header and an inner TCP/IPv4 packet)
+ * reassembly table structure
+ */
+struct gro_vxlan_tcp4_tbl {
+	/* item array */
+	struct gro_vxlan_tcp4_item *items;
+	/* flow array */
+	struct gro_vxlan_tcp4_flow *flows;
+	/* current item number */
+	uint32_t item_num;
+	/* current flow number */
+	uint32_t flow_num;
+	/* the maximum item number */
+	uint32_t max_item_num;
+	/* the maximum flow number */
+	uint32_t max_flow_num;
+};
+
+/**
+ * This function creates a VxLAN reassembly table for VxLAN packets
+ * which have an outer IPv4 header and an inner TCP/IPv4 packet.
+ *
+ * @param socket_id
+ *  Socket index for allocating the table
+ * @param max_flow_num
+ *  The maximum number of flows in the table
+ * @param max_item_per_flow
+ *  The maximum number of packets per flow
+ *
+ * @return
+ *  - Return the table pointer on success.
+ *  - Return NULL on failure.
+ */
+void *gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow);
+
+/**
+ * This function destroys a VxLAN reassembly table.
+ *
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ */
+void gro_vxlan_tcp4_tbl_destroy(void *tbl);
+
+/**
+ * This function merges a VxLAN packet which has an outer IPv4 header and
+ * an inner TCP/IPv4 packet. It doesn't process the packet, whose TCP
+ * header has SYN, FIN, RST, PSH, CWR, ECE or URG bit set, or which
+ * doesn't have payload.
+ *
+ * This function doesn't check if the packet has correct checksums and
+ * doesn't re-calculate checksums for the merged packet. Additionally,
+ * it assumes the packets are complete (i.e., MF==0 && frag_off==0), when
+ * IP fragmentation is possible (i.e., DF==0). It returns the packet, if
+ * the packet has invalid parameters (e.g. SYN bit is set) or there is no
+ * available space in the table.
+ *
+ * @param pkt
+ *  Packet to reassemble
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ * @param start_time
+ *  The time when the packet is inserted into the table
+ *
+ * @return
+ *  - Return a positive value if the packet is merged.
+ *  - Return zero if the packet isn't merged but stored in the table.
+ *  - Return a negative value for invalid parameters or no available
+ *    space in the table.
+ */
+int32_t gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
+		struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t start_time);
+
+/**
+ * This function flushes timeout packets in the VxLAN reassembly table
+ * without updating checksums for the flushed packets.
+ *
+ * @param tbl
+ *  Pointer pointing to a VxLAN GRO table
+ * @param flush_timestamp
+ *  This function flushes packets which are inserted into the table
+ *  before or at the flush_timestamp.
+ * @param out
+ *  Pointer array used to keep flushed packets
+ * @param nb_out
+ *  The element number in 'out'. It also determines the maximum number of
+ *  packets that can be flushed finally.
+ *
+ * @return
+ *  The number of flushed packets
+ */
+uint16_t gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out);
+
+/**
+ * This function returns the number of the packets in a VxLAN
+ * reassembly table.
+ *
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ *
+ * @return
+ *  The number of packets in the table
+ */
+uint32_t gro_vxlan_tcp4_tbl_pkt_count(void *tbl);
+#endif
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index 7176c0e..17d7741 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -9,6 +9,7 @@
 
 #include "rte_gro.h"
 #include "gro_tcp4.h"
+#include "gro_vxlan_tcp4.h"
 
 typedef void *(*gro_tbl_create_fn)(uint16_t socket_id,
 		uint16_t max_flow_num,
@@ -17,15 +18,28 @@ typedef void (*gro_tbl_destroy_fn)(void *tbl);
 typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl);
 
 static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = {
-		gro_tcp4_tbl_create, NULL};
+		gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, NULL};
 static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
-			gro_tcp4_tbl_destroy, NULL};
+			gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy,
+			NULL};
 static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
-			gro_tcp4_tbl_pkt_count, NULL};
+			gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count,
+			NULL};
 
 #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
 		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
 
+#define IS_IPV4_VXLAN_TCP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
+		((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \
+		((ptype & RTE_PTYPE_TUNNEL_VXLAN) == \
+		 RTE_PTYPE_TUNNEL_VXLAN) && \
+		 ((ptype & RTE_PTYPE_INNER_L4_TCP) == \
+		  RTE_PTYPE_INNER_L4_TCP) && \
+		  (((ptype & RTE_PTYPE_INNER_L3_MASK) & \
+		    (RTE_PTYPE_INNER_L3_IPV4 | \
+		     RTE_PTYPE_INNER_L3_IPV4_EXT | \
+		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0))
+
 /*
  * GRO context structure. It keeps the table structures, which are
  * used to merge packets, for different GRO types. Before using
@@ -109,12 +123,20 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
 	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
 
+	/* Allocate a reassembly table for VXLAN GRO */
+	struct gro_vxlan_tcp4_tbl vxlan_tbl;
+	struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {
+		{{0}, 0, 0} };
+
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	uint32_t item_num;
 	int32_t ret;
 	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
+	uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0;
 
-	if (unlikely((param->gro_types & RTE_GRO_TCP_IPV4) == 0))
+	if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
+					RTE_GRO_TCP_IPV4)) == 0))
 		return nb_pkts;
 
 	/* Get the maximum number of packets */
@@ -122,22 +144,47 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 				param->max_item_per_flow));
 	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
 
-	for (i = 0; i < item_num; i++)
-		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
+	if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
+		for (i = 0; i < item_num; i++)
+			vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		vxlan_tbl.flows = vxlan_flows;
+		vxlan_tbl.items = vxlan_items;
+		vxlan_tbl.flow_num = 0;
+		vxlan_tbl.item_num = 0;
+		vxlan_tbl.max_flow_num = item_num;
+		vxlan_tbl.max_item_num = item_num;
+		do_vxlan_gro = 1;
+	}
 
-	tcp_tbl.flows = tcp_flows;
-	tcp_tbl.items = tcp_items;
-	tcp_tbl.flow_num = 0;
-	tcp_tbl.item_num = 0;
-	tcp_tbl.max_flow_num = item_num;
-	tcp_tbl.max_item_num = item_num;
+	if (param->gro_types & RTE_GRO_TCP_IPV4) {
+		for (i = 0; i < item_num; i++)
+			tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		tcp_tbl.flows = tcp_flows;
+		tcp_tbl.items = tcp_items;
+		tcp_tbl.flow_num = 0;
+		tcp_tbl.item_num = 0;
+		tcp_tbl.max_flow_num = item_num;
+		tcp_tbl.max_item_num = item_num;
+		do_tcp4_gro = 1;
+	}
 
 	for (i = 0; i < nb_pkts; i++) {
-		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
-			/*
-			 * The timestamp is ignored, since all packets
-			 * will be flushed from the tables.
-			 */
+		/*
+		 * The timestamp is ignored, since all packets
+		 * will be flushed from the tables.
+		 */
+		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
+				do_vxlan_gro) {
+			ret = gro_vxlan_tcp4_reassemble(pkts[i], &vxlan_tbl, 0);
+			if (ret > 0)
+				/* Merge successfully */
+				nb_after_gro--;
+			else if (ret < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
+				do_tcp4_gro) {
 			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0);
 			if (ret > 0)
 				/* Merge successfully */
@@ -149,8 +196,16 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 	}
 
 	if (nb_after_gro < nb_pkts) {
+		i = 0;
 		/* Flush all packets from the tables */
-		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, pkts, nb_pkts);
+		if (do_vxlan_gro) {
+			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
+					0, pkts, nb_pkts);
+		}
+		if (do_tcp4_gro) {
+			i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
+					&pkts[i], nb_pkts - i);
+		}
 		/* Copy unprocessed packets */
 		if (unprocess_num > 0) {
 			memcpy(&pkts[i], unprocess_pkts,
@@ -169,18 +224,33 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
 {
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	struct gro_ctx *gro_ctx = ctx;
-	void *tcp_tbl;
+	void *tcp_tbl, *vxlan_tbl;
 	uint64_t current_time;
 	uint16_t i, unprocess_num = 0;
+	uint8_t do_tcp4_gro, do_vxlan_gro;
 
-	if (unlikely((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0))
+	if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
+					RTE_GRO_TCP_IPV4)) == 0))
 		return nb_pkts;
 
 	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
+	vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
+
+	do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
+		RTE_GRO_TCP_IPV4;
+	do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
+		RTE_GRO_IPV4_VXLAN_TCP_IPV4;
+
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
+				do_vxlan_gro) {
+			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
+						current_time) < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
+				do_tcp4_gro) {
 			if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
 						current_time) < 0)
 				unprocess_pkts[unprocess_num++] = pkts[i];
@@ -204,18 +274,27 @@ rte_gro_timeout_flush(void *ctx,
 {
 	struct gro_ctx *gro_ctx = ctx;
 	uint64_t flush_timestamp;
+	uint16_t num = 0;
 
 	gro_types = gro_types & gro_ctx->gro_types;
 	flush_timestamp = rte_rdtsc() - timeout_cycles;
 
-	if (gro_types & RTE_GRO_TCP_IPV4) {
-		return gro_tcp4_tbl_timeout_flush(
+	if (gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
+		num = gro_vxlan_tcp4_tbl_timeout_flush(gro_ctx->tbls[
+				RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX],
+				flush_timestamp, out, max_nb_out);
+		max_nb_out -= num;
+	}
+
+	/* If no available space in 'out', stop flushing. */
+	if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
+		num += gro_tcp4_tbl_timeout_flush(
 				gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX],
 				flush_timestamp,
-				out, max_nb_out);
+				&out[num], max_nb_out);
 	}
 
-	return 0;
+	return num;
 }
 
 uint64_t
diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
index 7979a59..f310ae8 100644
--- a/lib/librte_gro/rte_gro.h
+++ b/lib/librte_gro/rte_gro.h
@@ -23,12 +23,15 @@ extern "C" {
  */
 #define RTE_GRO_TYPE_MAX_NUM 64
 /**< the max number of supported GRO types */
-#define RTE_GRO_TYPE_SUPPORT_NUM 1
+#define RTE_GRO_TYPE_SUPPORT_NUM 2
 /**< the number of currently supported GRO types */
 
 #define RTE_GRO_TCP_IPV4_INDEX 0
 #define RTE_GRO_TCP_IPV4 (1ULL << RTE_GRO_TCP_IPV4_INDEX)
 /**< TCP/IPv4 GRO flag */
+#define RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX 1
+#define RTE_GRO_IPV4_VXLAN_TCP_IPV4 (1ULL << RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX)
+/**< VxLAN GRO flag. */
 
 /**
  * Structure used to create GRO context objects or used to pass
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v4 1/2] gro: code cleanup
  2018-01-05  6:12       ` [PATCH v4 1/2] gro: code cleanup Jiayu Hu
@ 2018-01-08  1:15         ` Yao, Lei A
  2018-01-10  0:09         ` Thomas Monjalon
  1 sibling, 0 replies; 31+ messages in thread
From: Yao, Lei A @ 2018-01-08  1:15 UTC (permalink / raw)
  To: Hu, Jiayu, dev
  Cc: Richardson, Bruce, Chen, Junjie J, Tan, Jianfeng, stephen, Yigit,
	Ferruh, Ananyev, Konstantin



> -----Original Message-----
> From: Hu, Jiayu
> Sent: Friday, January 5, 2018 2:13 PM
> To: dev@dpdk.org
> Cc: Richardson, Bruce <bruce.richardson@intel.com>; Chen, Junjie J
> <junjie.j.chen@intel.com>; Tan, Jianfeng <jianfeng.tan@intel.com>;
> stephen@networkplumber.org; Yigit, Ferruh <ferruh.yigit@intel.com>;
> Ananyev, Konstantin <konstantin.ananyev@intel.com>; Yao, Lei A
> <lei.a.yao@intel.com>; Hu, Jiayu <jiayu.hu@intel.com>
> Subject: [PATCH v4 1/2] gro: code cleanup
> 
> - Remove needless check and variants
> - For better understanding, update the programmer guide and rename
>   internal functions and variants
> - For supporting tunneled gro, move common internal functions from
>   gro_tcp4.c to gro_tcp4.h
> - Comply RFC 6864 to process the IPv4 ID field
> 
> Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
> Reviewed-by: Junjie Chen <junjie.j.chen@intel.com>
Tested-by: Lei Yao<lei.a.yao@intel.com>
I have tested this patch with the following traffic flow:
NIC1(In kernel)-->NIC2(pmd, GRO on)-->vhost-user->virtio-net(in VM)
The Iperf test with 1 stream shows that GRO VxLAN can improve the 
performance from 6 Gbps(GRO off) to 16 Gbps(GRO on).

> ---
>  .../prog_guide/generic_receive_offload_lib.rst     | 246 ++++++++-------
>  doc/guides/prog_guide/img/gro-key-algorithm.svg    | 223
> ++++++++++++++
>  lib/librte_gro/gro_tcp4.c                          | 339 +++++++--------------
>  lib/librte_gro/gro_tcp4.h                          | 253 ++++++++++-----
>  lib/librte_gro/rte_gro.c                           | 102 +++----
>  lib/librte_gro/rte_gro.h                           |  92 +++---
>  6 files changed, 750 insertions(+), 505 deletions(-)
>  create mode 100644 doc/guides/prog_guide/img/gro-key-algorithm.svg
> 
> diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst
> b/doc/guides/prog_guide/generic_receive_offload_lib.rst
> index 22e50ec..c2d7a41 100644
> --- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
> +++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
> @@ -32,128 +32,162 @@ Generic Receive Offload Library
>  ===============================
> 
>  Generic Receive Offload (GRO) is a widely used SW-based offloading
> -technique to reduce per-packet processing overhead. It gains performance
> -by reassembling small packets into large ones. To enable more flexibility
> -to applications, DPDK implements GRO as a standalone library. Applications
> -explicitly use the GRO library to merge small packets into large ones.
> -
> -The GRO library assumes all input packets have correct checksums. In
> -addition, the GRO library doesn't re-calculate checksums for merged
> -packets. If input packets are IP fragmented, the GRO library assumes
> -they are complete packets (i.e. with L4 headers).
> -
> -Currently, the GRO library implements TCP/IPv4 packet reassembly.
> -
> -Reassembly Modes
> -----------------
> -
> -The GRO library provides two reassembly modes: lightweight and
> -heavyweight mode. If applications want to merge packets in a simple way,
> -they can use the lightweight mode API. If applications want more
> -fine-grained controls, they can choose the heavyweight mode API.
> -
> -Lightweight Mode
> -~~~~~~~~~~~~~~~~
> -
> -The ``rte_gro_reassemble_burst()`` function is used for reassembly in
> -lightweight mode. It tries to merge N input packets at a time, where
> -N should be less than or equal to ``RTE_GRO_MAX_BURST_ITEM_NUM``.
> -
> -In each invocation, ``rte_gro_reassemble_burst()`` allocates temporary
> -reassembly tables for the desired GRO types. Note that the reassembly
> -table is a table structure used to reassemble packets and different GRO
> -types (e.g. TCP/IPv4 GRO and TCP/IPv6 GRO) have different reassembly
> table
> -structures. The ``rte_gro_reassemble_burst()`` function uses the
> reassembly
> -tables to merge the N input packets.
> -
> -For applications, performing GRO in lightweight mode is simple. They
> -just need to invoke ``rte_gro_reassemble_burst()``. Applications can get
> -GROed packets as soon as ``rte_gro_reassemble_burst()`` returns.
> -
> -Heavyweight Mode
> -~~~~~~~~~~~~~~~~
> -
> -The ``rte_gro_reassemble()`` function is used for reassembly in
> heavyweight
> -mode. Compared with the lightweight mode, performing GRO in
> heavyweight mode
> -is relatively complicated.
> -
> -Before performing GRO, applications need to create a GRO context object
> -by calling ``rte_gro_ctx_create()``. A GRO context object holds the
> -reassembly tables of desired GRO types. Note that all update/lookup
> -operations on the context object are not thread safe. So if different
> -processes or threads want to access the same context object
> simultaneously,
> -some external syncing mechanisms must be used.
> -
> -Once the GRO context is created, applications can then use the
> -``rte_gro_reassemble()`` function to merge packets. In each invocation,
> -``rte_gro_reassemble()`` tries to merge input packets with the packets
> -in the reassembly tables. If an input packet is an unsupported GRO type,
> -or other errors happen (e.g. SYN bit is set), ``rte_gro_reassemble()``
> -returns the packet to applications. Otherwise, the input packet is either
> -merged or inserted into a reassembly table.
> -
> -When applications want to get GRO processed packets, they need to use
> -``rte_gro_timeout_flush()`` to flush them from the tables manually.
> +technique to reduce per-packet processing overheads. By reassembling
> +small packets into larger ones, GRO enables applications to process
> +fewer large packets directly, thus reducing the number of packets to
> +be processed. To benefit DPDK-based applications, like Open vSwitch,
> +DPDK also provides own GRO implementation. In DPDK, GRO is
> implemented
> +as a standalone library. Applications explicitly use the GRO library to
> +reassemble packets.
> +
> +Overview
> +--------
> +
> +In the GRO library, there are many GRO types which are defined by packet
> +types. One GRO type is in charge of processing one kind of packet. For
> +example, TCP/IPv4 GRO processes TCP/IPv4 packets.
> +
> +Each GRO type has a reassembly function, which defines its own algorithm and
> +table structure to reassemble packets. We assign input packets to the
> +corresponding GRO functions by MBUF->packet_type.
> +
> +The GRO library doesn't check if input packets have correct checksums and
> +doesn't re-calculate checksums for merged packets. The GRO library
> +assumes the packets are complete (i.e., MF==0 && frag_off==0), when IP
> +fragmentation is possible (i.e., DF==0). Additionally, it complies with
> +RFC 6864 to process the IPv4 ID field.
> 
> -TCP/IPv4 GRO
> -------------
> +Currently, the GRO library provides GRO supports for TCP/IPv4 packets.
> +
> +Two Sets of API
> +---------------
> +
> +For different usage scenarios, the GRO library provides two sets of API.
> +The one is called the lightweight mode API, which enables applications to
> +merge a small number of packets rapidly; the other is called the
> +heavyweight mode API, which provides fine-grained controls to
> +applications and supports to merge a large number of packets.
> +
> +Lightweight Mode API
> +~~~~~~~~~~~~~~~~~~~~
> +
> +The lightweight mode only has one function ``rte_gro_reassemble_burst()``,
> +which processes N packets at a time. Using the lightweight mode API to
> +merge packets is very simple. Calling ``rte_gro_reassemble_burst()`` is
> +enough. The GROed packets are returned to applications as soon as it
> +finishes.
> +
> +In ``rte_gro_reassemble_burst()``, table structures of different GRO
> +types are allocated in the stack. This design simplifies applications'
> +operations. However, limited by the stack size, the maximum number of
> +packets that ``rte_gro_reassemble_burst()`` can process in an invocation
> +should be less than or equal to ``RTE_GRO_MAX_BURST_ITEM_NUM``.
> +
> +Heavyweight Mode API
> +~~~~~~~~~~~~~~~~~~~~
> +
> +Compared with the lightweight mode, using the heavyweight mode API is
> +relatively complex. Firstly, applications need to create a GRO context
> +by ``rte_gro_ctx_create()``. ``rte_gro_ctx_create()`` allocates tables
> +structures in the heap and stores their pointers in the GRO context.
> +Secondly, applications use ``rte_gro_reassemble()`` to merge packets.
> +If input packets have invalid parameters, ``rte_gro_reassemble()``
> +returns them to applications. For example, packets of unsupported GRO
> +types or TCP SYN packets are returned. Otherwise, the input packets are
> +either merged with the existed packets in the tables or inserted into the
> +tables. Finally, applications use ``rte_gro_timeout_flush()`` to flush
> +packets from the tables, when they want to get the GROed packets.
> +
> +Note that all update/lookup operations on the GRO context are not thread
> +safe. So if different processes or threads want to access the same
> +context object simultaneously, some external syncing mechanisms must be
> +used.
> +
> +Reassembly Algorithm
> +--------------------
> +
> +The reassembly algorithm is used for reassembling packets. In the GRO
> +library, different GRO types can use different algorithms. In this
> +section, we will introduce an algorithm, which is used by TCP/IPv4 GRO.
> 
> -TCP/IPv4 GRO supports merging small TCP/IPv4 packets into large ones,
> -using a table structure called the TCP/IPv4 reassembly table.
> +Challenges
> +~~~~~~~~~~
> 
> -TCP/IPv4 Reassembly Table
> -~~~~~~~~~~~~~~~~~~~~~~~~~
> +The reassembly algorithm determines the efficiency of GRO. There are two
> +challenges in the algorithm design:
> 
> -A TCP/IPv4 reassembly table includes a "key" array and an "item" array.
> -The key array keeps the criteria to merge packets and the item array
> -keeps the packet information.
> +- a high cost algorithm/implementation would cause packet dropping in a
> +  high speed network.
> 
> -Each key in the key array points to an item group, which consists of
> -packets which have the same criteria values but can't be merged. A key
> -in the key array includes two parts:
> +- packet reordering makes it hard to merge packets. For example, Linux
> +  GRO fails to merge packets when encounters packet reordering.
> 
> -* ``criteria``: the criteria to merge packets. If two packets can be
> -  merged, they must have the same criteria values.
> +The above two challenges require that our algorithm be:
> 
> -* ``start_index``: the item array index of the first packet in the item
> -  group.
> +- lightweight enough to scale fast networking speed
> 
> -Each element in the item array keeps the information of a packet. An item
> -in the item array mainly includes three parts:
> +- capable of handling packet reordering
> 
> -* ``firstseg``: the mbuf address of the first segment of the packet.
> +In DPDK GRO, we use a key-based algorithm to address the two challenges.
> 
> -* ``lastseg``: the mbuf address of the last segment of the packet.
> +Key-based Reassembly Algorithm
> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> +
> +:numref:`figure_gro-key-algorithm` illustrates the procedure of the
> +key-based algorithm. Packets are classified into "flows" by some header
> +fields (we call them as "key"). To process an input packet, the algorithm
> +searches for a matched "flow" (i.e., the same value of key) for the
> +packet first, then checks all packets in the "flow" and tries to find a
> +"neighbor" for it. If find a "neighbor", merge the two packets together.
> +If can't find a "neighbor", store the packet into its "flow". If can't
> +find a matched "flow", insert a new "flow" and store the packet into the
> +"flow".
> +
> +.. note::
> +        Packets in the same "flow" that can't merge are always caused
> +        by packet reordering.
> +
> +The key-based algorithm has two characteristics:
> +
> +- classifying packets into "flows" to accelerate packet aggregation is
> +  simple (address challenge 1).
> +
> +- storing out-of-order packets makes it possible to merge later (address
> +  challenge 2).
> +
> +.. _figure_gro-key-algorithm:
> +
> +.. figure:: img/gro-key-algorithm.*
> +   :align: center
> +
> +   Key-based Reassembly Algorithm
> +
> +TCP/IPv4 GRO
> +------------
> 
> -* ``next_pkt_index``: the item array index of the next packet in the same
> -  item group. TCP/IPv4 GRO uses ``next_pkt_index`` to chain the packets
> -  that have the same criteria value but can't be merged together.
> +The table structure used by TCP/IPv4 GRO contains two arrays: flow array
> +and item array. The flow array keeps flow information, and the item array
> +keeps packet information.
> 
> -Procedure to Reassemble a Packet
> -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> +Header fields used to define a TCP/IPv4 flow include:
> 
> -To reassemble an incoming packet needs three steps:
> +- source and destination: Ethernet and IP address, TCP port
> 
> -#. Check if the packet should be processed. Packets with one of the
> -   following properties aren't processed and are returned immediately:
> +- TCP acknowledge number
> 
> -   * FIN, SYN, RST, URG, PSH, ECE or CWR bit is set.
> +TCP/IPv4 packets whose FIN, SYN, RST, URG, PSH, ECE or CWR bit is set
> +won't be processed.
> 
> -   * L4 payload length is 0.
> +Header fields deciding if two packets are neighbors include:
> 
> -#.  Traverse the key array to find a key which has the same criteria
> -    value with the incoming packet. If found, go to the next step.
> -    Otherwise, insert a new key and a new item for the packet.
> +- TCP sequence number
> 
> -#. Locate the first packet in the item group via ``start_index``. Then
> -   traverse all packets in the item group via ``next_pkt_index``. If a
> -   packet is found which can be merged with the incoming one, merge them
> -   together. If one isn't found, insert the packet into this item group.
> -   Note that to merge two packets is to link them together via mbuf's
> -   ``next`` field.
> +- IPv4 ID. The IPv4 ID fields of the packets, whose DF bit is 0, should
> +  be increased by 1.
> 
> -When packets are flushed from the reassembly table, TCP/IPv4 GRO
> updates
> -packet header fields for the merged packets. Note that before reassembling
> -the packet, TCP/IPv4 GRO doesn't check if the checksums of packets are
> -correct. Also, TCP/IPv4 GRO doesn't re-calculate checksums for merged
> -packets.
> +.. note::
> +        We comply with RFC 6864 to process the IPv4 ID field. Specifically,
> +        we check IPv4 ID fields for the packets whose DF bit is 0 and
> +        ignore IPv4 ID fields for the packets whose DF bit is 1.
> +        Additionally, packets which have different value of DF bit can't
> +        be merged.
> diff --git a/doc/guides/prog_guide/img/gro-key-algorithm.svg
> b/doc/guides/prog_guide/img/gro-key-algorithm.svg
> new file mode 100644
> index 0000000..94e42f5
> --- /dev/null
> +++ b/doc/guides/prog_guide/img/gro-key-algorithm.svg
> @@ -0,0 +1,223 @@
> +<?xml version="1.0" encoding="UTF-8" standalone="no"?>
> +<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"
> "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
> +<!-- Generated by Microsoft Visio 11.0, SVG Export, v1.0 gro-key-
> algorithm.svg Page-1 -->
> +<svg xmlns="http://www.w3.org/2000/svg"
> xmlns:xlink="http://www.w3.org/1999/xlink"
> xmlns:ev="http://www.w3.org/2001/xml-events"
> +
> 	xmlns:v="http://schemas.microsoft.com/visio/2003/SVGExtensions/
> " width="6.06163in" height="2.66319in"
> +		viewBox="0 0 436.438 191.75" xml:space="preserve" color-
> interpolation-filters="sRGB" class="st10">
> +	<v:documentProperties v:langID="1033" v:viewMarkup="false"/>
> +
> +	<style type="text/css">
> +	<![CDATA[
> +		.st1 {fill:url(#grad30-4);stroke:#404040;stroke-
> linecap:round;stroke-linejoin:round;stroke-width:0.25}
> +		.st2 {fill:#000000;font-family:Calibri;font-size:1.00001em}
> +		.st3 {font-size:1em;font-weight:bold}
> +		.st4 {fill:#000000;font-family:Calibri;font-size:1.00001em;font-
> weight:bold}
> +		.st5 {font-size:1em;font-weight:normal}
> +		.st6 {marker-end:url(#mrkr5-38);stroke:#404040;stroke-
> linecap:round;stroke-linejoin:round;stroke-width:1}
> +		.st7 {fill:#404040;fill-opacity:1;stroke:#404040;stroke-
> opacity:1;stroke-width:0.28409090909091}
> +		.st8 {fill:none;stroke:none;stroke-linecap:round;stroke-
> linejoin:round;stroke-width:0.25}
> +		.st9 {fill:#000000;font-family:Calibri;font-size:0.833336em}
> +		.st10 {fill:none;fill-rule:evenodd;font-
> size:12px;overflow:visible;stroke-linecap:square;stroke-miterlimit:3}
> +	]]>
> +	</style>
> +
> +	<defs id="Patterns_And_Gradients">
> +		<linearGradient id="grad30-4" v:fillPattern="30"
> v:foreground="#c6d09f" v:background="#d1dab4" x1="0" y1="1" x2="0"
> y2="0">
> +			<stop offset="0" style="stop-color:#c6d09f;stop-
> opacity:1"/>
> +			<stop offset="1" style="stop-color:#d1dab4;stop-
> opacity:1"/>
> +		</linearGradient>
> +		<linearGradient id="grad30-35" v:fillPattern="30"
> v:foreground="#f0f0f0" v:background="#ffffff" x1="0" y1="1" x2="0" y2="0">
> +			<stop offset="0" style="stop-color:#f0f0f0;stop-
> opacity:1"/>
> +			<stop offset="1" style="stop-color:#ffffff;stop-
> opacity:1"/>
> +		</linearGradient>
> +	</defs>
> +	<defs id="Markers">
> +		<g id="lend5">
> +			<path d="M 2 1 L 0 0 L 1.98117 -0.993387 C 1.67173 -
> 0.364515 1.67301 0.372641 1.98465 1.00043 " style="stroke:none"/>
> +		</g>
> +		<marker id="mrkr5-38" class="st7" v:arrowType="5"
> v:arrowSize="2" v:setback="6.16" refX="-6.16" orient="auto"
> +				markerUnits="strokeWidth"
> overflow="visible">
> +			<use xlink:href="#lend5" transform="scale(-3.52,-
> 3.52) "/>
> +		</marker>
> +	</defs>
> +	<g v:mID="0" v:index="1" v:groupContext="foregroundPage">
> +		<title>Page-1</title>
> +		<v:pageProperties v:drawingScale="1" v:pageScale="1"
> v:drawingUnits="0" v:shadowOffsetX="9" v:shadowOffsetY="-9"/>
> +		<v:layer v:name="Connector" v:index="0"/>
> +		<g id="shape1-1" v:mID="1" v:groupContext="shape"
> transform="translate(0.25,-117.25)">
> +			<title>Rounded rectangle</title>
> +			<desc>Categorize into an existed “flow”</desc>
> +			<v:userDefs>
> +				<v:ud v:nameU="visVersion"
> v:val="VT0(14):26"/>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<v:textBlock v:margins="rect(4,4,4,4)"/>
> +			<v:textRect cx="90" cy="173.75" width="180"
> height="36"/>
> +			<path d="M171 191.75 A9.00007 9.00007 -180 0 0 180
> 182.75 L180 164.75 A9.00007 9.00007 -180 0 0 171 155.75 L9 155.75
> +						 A9.00007 9.00007 -180 0 0 -0
> 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L171 191.75 Z"
> +					class="st1"/>
> +			<text x="8.91" y="177.35" class="st2"
> v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Categorize into
> an <tspan
> +
> 	class="st3">existed</tspan><tspan class="st3" v:langID="2052">
> </tspan>“<tspan class="st3">flow</tspan>”</text>		</g>
> +		<g id="shape2-9" v:mID="2" v:groupContext="shape"
> transform="translate(0.25,-58.75)">
> +			<title>Rounded rectangle.2</title>
> +			<desc>Search for a “neighbor”</desc>
> +			<v:userDefs>
> +				<v:ud v:nameU="visVersion"
> v:val="VT0(14):26"/>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<v:textBlock v:margins="rect(4,4,4,4)"/>
> +			<v:textRect cx="90" cy="173.75" width="180"
> height="36"/>
> +			<path d="M171 191.75 A9.00007 9.00007 -180 0 0 180
> 182.75 L180 164.75 A9.00007 9.00007 -180 0 0 171 155.75 L9 155.75
> +						 A9.00007 9.00007 -180 0 0 -0
> 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L171 191.75 Z"
> +					class="st1"/>
> +			<text x="32.19" y="177.35" class="st2"
> v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Search for a
> “<tspan
> +
> 	class="st3">neighbor</tspan>”</text>		</g>
> +		<g id="shape3-14" v:mID="3" v:groupContext="shape"
> transform="translate(225.813,-117.25)">
> +			<title>Rounded rectangle.3</title>
> +			<desc>Insert a new “flow” and store the
> packet</desc>
> +			<v:userDefs>
> +				<v:ud v:nameU="visVersion"
> v:val="VT0(14):26"/>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<v:textBlock v:margins="rect(4,4,4,4)"/>
> +			<v:textRect cx="105.188" cy="173.75" width="210.38"
> height="36"/>
> +			<path d="M201.37 191.75 A9.00007 9.00007 -180 0 0
> 210.37 182.75 L210.37 164.75 A9.00007 9.00007 -180 0 0 201.37 155.75
> +						 L9 155.75 A9.00007 9.00007 -
> 180 0 0 -0 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L201.37 191.75
> +						 Z" class="st1"/>
> +			<text x="5.45" y="177.35" class="st2"
> v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Insert a <tspan
> +						class="st3">new
> </tspan>“<tspan class="st3">flow</tspan>” and <tspan class="st3">store
> </tspan>the packet</text>		</g>
> +		<g id="shape4-21" v:mID="4" v:groupContext="shape"
> transform="translate(225.25,-58.75)">
> +			<title>Rounded rectangle.4</title>
> +			<desc>Store the packet</desc>
> +			<v:userDefs>
> +				<v:ud v:nameU="visVersion"
> v:val="VT0(14):26"/>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<v:textBlock v:margins="rect(4,4,4,4)"/>
> +			<v:textRect cx="83.25" cy="173.75" width="166.5"
> height="36"/>
> +			<path d="M157.5 191.75 A9.00007 9.00007 -180 0 0
> 166.5 182.75 L166.5 164.75 A9.00007 9.00007 -180 0 0 157.5 155.75 L9
> +						 155.75 A9.00007 9.00007 -180
> 0 0 -0 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L157.5 191.75 Z"
> +					class="st1"/>
> +			<text x="42.81" y="177.35" class="st4"
> v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Store <tspan
> +						class="st5">the
> packet</tspan></text>		</g>
> +		<g id="shape5-26" v:mID="5" v:groupContext="shape"
> transform="translate(0.25,-0.25)">
> +			<title>Rounded rectangle.5</title>
> +			<desc>Merge the packet</desc>
> +			<v:userDefs>
> +				<v:ud v:nameU="visVersion"
> v:val="VT0(14):26"/>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<v:textBlock v:margins="rect(4,4,4,4)"/>
> +			<v:textRect cx="90" cy="173.75" width="180"
> height="36"/>
> +			<path d="M171 191.75 A9.00007 9.00007 -180 0 0 180
> 182.75 L180 164.75 A9.00007 9.00007 -180 0 0 171 155.75 L9 155.75
> +						 A9.00007 9.00007 -180 0 0 -0
> 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L171 191.75 Z"
> +					class="st1"/>
> +			<text x="46.59" y="177.35" class="st4"
> v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Merge <tspan
> +						class="st5">the
> packet</tspan></text>		</g>
> +		<g id="shape6-31" v:mID="6" v:groupContext="shape"
> v:layerMember="0" transform="translate(81.25,-175.75)">
> +			<title>Dynamic connector</title>
> +			<v:userDefs>
> +				<v:ud v:nameU="visVersion"
> v:val="VT0(14):26"/>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<path d="M9 191.75 L9 208.09" class="st6"/>
> +		</g>
> +		<g id="shape7-39" v:mID="7" v:groupContext="shape"
> v:layerMember="0" transform="translate(81.25,-117.25)">
> +			<title>Dynamic connector.7</title>
> +			<v:userDefs>
> +				<v:ud v:nameU="visVersion"
> v:val="VT0(14):26"/>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<path d="M9 191.75 L9 208.09" class="st6"/>
> +		</g>
> +		<g id="shape8-45" v:mID="8" v:groupContext="shape"
> v:layerMember="0" transform="translate(81.25,-58.75)">
> +			<title>Dynamic connector.8</title>
> +			<v:userDefs>
> +				<v:ud v:nameU="visVersion"
> v:val="VT0(14):26"/>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<path d="M9 191.75 L9 208.09" class="st6"/>
> +		</g>
> +		<g id="shape9-51" v:mID="9" v:groupContext="shape"
> v:layerMember="0" transform="translate(180.25,-126.25)">
> +			<title>Dynamic connector.9</title>
> +			<v:userDefs>
> +				<v:ud v:nameU="visVersion"
> v:val="VT0(14):26"/>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<path d="M0 182.75 L39.4 182.75" class="st6"/>
> +		</g>
> +		<g id="shape10-57" v:mID="10" v:groupContext="shape"
> v:layerMember="0" transform="translate(180.25,-67.75)">
> +			<title>Dynamic connector.10</title>
> +			<v:userDefs>
> +				<v:ud v:nameU="visVersion"
> v:val="VT0(14):26"/>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<path d="M0 182.75 L38.84 182.75" class="st6"/>
> +		</g>
> +		<g id="shape11-63" v:mID="11" v:groupContext="shape"
> transform="translate(65.5,-173.5)">
> +			<title>Sheet.11</title>
> +			<desc>packet</desc>
> +			<v:userDefs>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<v:textBlock v:margins="rect(4,4,4,4)"/>
> +			<v:textRect cx="24.75" cy="182.75" width="49.5"
> height="18"/>
> +			<rect x="0" y="173.75" width="49.5" height="18"
> class="st8"/>
> +			<text x="8.46" y="186.35" class="st2"
> v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>packet</text>
> 		</g>
> +		<g id="shape14-66" v:mID="14" v:groupContext="shape"
> transform="translate(98.125,-98.125)">
> +			<title>Sheet.14</title>
> +			<desc>find a “flow”</desc>
> +			<v:userDefs>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<v:textBlock v:margins="rect(4,4,4,4)"/>
> +			<v:textRect cx="32.0625" cy="183.875" width="64.13"
> height="15.75"/>
> +			<rect x="0" y="176" width="64.125" height="15.75"
> class="st8"/>
> +			<text x="6.41" y="186.88" class="st9"
> v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>find a
> “flow”</text>		</g>
> +		<g id="shape15-69" v:mID="15" v:groupContext="shape"
> transform="translate(99.25,-39.625)">
> +			<title>Sheet.15</title>
> +			<desc>find a “neighbor”</desc>
> +			<v:userDefs>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<v:textBlock v:margins="rect(4,4,4,4)"/>
> +			<v:textRect cx="40.5" cy="183.875" width="81"
> height="15.75"/>
> +			<rect x="0" y="176" width="81" height="15.75"
> class="st8"/>
> +			<text x="5.48" y="186.88" class="st9"
> v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>find a
> “neighbor”</text>		</g>
> +		<g id="shape13-72" v:mID="13" v:groupContext="shape"
> transform="translate(181.375,-79)">
> +			<title>Sheet.13</title>
> +			<desc>not find</desc>
> +			<v:userDefs>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<v:textBlock v:margins="rect(4,4,4,4)"/>
> +			<v:textRect cx="21.375" cy="183.875" width="42.75"
> height="15.75"/>
> +			<rect x="0" y="176" width="42.75" height="15.75"
> class="st8"/>
> +			<text x="5.38" y="186.88" class="st9"
> v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>not find</text>
> 		</g>
> +		<g id="shape12-75" v:mID="12" v:groupContext="shape"
> transform="translate(181.375,-137.5)">
> +			<title>Sheet.12</title>
> +			<desc>not find</desc>
> +			<v:userDefs>
> +				<v:ud v:nameU="msvThemeColors"
> v:val="VT0(36):26"/>
> +				<v:ud v:nameU="msvThemeEffects"
> v:val="VT0(16):26"/>
> +			</v:userDefs>
> +			<v:textBlock v:margins="rect(4,4,4,4)"/>
> +			<v:textRect cx="21.375" cy="183.875" width="42.75"
> height="15.75"/>
> +			<rect x="0" y="176" width="42.75" height="15.75"
> class="st8"/>
> +			<text x="5.38" y="186.88" class="st9"
> v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>not find</text>
> 		</g>
> +	</g>
> +</svg>
> diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c
> index 03e5ccf..27af23e 100644
> --- a/lib/librte_gro/gro_tcp4.c
> +++ b/lib/librte_gro/gro_tcp4.c
> @@ -6,8 +6,6 @@
>  #include <rte_mbuf.h>
>  #include <rte_cycles.h>
>  #include <rte_ethdev.h>
> -#include <rte_ip.h>
> -#include <rte_tcp.h>
> 
>  #include "gro_tcp4.h"
> 
> @@ -44,20 +42,20 @@ gro_tcp4_tbl_create(uint16_t socket_id,
>  	}
>  	tbl->max_item_num = entries_num;
> 
> -	size = sizeof(struct gro_tcp4_key) * entries_num;
> -	tbl->keys = rte_zmalloc_socket(__func__,
> +	size = sizeof(struct gro_tcp4_flow) * entries_num;
> +	tbl->flows = rte_zmalloc_socket(__func__,
>  			size,
>  			RTE_CACHE_LINE_SIZE,
>  			socket_id);
> -	if (tbl->keys == NULL) {
> +	if (tbl->flows == NULL) {
>  		rte_free(tbl->items);
>  		rte_free(tbl);
>  		return NULL;
>  	}
> -	/* INVALID_ARRAY_INDEX indicates empty key */
> +	/* INVALID_ARRAY_INDEX indicates an empty flow */
>  	for (i = 0; i < entries_num; i++)
> -		tbl->keys[i].start_index = INVALID_ARRAY_INDEX;
> -	tbl->max_key_num = entries_num;
> +		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
> +	tbl->max_flow_num = entries_num;
> 
>  	return tbl;
>  }
> @@ -69,116 +67,15 @@ gro_tcp4_tbl_destroy(void *tbl)
> 
>  	if (tcp_tbl) {
>  		rte_free(tcp_tbl->items);
> -		rte_free(tcp_tbl->keys);
> +		rte_free(tcp_tbl->flows);
>  	}
>  	rte_free(tcp_tbl);
>  }
> 
> -/*
> - * merge two TCP/IPv4 packets without updating checksums.
> - * If cmp is larger than 0, append the new packet to the
> - * original packet. Otherwise, pre-pend the new packet to
> - * the original packet.
> - */
> -static inline int
> -merge_two_tcp4_packets(struct gro_tcp4_item *item_src,
> -		struct rte_mbuf *pkt,
> -		uint16_t ip_id,
> -		uint32_t sent_seq,
> -		int cmp)
> -{
> -	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
> -	uint16_t tcp_datalen;
> -
> -	if (cmp > 0) {
> -		pkt_head = item_src->firstseg;
> -		pkt_tail = pkt;
> -	} else {
> -		pkt_head = pkt;
> -		pkt_tail = item_src->firstseg;
> -	}
> -
> -	/* check if the packet length will be beyond the max value */
> -	tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len -
> -		pkt_tail->l3_len - pkt_tail->l4_len;
> -	if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen >
> -			TCP4_MAX_L3_LENGTH)
> -		return 0;
> -
> -	/* remove packet header for the tail packet */
> -	rte_pktmbuf_adj(pkt_tail,
> -			pkt_tail->l2_len +
> -			pkt_tail->l3_len +
> -			pkt_tail->l4_len);
> -
> -	/* chain two packets together */
> -	if (cmp > 0) {
> -		item_src->lastseg->next = pkt;
> -		item_src->lastseg = rte_pktmbuf_lastseg(pkt);
> -		/* update IP ID to the larger value */
> -		item_src->ip_id = ip_id;
> -	} else {
> -		lastseg = rte_pktmbuf_lastseg(pkt);
> -		lastseg->next = item_src->firstseg;
> -		item_src->firstseg = pkt;
> -		/* update sent_seq to the smaller value */
> -		item_src->sent_seq = sent_seq;
> -	}
> -	item_src->nb_merged++;
> -
> -	/* update mbuf metadata for the merged packet */
> -	pkt_head->nb_segs += pkt_tail->nb_segs;
> -	pkt_head->pkt_len += pkt_tail->pkt_len;
> -
> -	return 1;
> -}
> -
> -static inline int
> -check_seq_option(struct gro_tcp4_item *item,
> -		struct tcp_hdr *tcp_hdr,
> -		uint16_t tcp_hl,
> -		uint16_t tcp_dl,
> -		uint16_t ip_id,
> -		uint32_t sent_seq)
> -{
> -	struct rte_mbuf *pkt0 = item->firstseg;
> -	struct ipv4_hdr *ipv4_hdr0;
> -	struct tcp_hdr *tcp_hdr0;
> -	uint16_t tcp_hl0, tcp_dl0;
> -	uint16_t len;
> -
> -	ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) +
> -			pkt0->l2_len);
> -	tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len);
> -	tcp_hl0 = pkt0->l4_len;
> -
> -	/* check if TCP option fields equal. If not, return 0. */
> -	len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr);
> -	if ((tcp_hl != tcp_hl0) ||
> -			((len > 0) && (memcmp(tcp_hdr + 1,
> -					tcp_hdr0 + 1,
> -					len) != 0)))
> -		return 0;
> -
> -	/* check if the two packets are neighbors */
> -	tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0;
> -	if ((sent_seq == (item->sent_seq + tcp_dl0)) &&
> -			(ip_id == (item->ip_id + 1)))
> -		/* append the new packet */
> -		return 1;
> -	else if (((sent_seq + tcp_dl) == item->sent_seq) &&
> -			((ip_id + item->nb_merged) == item->ip_id))
> -		/* pre-pend the new packet */
> -		return -1;
> -	else
> -		return 0;
> -}
> -
>  static inline uint32_t
>  find_an_empty_item(struct gro_tcp4_tbl *tbl)
>  {
> -	uint32_t i;
> -	uint32_t max_item_num = tbl->max_item_num;
> +	uint32_t max_item_num = tbl->max_item_num, i;
> 
>  	for (i = 0; i < max_item_num; i++)
>  		if (tbl->items[i].firstseg == NULL)
> @@ -187,13 +84,12 @@ find_an_empty_item(struct gro_tcp4_tbl *tbl)
>  }
> 
>  static inline uint32_t
> -find_an_empty_key(struct gro_tcp4_tbl *tbl)
> +find_an_empty_flow(struct gro_tcp4_tbl *tbl)
>  {
> -	uint32_t i;
> -	uint32_t max_key_num = tbl->max_key_num;
> +	uint32_t max_flow_num = tbl->max_flow_num, i;
> 
> -	for (i = 0; i < max_key_num; i++)
> -		if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX)
> +	for (i = 0; i < max_flow_num; i++)
> +		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
>  			return i;
>  	return INVALID_ARRAY_INDEX;
>  }
> @@ -201,10 +97,11 @@ find_an_empty_key(struct gro_tcp4_tbl *tbl)
>  static inline uint32_t
>  insert_new_item(struct gro_tcp4_tbl *tbl,
>  		struct rte_mbuf *pkt,
> -		uint16_t ip_id,
> -		uint32_t sent_seq,
> +		uint64_t start_time,
>  		uint32_t prev_idx,
> -		uint64_t start_time)
> +		uint32_t sent_seq,
> +		uint16_t ip_id,
> +		uint8_t is_atomic)
>  {
>  	uint32_t item_idx;
> 
> @@ -219,9 +116,10 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
>  	tbl->items[item_idx].sent_seq = sent_seq;
>  	tbl->items[item_idx].ip_id = ip_id;
>  	tbl->items[item_idx].nb_merged = 1;
> +	tbl->items[item_idx].is_atomic = is_atomic;
>  	tbl->item_num++;
> 
> -	/* if the previous packet exists, chain the new one with it */
> +	/* If the previous packet exists, chain them together. */
>  	if (prev_idx != INVALID_ARRAY_INDEX) {
>  		tbl->items[item_idx].next_pkt_idx =
>  			tbl->items[prev_idx].next_pkt_idx;
> @@ -232,12 +130,13 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
>  }
> 
>  static inline uint32_t
> -delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
> +delete_item(struct gro_tcp4_tbl *tbl,
> +		uint32_t item_idx,
>  		uint32_t prev_item_idx)
>  {
>  	uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
> 
> -	/* set NULL to firstseg to indicate it's an empty item */
> +	/* NULL indicates an empty item. */
>  	tbl->items[item_idx].firstseg = NULL;
>  	tbl->item_num--;
>  	if (prev_item_idx != INVALID_ARRAY_INDEX)
> @@ -247,53 +146,33 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t
> item_idx,
>  }
> 
>  static inline uint32_t
> -insert_new_key(struct gro_tcp4_tbl *tbl,
> -		struct tcp4_key *key_src,
> +insert_new_flow(struct gro_tcp4_tbl *tbl,
> +		struct tcp4_flow_key *src,
>  		uint32_t item_idx)
>  {
> -	struct tcp4_key *key_dst;
> -	uint32_t key_idx;
> +	struct tcp4_flow_key *dst;
> +	uint32_t flow_idx;
> 
> -	key_idx = find_an_empty_key(tbl);
> -	if (key_idx == INVALID_ARRAY_INDEX)
> +	flow_idx = find_an_empty_flow(tbl);
> +	if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
>  		return INVALID_ARRAY_INDEX;
> 
> -	key_dst = &(tbl->keys[key_idx].key);
> +	dst = &(tbl->flows[flow_idx].key);
> 
> -	ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr));
> -	ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr));
> -	key_dst->ip_src_addr = key_src->ip_src_addr;
> -	key_dst->ip_dst_addr = key_src->ip_dst_addr;
> -	key_dst->recv_ack = key_src->recv_ack;
> -	key_dst->src_port = key_src->src_port;
> -	key_dst->dst_port = key_src->dst_port;
> +	ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr));
> +	ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr));
> +	dst->ip_src_addr = src->ip_src_addr;
> +	dst->ip_dst_addr = src->ip_dst_addr;
> +	dst->recv_ack = src->recv_ack;
> +	dst->src_port = src->src_port;
> +	dst->dst_port = src->dst_port;
> 
> -	/* non-INVALID_ARRAY_INDEX value indicates this key is valid */
> -	tbl->keys[key_idx].start_index = item_idx;
> -	tbl->key_num++;
> +	tbl->flows[flow_idx].start_index = item_idx;
> +	tbl->flow_num++;
> 
> -	return key_idx;
> +	return flow_idx;
>  }
> 
> -static inline int
> -is_same_key(struct tcp4_key k1, struct tcp4_key k2)
> -{
> -	if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0)
> -		return 0;
> -
> -	if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0)
> -		return 0;
> -
> -	return ((k1.ip_src_addr == k2.ip_src_addr) &&
> -			(k1.ip_dst_addr == k2.ip_dst_addr) &&
> -			(k1.recv_ack == k2.recv_ack) &&
> -			(k1.src_port == k2.src_port) &&
> -			(k1.dst_port == k2.dst_port));
> -}
> -
> -/*
> - * update packet length for the flushed packet.
> - */
>  static inline void
>  update_header(struct gro_tcp4_item *item)
>  {
> @@ -315,84 +194,106 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
>  	struct ipv4_hdr *ipv4_hdr;
>  	struct tcp_hdr *tcp_hdr;
>  	uint32_t sent_seq;
> -	uint16_t tcp_dl, ip_id;
> +	uint16_t tcp_dl, ip_id, frag_off, hdr_len;
> +	uint8_t is_atomic;
> 
> -	struct tcp4_key key;
> +	struct tcp4_flow_key key;
>  	uint32_t cur_idx, prev_idx, item_idx;
> -	uint32_t i, max_key_num;
> +	uint32_t i, max_flow_num, left_flow_num;
>  	int cmp;
> +	uint8_t find;
> 
>  	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
>  	ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
>  	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
> +	hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
> 
>  	/*
> -	 * if FIN, SYN, RST, PSH, URG, ECE or
> -	 * CWR is set, return immediately.
> +	 * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
> +	 * or CWR set.
>  	 */
>  	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
>  		return -1;
> -	/* if payload length is 0, return immediately */
> -	tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len -
> -		pkt->l4_len;
> -	if (tcp_dl == 0)
> +	/*
> +	 * Don't process the packet whose payload length is less than or
> +	 * equal to 0.
> +	 */
> +	tcp_dl = pkt->pkt_len - hdr_len;
> +	if (tcp_dl <= 0)
>  		return -1;
> 
> -	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
> +	/*
> +	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
> +	 * whose DF bit is 1, IPv4 ID is ignored.
> +	 */
> +	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
> +	is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
> +	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
>  	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
> 
>  	ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr));
>  	ether_addr_copy(&(eth_hdr->d_addr), &(key.eth_daddr));
>  	key.ip_src_addr = ipv4_hdr->src_addr;
>  	key.ip_dst_addr = ipv4_hdr->dst_addr;
> +	key.recv_ack = tcp_hdr->recv_ack;
>  	key.src_port = tcp_hdr->src_port;
>  	key.dst_port = tcp_hdr->dst_port;
> -	key.recv_ack = tcp_hdr->recv_ack;
> 
> -	/* search for a key */
> -	max_key_num = tbl->max_key_num;
> -	for (i = 0; i < max_key_num; i++) {
> -		if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) &&
> -				is_same_key(tbl->keys[i].key, key))
> -			break;
> +	/* Search for a matched flow. */
> +	max_flow_num = tbl->max_flow_num;
> +	left_flow_num = tbl->flow_num;
> +	find = 0;
> +	for (i = 0; i < max_flow_num && left_flow_num; i++) {
> +		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) {
> +			if (is_same_tcp4_flow(tbl->flows[i].key, key)) {
> +				find = 1;
> +				break;
> +			}
> +			left_flow_num--;
> +		}
>  	}
> 
> -	/* can't find a key, so insert a new key and a new item. */
> -	if (i == tbl->max_key_num) {
> -		item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq,
> -				INVALID_ARRAY_INDEX, start_time);
> +	/*
> +	 * Fail to find a matched flow. Insert a new flow and store the
> +	 * packet into the flow.
> +	 */
> +	if (find == 0) {
> +		item_idx = insert_new_item(tbl, pkt, start_time,
> +				INVALID_ARRAY_INDEX, sent_seq, ip_id,
> +				is_atomic);
>  		if (item_idx == INVALID_ARRAY_INDEX)
>  			return -1;
> -		if (insert_new_key(tbl, &key, item_idx) ==
> +		if (insert_new_flow(tbl, &key, item_idx) ==
>  				INVALID_ARRAY_INDEX) {
> -			/*
> -			 * fail to insert a new key, so
> -			 * delete the inserted item
> -			 */
> +			/* Fail to insert a new flow. */
>  			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
>  			return -1;
>  		}
>  		return 0;
>  	}
> 
> -	/* traverse all packets in the item group to find one to merge */
> -	cur_idx = tbl->keys[i].start_index;
> +	/*
> +	 * Check all packets in the flow and try to find a neighbor for
> +	 * the input packet.
> +	 */
> +	cur_idx = tbl->flows[i].start_index;
>  	prev_idx = cur_idx;
>  	do {
>  		cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
> -				pkt->l4_len, tcp_dl, ip_id, sent_seq);
> +				sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
> +				is_atomic);
>  		if (cmp) {
>  			if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
> -						pkt, ip_id,
> -						sent_seq, cmp))
> +						pkt, cmp, sent_seq, ip_id, 0))
>  				return 1;
>  			/*
> -			 * fail to merge two packets since the packet
> -			 * length will be greater than the max value.
> -			 * So insert the packet into the item group.
> +			 * Fail to merge the two packets, as the packet
> +			 * length is greater than the max value. Store
> +			 * the packet into the flow.
>  			 */
> -			if (insert_new_item(tbl, pkt, ip_id, sent_seq,
> -						prev_idx, start_time) ==
> +			if (insert_new_item(tbl, pkt, start_time, prev_idx,
> +						sent_seq, ip_id,
> +						is_atomic) ==
>  					INVALID_ARRAY_INDEX)
>  				return -1;
>  			return 0;
> @@ -401,12 +302,9 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
>  		cur_idx = tbl->items[cur_idx].next_pkt_idx;
>  	} while (cur_idx != INVALID_ARRAY_INDEX);
> 
> -	/*
> -	 * can't find a packet in the item group to merge,
> -	 * so insert the packet into the item group.
> -	 */
> -	if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx,
> -				start_time) == INVALID_ARRAY_INDEX)
> +	/* Fail to find a neighbor, so store the packet into the flow. */
> +	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
> +				ip_id, is_atomic) == INVALID_ARRAY_INDEX)
>  		return -1;
> 
>  	return 0;
> @@ -418,46 +316,35 @@ gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl
> *tbl,
>  		struct rte_mbuf **out,
>  		uint16_t nb_out)
>  {
> -	uint16_t k = 0;
> +	uint32_t max_flow_num = tbl->max_flow_num;
>  	uint32_t i, j;
> -	uint32_t max_key_num = tbl->max_key_num;
> +	uint16_t k = 0;
> 
> -	for (i = 0; i < max_key_num; i++) {
> -		/* all keys have been checked, return immediately */
> -		if (tbl->key_num == 0)
> +	for (i = 0; i < max_flow_num; i++) {
> +		if (unlikely(tbl->flow_num == 0))
>  			return k;
> 
> -		j = tbl->keys[i].start_index;
> +		j = tbl->flows[i].start_index;
>  		while (j != INVALID_ARRAY_INDEX) {
>  			if (tbl->items[j].start_time <= flush_timestamp) {
>  				out[k++] = tbl->items[j].firstseg;
>  				if (tbl->items[j].nb_merged > 1)
>  					update_header(&(tbl->items[j]));
>  				/*
> -				 * delete the item and get
> -				 * the next packet index
> +				 * Delete the packet and get the next
> +				 * packet in the flow.
>  				 */
> -				j = delete_item(tbl, j,
> -						INVALID_ARRAY_INDEX);
> +				j = delete_item(tbl, j,
> INVALID_ARRAY_INDEX);
> +				tbl->flows[i].start_index = j;
> +				if (j == INVALID_ARRAY_INDEX)
> +					tbl->flow_num--;
> 
> -				/*
> -				 * delete the key as all of
> -				 * packets are flushed
> -				 */
> -				if (j == INVALID_ARRAY_INDEX) {
> -					tbl->keys[i].start_index =
> -						INVALID_ARRAY_INDEX;
> -					tbl->key_num--;
> -				} else
> -					/* update start_index of the key */
> -					tbl->keys[i].start_index = j;
> -
> -				if (k == nb_out)
> +				if (unlikely(k == nb_out))
>  					return k;
>  			} else
>  				/*
> -				 * left packets of this key won't be
> -				 * timeout, so go to check other keys.
> +				 * The left packets in this flow won't be
> +				 * timeout. Go to check other flows.
>  				 */
>  				break;
>  		}
> diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h
> index d129523..c2b66a8 100644
> --- a/lib/librte_gro/gro_tcp4.h
> +++ b/lib/librte_gro/gro_tcp4.h
> @@ -5,17 +5,20 @@
>  #ifndef _GRO_TCP4_H_
>  #define _GRO_TCP4_H_
> 
> +#include <rte_ip.h>
> +#include <rte_tcp.h>
> +
>  #define INVALID_ARRAY_INDEX 0xffffffffUL
>  #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
> 
>  /*
> - * the max L3 length of a TCP/IPv4 packet. The L3 length
> - * is the sum of ipv4 header, tcp header and L4 payload.
> + * The max length of a IPv4 packet, which includes the length of the L3
> + * header, the L4 header and the data payload.
>   */
> -#define TCP4_MAX_L3_LENGTH UINT16_MAX
> +#define MAX_IPV4_PKT_LENGTH UINT16_MAX
> 
> -/* criteria of mergeing packets */
> -struct tcp4_key {
> +/* Header fields representing a TCP/IPv4 flow */
> +struct tcp4_flow_key {
>  	struct ether_addr eth_saddr;
>  	struct ether_addr eth_daddr;
>  	uint32_t ip_src_addr;
> @@ -26,77 +29,76 @@ struct tcp4_key {
>  	uint16_t dst_port;
>  };
> 
> -struct gro_tcp4_key {
> -	struct tcp4_key key;
> +struct gro_tcp4_flow {
> +	struct tcp4_flow_key key;
>  	/*
> -	 * the index of the first packet in the item group.
> -	 * If the value is INVALID_ARRAY_INDEX, it means
> -	 * the key is empty.
> +	 * The index of the first packet in the flow.
> +	 * INVALID_ARRAY_INDEX indicates an empty flow.
>  	 */
>  	uint32_t start_index;
>  };
> 
>  struct gro_tcp4_item {
>  	/*
> -	 * first segment of the packet. If the value
> +	 * The first MBUF segment of the packet. If the value
>  	 * is NULL, it means the item is empty.
>  	 */
>  	struct rte_mbuf *firstseg;
> -	/* last segment of the packet */
> +	/* The last MBUF segment of the packet */
>  	struct rte_mbuf *lastseg;
>  	/*
> -	 * the time when the first packet is inserted
> -	 * into the table. If a packet in the table is
> -	 * merged with an incoming packet, this value
> -	 * won't be updated. We set this value only
> -	 * when the first packet is inserted into the
> -	 * table.
> +	 * The time when the first packet is inserted into the table.
> +	 * This value won't be updated, even if the packet is merged
> +	 * with other packets.
>  	 */
>  	uint64_t start_time;
>  	/*
> -	 * we use next_pkt_idx to chain the packets that
> -	 * have same key value but can't be merged together.
> +	 * next_pkt_idx is used to chain the packets that
> +	 * are in the same flow but can't be merged together
> +	 * (e.g. caused by packet reordering).
>  	 */
>  	uint32_t next_pkt_idx;
> -	/* the sequence number of the packet */
> +	/* TCP sequence number of the packet */
>  	uint32_t sent_seq;
> -	/* the IP ID of the packet */
> +	/* IPv4 ID of the packet */
>  	uint16_t ip_id;
> -	/* the number of merged packets */
> +	/* The number of merged packets */
>  	uint16_t nb_merged;
> +	/* Indicate if IPv4 ID can be ignored */
> +	uint8_t is_atomic;
>  };
> 
>  /*
> - * TCP/IPv4 reassembly table structure.
> + * TCP/IPv4 reassembly table structure
>   */
>  struct gro_tcp4_tbl {
>  	/* item array */
>  	struct gro_tcp4_item *items;
> -	/* key array */
> -	struct gro_tcp4_key *keys;
> +	/* flow array */
> +	struct gro_tcp4_flow *flows;
>  	/* current item number */
>  	uint32_t item_num;
> -	/* current key num */
> -	uint32_t key_num;
> +	/* current flow num */
> +	uint32_t flow_num;
>  	/* item array size */
>  	uint32_t max_item_num;
> -	/* key array size */
> -	uint32_t max_key_num;
> +	/* flow array size */
> +	uint32_t max_flow_num;
>  };
> 
>  /**
>   * This function creates a TCP/IPv4 reassembly table.
>   *
>   * @param socket_id
> - *  socket index for allocating TCP/IPv4 reassemble table
> + *  Socket index for allocating the TCP/IPv4 reassemble table
>   * @param max_flow_num
> - *  the maximum number of flows in the TCP/IPv4 GRO table
> + *  The maximum number of flows in the TCP/IPv4 GRO table
>   * @param max_item_per_flow
> - *  the maximum packet number per flow.
> + *  The maximum number of packets per flow
>   *
>   * @return
> - *  if create successfully, return a pointer which points to the
> - *  created TCP/IPv4 GRO table. Otherwise, return NULL.
> + *  - Return the table pointer on success.
> + *  - Return NULL on failure.
>   */
>  void *gro_tcp4_tbl_create(uint16_t socket_id,
>  		uint16_t max_flow_num,
> @@ -106,62 +108,56 @@ void *gro_tcp4_tbl_create(uint16_t socket_id,
>   * This function destroys a TCP/IPv4 reassembly table.
>   *
>   * @param tbl
> - *  a pointer points to the TCP/IPv4 reassembly table.
> + *  Pointer pointing to the TCP/IPv4 reassembly table.
>   */
>  void gro_tcp4_tbl_destroy(void *tbl);
> 
>  /**
> - * This function searches for a packet in the TCP/IPv4 reassembly table
> - * to merge with the inputted one. To merge two packets is to chain them
> - * together and update packet headers. Packets, whose SYN, FIN, RST, PSH
> - * CWR, ECE or URG bit is set, are returned immediately. Packets which
> - * only have packet headers (i.e. without data) are also returned
> - * immediately. Otherwise, the packet is either merged, or inserted into
> - * the table. Besides, if there is no available space to insert the
> - * packet, this function returns immediately too.
> + * This function merges a TCP/IPv4 packet. It doesn't process the packet,
> + * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or doesn't have
> + * payload.
>   *
> - * This function assumes the inputted packet is with correct IPv4 and
> - * TCP checksums. And if two packets are merged, it won't re-calculate
> - * IPv4 and TCP checksums. Besides, if the inputted packet is IP
> - * fragmented, it assumes the packet is complete (with TCP header).
> + * This function doesn't check if the packet has correct checksums and
> + * doesn't re-calculate checksums for the merged packet. Additionally,
> + * it assumes the packets are complete (i.e., MF==0 && frag_off==0),
> + * when IP fragmentation is possible (i.e., DF==0). It returns the
> + * packet, if the packet has invalid parameters (e.g. SYN bit is set)
> + * or there is no available space in the table.
>   *
>   * @param pkt
> - *  packet to reassemble.
> + *  Packet to reassemble
>   * @param tbl
> - *  a pointer that points to a TCP/IPv4 reassembly table.
> + *  Pointer pointing to the TCP/IPv4 reassembly table
>   * @start_time
> - *  the start time that the packet is inserted into the table
> + *  The time when the packet is inserted into the table
>   *
>   * @return
> - *  if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE
> - *  or URG bit is set, or there is no available space in the table to
> - *  insert a new item or a new key, return a negative value. If the
> - *  packet is merged successfully, return an positive value. If the
> - *  packet is inserted into the table, return 0.
> + *  - Return a positive value if the packet is merged.
> + *  - Return zero if the packet isn't merged but stored in the table.
> + *  - Return a negative value for invalid parameters or no available
> + *    space in the table.
>   */
>  int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt,
>  		struct gro_tcp4_tbl *tbl,
>  		uint64_t start_time);
> 
>  /**
> - * This function flushes timeout packets in a TCP/IPv4 reassembly table
> - * to applications, and without updating checksums for merged packets.
> - * The max number of flushed timeout packets is the element number of
> - * the array which is used to keep flushed packets.
> + * This function flushes timeout packets in a TCP/IPv4 reassembly table,
> + * and without updating checksums.
>   *
>   * @param tbl
> - *  a pointer that points to a TCP GRO table.
> + *  TCP/IPv4 reassembly table pointer
>   * @param flush_timestamp
> - *  this function flushes packets which are inserted into the table
> - *  before or at the flush_timestamp.
> + *  Flush packets which are inserted into the table before or at the
> + *  flush_timestamp.
>   * @param out
> - *  pointer array which is used to keep flushed packets.
> + *  Pointer array used to keep flushed packets
>   * @param nb_out
> - *  the element number of out. It's also the max number of timeout
> + *  The element number in 'out'. It also determines the maximum number
> of
>   *  packets that can be flushed finally.
>   *
>   * @return
> - *  the number of packets that are returned.
> + *  The number of flushed packets
>   */
>  uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
>  		uint64_t flush_timestamp,
> @@ -173,10 +169,131 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct
> gro_tcp4_tbl *tbl,
>   * reassembly table.
>   *
>   * @param tbl
> - *  pointer points to a TCP/IPv4 reassembly table.
> + *  TCP/IPv4 reassembly table pointer
>   *
>   * @return
> - *  the number of packets in the table
> + *  The number of packets in the table
>   */
>  uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
> +
> +/*
> + * Check if two TCP/IPv4 packets belong to the same flow.
> + */
> +static inline int
> +is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
> +{
> +	return (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) &&
> +			is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr)
> &&
> +			(k1.ip_src_addr == k2.ip_src_addr) &&
> +			(k1.ip_dst_addr == k2.ip_dst_addr) &&
> +			(k1.recv_ack == k2.recv_ack) &&
> +			(k1.src_port == k2.src_port) &&
> +			(k1.dst_port == k2.dst_port));
> +}
> +
> +/*
> + * Check if two TCP/IPv4 packets are neighbors.
> + */
> +static inline int
> +check_seq_option(struct gro_tcp4_item *item,
> +		struct tcp_hdr *tcph,
> +		uint32_t sent_seq,
> +		uint16_t ip_id,
> +		uint16_t tcp_hl,
> +		uint16_t tcp_dl,
> +		uint16_t l2_offset,
> +		uint8_t is_atomic)
> +{
> +	struct rte_mbuf *pkt_orig = item->firstseg;
> +	struct ipv4_hdr *iph_orig;
> +	struct tcp_hdr *tcph_orig;
> +	uint16_t len, l4_len_orig;
> +
> +	iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
> +			l2_offset + pkt_orig->l2_len);
> +	tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
> +	l4_len_orig = pkt_orig->l4_len;
> +
> +	/* Check if TCP option fields equal */
> +	len = RTE_MAX(tcp_hl, l4_len_orig) - sizeof(struct tcp_hdr);
> +	if ((tcp_hl != l4_len_orig) || ((len > 0) &&
> +				(memcmp(tcph + 1, tcph_orig + 1,
> +					len) != 0)))
> +		return 0;
> +
> +	/* Don't merge packets whose DF bits are different */
> +	if (unlikely(item->is_atomic ^ is_atomic))
> +		return 0;
> +
> +	/* Check if the two packets are neighbors */
> +	len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
> +		pkt_orig->l3_len - l4_len_orig;
> +	if ((sent_seq == item->sent_seq + len) && (is_atomic ||
> +				(ip_id == item->ip_id + item->nb_merged)))
> +		/* Append the new packet */
> +		return 1;
> +	else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
> +				(ip_id + 1 == item->ip_id)))
> +		/* Pre-pend the new packet */
> +		return -1;
> +
> +	return 0;
> +}
> +
> +/*
> + * Merge two TCP/IPv4 packets without updating checksums.
> + * If cmp is larger than 0, append the new packet to the
> + * original packet. Otherwise, pre-pend the new packet to
> + * the original packet.
> + */
> +static inline int
> +merge_two_tcp4_packets(struct gro_tcp4_item *item,
> +		struct rte_mbuf *pkt,
> +		int cmp,
> +		uint32_t sent_seq,
> +		uint16_t ip_id,
> +		uint16_t l2_offset)
> +{
> +	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
> +	uint16_t hdr_len, l2_len;
> +
> +	if (cmp > 0) {
> +		pkt_head = item->firstseg;
> +		pkt_tail = pkt;
> +	} else {
> +		pkt_head = pkt;
> +		pkt_tail = item->firstseg;
> +	}
> +
> +	/* Check if the IPv4 packet length is greater than the max value */
> +	hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
> +		pkt_head->l4_len;
> +	l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
> +	if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len - hdr_len >
> +			MAX_IPV4_PKT_LENGTH))
> +		return 0;
> +
> +	/* Remove the packet header */
> +	rte_pktmbuf_adj(pkt_tail, hdr_len);
> +
> +	/* Chain two packets together */
> +	if (cmp > 0) {
> +		item->lastseg->next = pkt;
> +		item->lastseg = rte_pktmbuf_lastseg(pkt);
> +	} else {
> +		lastseg = rte_pktmbuf_lastseg(pkt);
> +		lastseg->next = item->firstseg;
> +		item->firstseg = pkt;
> +		/* Update sent_seq and ip_id */
> +		item->sent_seq = sent_seq;
> +		item->ip_id = ip_id;
> +	}
> +	item->nb_merged++;
> +
> +	/* Update MBUF metadata for the merged packet */
> +	pkt_head->nb_segs += pkt_tail->nb_segs;
> +	pkt_head->pkt_len += pkt_tail->pkt_len;
> +
> +	return 1;
> +}
>  #endif
> diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
> index d6b8cd1..7176c0e 100644
> --- a/lib/librte_gro/rte_gro.c
> +++ b/lib/librte_gro/rte_gro.c
> @@ -23,11 +23,14 @@ static gro_tbl_destroy_fn
> tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
>  static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM]
> = {
>  			gro_tcp4_tbl_pkt_count, NULL};
> 
> +#define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
> +		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
> +
>  /*
> - * GRO context structure, which is used to merge packets. It keeps
> - * many reassembly tables of desired GRO types. Applications need to
> - * create GRO context objects before using rte_gro_reassemble to
> - * perform GRO.
> + * GRO context structure. It keeps the table structures, which are
> + * used to merge packets, for different GRO types. Before using
> + * rte_gro_reassemble(), applications need to create the GRO context
> + * first.
>   */
>  struct gro_ctx {
>  	/* GRO types to perform */
> @@ -65,7 +68,7 @@ rte_gro_ctx_create(const struct rte_gro_param *param)
>  				param->max_flow_num,
>  				param->max_item_per_flow);
>  		if (gro_ctx->tbls[i] == NULL) {
> -			/* destroy all created tables */
> +			/* Destroy all created tables */
>  			gro_ctx->gro_types = gro_types;
>  			rte_gro_ctx_destroy(gro_ctx);
>  			return NULL;
> @@ -85,8 +88,6 @@ rte_gro_ctx_destroy(void *ctx)
>  	uint64_t gro_type_flag;
>  	uint8_t i;
> 
> -	if (gro_ctx == NULL)
> -		return;
>  	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
>  		gro_type_flag = 1ULL << i;
>  		if ((gro_ctx->gro_types & gro_type_flag) == 0)
> @@ -103,62 +104,54 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
>  		uint16_t nb_pkts,
>  		const struct rte_gro_param *param)
>  {
> -	uint16_t i;
> -	uint16_t nb_after_gro = nb_pkts;
> -	uint32_t item_num;
> -
> -	/* allocate a reassembly table for TCP/IPv4 GRO */
> +	/* Allocate a reassembly table for TCP/IPv4 GRO */
>  	struct gro_tcp4_tbl tcp_tbl;
> -	struct gro_tcp4_key tcp_keys[RTE_GRO_MAX_BURST_ITEM_NUM];
> +	struct gro_tcp4_flow
> tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>  	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
> = {{0} };
> 
>  	struct rte_mbuf *unprocess_pkts[nb_pkts];
> -	uint16_t unprocess_num = 0;
> +	uint32_t item_num;
>  	int32_t ret;
> -	uint64_t current_time;
> +	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
> 
> -	if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0)
> +	if (unlikely((param->gro_types & RTE_GRO_TCP_IPV4) == 0))
>  		return nb_pkts;
> 
> -	/* get the actual number of packets */
> +	/* Get the maximum number of packets */
>  	item_num = RTE_MIN(nb_pkts, (param->max_flow_num *
> -			param->max_item_per_flow));
> +				param->max_item_per_flow));
>  	item_num = RTE_MIN(item_num,
> RTE_GRO_MAX_BURST_ITEM_NUM);
> 
>  	for (i = 0; i < item_num; i++)
> -		tcp_keys[i].start_index = INVALID_ARRAY_INDEX;
> +		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
> 
> -	tcp_tbl.keys = tcp_keys;
> +	tcp_tbl.flows = tcp_flows;
>  	tcp_tbl.items = tcp_items;
> -	tcp_tbl.key_num = 0;
> +	tcp_tbl.flow_num = 0;
>  	tcp_tbl.item_num = 0;
> -	tcp_tbl.max_key_num = item_num;
> +	tcp_tbl.max_flow_num = item_num;
>  	tcp_tbl.max_item_num = item_num;
> 
> -	current_time = rte_rdtsc();
> -
>  	for (i = 0; i < nb_pkts; i++) {
> -		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
> -					RTE_PTYPE_L4_TCP)) ==
> -				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP))
> {
> -			ret = gro_tcp4_reassemble(pkts[i],
> -					&tcp_tbl,
> -					current_time);
> +		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
> +			/*
> +			 * The timestamp is ignored, since all packets
> +			 * will be flushed from the tables.
> +			 */
> +			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0);
>  			if (ret > 0)
> -				/* merge successfully */
> +				/* Merge successfully */
>  				nb_after_gro--;
> -			else if (ret < 0) {
> -				unprocess_pkts[unprocess_num++] =
> -					pkts[i];
> -			}
> +			else if (ret < 0)
> +				unprocess_pkts[unprocess_num++] = pkts[i];
>  		} else
>  			unprocess_pkts[unprocess_num++] = pkts[i];
>  	}
> 
> -	/* re-arrange GROed packets */
>  	if (nb_after_gro < nb_pkts) {
> -		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time,
> -				pkts, nb_pkts);
> +		/* Flush all packets from the tables */
> +		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, pkts, nb_pkts);
> +		/* Copy unprocessed packets */
>  		if (unprocess_num > 0) {
>  			memcpy(&pkts[i], unprocess_pkts,
>  					sizeof(struct rte_mbuf *) *
> @@ -174,31 +167,28 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
>  		uint16_t nb_pkts,
>  		void *ctx)
>  {
> -	uint16_t i, unprocess_num = 0;
>  	struct rte_mbuf *unprocess_pkts[nb_pkts];
>  	struct gro_ctx *gro_ctx = ctx;
> +	void *tcp_tbl;
>  	uint64_t current_time;
> +	uint16_t i, unprocess_num = 0;
> 
> -	if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0)
> +	if (unlikely((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0))
>  		return nb_pkts;
> 
> +	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
>  	current_time = rte_rdtsc();
> 
>  	for (i = 0; i < nb_pkts; i++) {
> -		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
> -					RTE_PTYPE_L4_TCP)) ==
> -				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP))
> {
> -			if (gro_tcp4_reassemble(pkts[i],
> -						gro_ctx->tbls
> -						[RTE_GRO_TCP_IPV4_INDEX],
> +		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
> +			if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
>  						current_time) < 0)
>  				unprocess_pkts[unprocess_num++] = pkts[i];
>  		} else
>  			unprocess_pkts[unprocess_num++] = pkts[i];
>  	}
>  	if (unprocess_num > 0) {
> -		memcpy(pkts, unprocess_pkts,
> -				sizeof(struct rte_mbuf *) *
> +		memcpy(pkts, unprocess_pkts, sizeof(struct rte_mbuf *) *
>  				unprocess_num);
>  	}
> 
> @@ -224,6 +214,7 @@ rte_gro_timeout_flush(void *ctx,
>  				flush_timestamp,
>  				out, max_nb_out);
>  	}
> +
>  	return 0;
>  }
> 
> @@ -232,19 +223,20 @@ rte_gro_get_pkt_count(void *ctx)
>  {
>  	struct gro_ctx *gro_ctx = ctx;
>  	gro_tbl_pkt_count_fn pkt_count_fn;
> +	uint64_t gro_types = gro_ctx->gro_types, flag;
>  	uint64_t item_num = 0;
> -	uint64_t gro_type_flag;
>  	uint8_t i;
> 
> -	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
> -		gro_type_flag = 1ULL << i;
> -		if ((gro_ctx->gro_types & gro_type_flag) == 0)
> +	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM && gro_types; i++) {
> +		flag = 1ULL << i;
> +		if ((gro_types & flag) == 0)
>  			continue;
> 
> +		gro_types ^= flag;
>  		pkt_count_fn = tbl_pkt_count_fn[i];
> -		if (pkt_count_fn == NULL)
> -			continue;
> -		item_num += pkt_count_fn(gro_ctx->tbls[i]);
> +		if (pkt_count_fn)
> +			item_num += pkt_count_fn(gro_ctx->tbls[i]);
>  	}
> +
>  	return item_num;
>  }
> diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
> index 81a2eac..7979a59 100644
> --- a/lib/librte_gro/rte_gro.h
> +++ b/lib/librte_gro/rte_gro.h
> @@ -31,8 +31,8 @@ extern "C" {
>  /**< TCP/IPv4 GRO flag */
> 
>  /**
> - * A structure which is used to create GRO context objects or tell
> - * rte_gro_reassemble_burst() what reassembly rules are demanded.
> + * Structure used to create GRO context objects or used to pass
> + * application-determined parameters to rte_gro_reassemble_burst().
>   */
>  struct rte_gro_param {
>  	uint64_t gro_types;
> @@ -78,26 +78,23 @@ void rte_gro_ctx_destroy(void *ctx);
> 
>  /**
>   * This is one of the main reassembly APIs, which merges numbers of
> - * packets at a time. It assumes that all inputted packets are with
> - * correct checksums. That is, applications should guarantee all
> - * inputted packets are correct. Besides, it doesn't re-calculate
> - * checksums for merged packets. If inputted packets are IP fragmented,
> - * this function assumes them are complete (i.e. with L4 header). After
> - * finishing processing, it returns all GROed packets to applications
> - * immediately.
> + * packets at a time. It doesn't check if input packets have correct
> + * checksums and doesn't re-calculate checksums for merged packets.
> + * It assumes the packets are complete (i.e., MF==0 && frag_off==0),
> + * when IP fragmentation is possible (i.e., DF==1). The GROed packets
> + * are returned as soon as the function finishes.
>   *
>   * @param pkts
> - *  a pointer array which points to the packets to reassemble. Besides,
> - *  it keeps mbuf addresses for the GROed packets.
> + *  Pointer array pointing to the packets to reassemble. Besides, it
> + *  keeps MBUF addresses for the GROed packets.
>   * @param nb_pkts
> - *  the number of packets to reassemble.
> + *  The number of packets to reassemble
>   * @param param
> - *  applications use it to tell rte_gro_reassemble_burst() what rules
> - *  are demanded.
> + *  Application-determined parameters for reassembling packets.
>   *
>   * @return
> - *  the number of packets after been GROed. If no packets are merged,
> - *  the returned value is nb_pkts.
> + *  The number of packets after been GROed. If no packets are merged,
> + *  the return value is equals to nb_pkts.
>   */
>  uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts,
>  		uint16_t nb_pkts,
> @@ -107,32 +104,28 @@ uint16_t rte_gro_reassemble_burst(struct
> rte_mbuf **pkts,
>   * @warning
>   * @b EXPERIMENTAL: this API may change without prior notice
>   *
> - * Reassembly function, which tries to merge inputted packets with
> - * the packets in the reassembly tables of a given GRO context. This
> - * function assumes all inputted packets are with correct checksums.
> - * And it won't update checksums if two packets are merged. Besides,
> - * if inputted packets are IP fragmented, this function assumes they
> - * are complete packets (i.e. with L4 header).
> + * Reassembly function, which tries to merge input packets with the
> + * existed packets in the reassembly tables of a given GRO context.
> + * It doesn't check if input packets have correct checksums and doesn't
> + * re-calculate checksums for merged packets. Additionally, it assumes
> + * the packets are complete (i.e., MF==0 && frag_off==0), when IP
> + * fragmentation is possible (i.e., DF==1).
>   *
> - * If the inputted packets don't have data or are with unsupported GRO
> - * types etc., they won't be processed and are returned to applications.
> - * Otherwise, the inputted packets are either merged or inserted into
> - * the table. If applications want get packets in the table, they need
> - * to call flush API.
> + * If the input packets have invalid parameters (e.g. no data payload,
> + * unsupported GRO types), they are returned to applications. Otherwise,
> + * they are either merged or inserted into the table. Applications need
> + * to flush packets from the tables by flush API, if they want to get the
> + * GROed packets.
>   *
>   * @param pkts
> - *  packet to reassemble. Besides, after this function finishes, it
> - *  keeps the unprocessed packets (e.g. without data or unsupported
> - *  GRO types).
> + *  Packets to reassemble. It's also used to store the unprocessed packets.
>   * @param nb_pkts
> - *  the number of packets to reassemble.
> + *  The number of packets to reassemble
>   * @param ctx
> - *  a pointer points to a GRO context object.
> + *  GRO context object pointer
>   *
>   * @return
> - *  return the number of unprocessed packets (e.g. without data or
> - *  unsupported GRO types). If all packets are processed (merged or
> - *  inserted into the table), return 0.
> + *  The number of unprocessed packets.
>   */
>  uint16_t rte_gro_reassemble(struct rte_mbuf **pkts,
>  		uint16_t nb_pkts,
> @@ -142,29 +135,28 @@ uint16_t rte_gro_reassemble(struct rte_mbuf
> **pkts,
>   * @warning
>   * @b EXPERIMENTAL: this API may change without prior notice
>   *
> - * This function flushes the timeout packets from reassembly tables of
> - * desired GRO types. The max number of flushed timeout packets is the
> - * element number of the array which is used to keep the flushed packets.
> + * This function flushes the timeout packets from the reassembly tables
> + * of desired GRO types. The max number of flushed packets is the
> + * element number of 'out'.
>   *
> - * Besides, this function won't re-calculate checksums for merged
> - * packets in the tables. That is, the returned packets may be with
> - * wrong checksums.
> + * Additionally, the flushed packets may have incorrect checksums, since
> + * this function doesn't re-calculate checksums for merged packets.
>   *
>   * @param ctx
> - *  a pointer points to a GRO context object.
> + *  GRO context object pointer.
>   * @param timeout_cycles
> - *  max TTL for packets in reassembly tables, measured in nanosecond.
> + *  The max TTL for packets in reassembly tables, measured in nanosecond.
>   * @param gro_types
> - *  this function only flushes packets which belong to the GRO types
> - *  specified by gro_types.
> + *  This function flushes packets whose GRO types are specified by
> + *  gro_types.
>   * @param out
> - *  a pointer array that is used to keep flushed timeout packets.
> + *  Pointer array used to keep flushed packets.
>   * @param max_nb_out
> - *  the element number of out. It's also the max number of timeout
> + *  The element number of 'out'. It's also the max number of timeout
>   *  packets that can be flushed finally.
>   *
>   * @return
> - *  the number of flushed packets. If no packets are flushed, return 0.
> + *  The number of flushed packets.
>   */
>  uint16_t rte_gro_timeout_flush(void *ctx,
>  		uint64_t timeout_cycles,
> @@ -180,10 +172,10 @@ uint16_t rte_gro_timeout_flush(void *ctx,
>   * of a given GRO context.
>   *
>   * @param ctx
> - *  pointer points to a GRO context object.
> + *  GRO context object pointer.
>   *
>   * @return
> - *  the number of packets in all reassembly tables.
> + *  The number of packets in the tables.
>   */
>  uint64_t rte_gro_get_pkt_count(void *ctx);
> 
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v4 1/2] gro: code cleanup
  2018-01-05  6:12       ` [PATCH v4 1/2] gro: code cleanup Jiayu Hu
  2018-01-08  1:15         ` Yao, Lei A
@ 2018-01-10  0:09         ` Thomas Monjalon
  2018-01-10  1:55           ` Hu, Jiayu
  1 sibling, 1 reply; 31+ messages in thread
From: Thomas Monjalon @ 2018-01-10  0:09 UTC (permalink / raw)
  To: Jiayu Hu
  Cc: dev, bruce.richardson, junjie.j.chen, jianfeng.tan, stephen,
	ferruh.yigit, konstantin.ananyev, lei.a.yao

Hi,

05/01/2018 07:12, Jiayu Hu:
> - Remove needless check and variants
> - For better understanding, update the programmer guide and rename
>   internal functions and variants
> - For supporting tunneled gro, move common internal functions from
>   gro_tcp4.c to gro_tcp4.h
> - Comply RFC 6864 to process the IPv4 ID field

I think you could split this patch in several ones.
Please remind that the git history can be used later to understand
why the changes were done.

Thanks

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v4 1/2] gro: code cleanup
  2018-01-10  0:09         ` Thomas Monjalon
@ 2018-01-10  1:55           ` Hu, Jiayu
  0 siblings, 0 replies; 31+ messages in thread
From: Hu, Jiayu @ 2018-01-10  1:55 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Richardson, Bruce, Chen, Junjie J, Tan, Jianfeng, stephen,
	Yigit, Ferruh, Ananyev, Konstantin, Yao, Lei A



> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Wednesday, January 10, 2018 8:09 AM
> To: Hu, Jiayu <jiayu.hu@intel.com>
> Cc: dev@dpdk.org; Richardson, Bruce <bruce.richardson@intel.com>; Chen,
> Junjie J <junjie.j.chen@intel.com>; Tan, Jianfeng <jianfeng.tan@intel.com>;
> stephen@networkplumber.org; Yigit, Ferruh <ferruh.yigit@intel.com>;
> Ananyev, Konstantin <konstantin.ananyev@intel.com>; Yao, Lei A
> <lei.a.yao@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v4 1/2] gro: code cleanup
> 
> Hi,
> 
> 05/01/2018 07:12, Jiayu Hu:
> > - Remove needless check and variants
> > - For better understanding, update the programmer guide and rename
> >   internal functions and variants
> > - For supporting tunneled gro, move common internal functions from
> >   gro_tcp4.c to gro_tcp4.h
> > - Comply RFC 6864 to process the IPv4 ID field
> 
> I think you could split this patch in several ones.
> Please remind that the git history can be used later to understand
> why the changes were done.

Thanks for your suggestion. I will split this patch into three patches: code cleanup,
comply RFC 6864 to process IPv4 ID field and extract common functions for supporting
tunneled GRO.

Regards,
Jiayu
> 
> Thanks

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v5 0/3] Support VxLAN GRO
  2018-01-05  6:12     ` [PATCH v4 " Jiayu Hu
  2018-01-05  6:12       ` [PATCH v4 1/2] gro: code cleanup Jiayu Hu
  2018-01-05  6:12       ` [PATCH v4 2/2] gro: support VxLAN GRO Jiayu Hu
@ 2018-01-10 14:03       ` Jiayu Hu
  2018-01-10 14:03         ` [PATCH v5 1/3] gro: codes cleanup Jiayu Hu
                           ` (3 more replies)
  2 siblings, 4 replies; 31+ messages in thread
From: Jiayu Hu @ 2018-01-10 14:03 UTC (permalink / raw)
  To: dev; +Cc: thomas, junjie.j.chen, jianfeng.tan, lei.a.yao, Jiayu Hu

VxLAN is one of the most widely used tunneled protocols. Providing GRO
support for VxLAN-encapsulated packets can benefit many per-packet based
applications, like Open vSwitch.

This patchset is to support VxLAN GRO. The first patch cleans up current
codes and updates the document for better understanding. The second patch
complies RFC 6848 to process IPv4 ID for TCP/IPv4 packets. The third
patch supports GRO on the VxLAN packets which have an outer IPv4 header
and an inner TCP/IPv4 packet.

Change log
===========
v5:
- split the patch set into three patches
- convert license headers to SPDX tags
v4:
- replace gro-key-algorithm.png with gro-key-algorithm.svg
- reduce key comparison times in gro_tcp4_reassemble() and
  gro_vxlan_tcp4_reassemble()
- reduce comparison times in rte_gro_get_pkt_count()
v3:
- remove needless check
- add "likely()" and "unlikely()" to optimize branch prediction
- fix a bug in merge_two_tcp4_packets(): for VxLAN packets, check if
  the outer IPv4 packet length is less than or equal to UINT16_MAX,
  rather than the inner IPv4 packet length.
- fix a bug in rte_gro.h: change RTE_GRO_TYPE_SUPPORT_NUM to 2
- Avoid inserting timestamp in rte_gro_reassemble_burst(), since all
  packets in the tables will be flushed.
- fix typos
v2:
- comply with RFC 6864 to process IP ID fields. Specifically, we require the
  IP ID fields of neighbor packets whose DF bit is 0 to be increased by
  1. We don't check IP ID for the packets whose DF bit is 1.
  Additionally, packets whose DF bits are different cannot be merged.
- update the programmer guide and function comments

Jiayu Hu (3):
  gro: codes cleanup
  gro: comply RFC 6864 to process IPv4 ID
  gro: support VxLAN GRO

 .../prog_guide/generic_receive_offload_lib.rst     | 269 ++++++-----
 doc/guides/prog_guide/img/gro-key-algorithm.svg    | 223 ++++++++++
 lib/librte_gro/Makefile                            |   1 +
 lib/librte_gro/gro_tcp4.c                          | 325 +++++---------
 lib/librte_gro/gro_tcp4.h                          | 251 ++++++++---
 lib/librte_gro/gro_vxlan_tcp4.c                    | 494 +++++++++++++++++++++
 lib/librte_gro/gro_vxlan_tcp4.h                    | 156 +++++++
 lib/librte_gro/rte_gro.c                           | 197 +++++---
 lib/librte_gro/rte_gro.h                           |  97 ++--
 9 files changed, 1513 insertions(+), 500 deletions(-)
 create mode 100644 doc/guides/prog_guide/img/gro-key-algorithm.svg
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.c
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.h

-- 
2.7.4

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v5 1/3] gro: codes cleanup
  2018-01-10 14:03       ` [PATCH v5 0/3] Support " Jiayu Hu
@ 2018-01-10 14:03         ` Jiayu Hu
  2018-01-10 14:03         ` [PATCH v5 2/3] gro: comply RFC 6864 to process IPv4 ID Jiayu Hu
                           ` (2 subsequent siblings)
  3 siblings, 0 replies; 31+ messages in thread
From: Jiayu Hu @ 2018-01-10 14:03 UTC (permalink / raw)
  To: dev; +Cc: thomas, junjie.j.chen, jianfeng.tan, lei.a.yao, Jiayu Hu

This patch updates codes as follows:
- change appropriate names for internal structures, variants and functions
- update comments and the content of the gro programmer guide for better
  understanding
- remove needless check and redundant comments

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
Reviewed-by: Junjie Chen <junjie.j.chen@intel.com>
---
 .../prog_guide/generic_receive_offload_lib.rst     | 238 +++++++++-------
 doc/guides/prog_guide/img/gro-key-algorithm.svg    | 223 +++++++++++++++
 lib/librte_gro/gro_tcp4.c                          | 306 ++++++++++-----------
 lib/librte_gro/gro_tcp4.h                          | 123 ++++-----
 lib/librte_gro/rte_gro.c                           |  96 +++----
 lib/librte_gro/rte_gro.h                           |  92 +++----
 6 files changed, 649 insertions(+), 429 deletions(-)
 create mode 100644 doc/guides/prog_guide/img/gro-key-algorithm.svg

diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst b/doc/guides/prog_guide/generic_receive_offload_lib.rst
index 22e50ec..1652e64 100644
--- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
+++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
@@ -32,128 +32,154 @@ Generic Receive Offload Library
 ===============================
 
 Generic Receive Offload (GRO) is a widely used SW-based offloading
-technique to reduce per-packet processing overhead. It gains performance
-by reassembling small packets into large ones. To enable more flexibility
-to applications, DPDK implements GRO as a standalone library. Applications
-explicitly use the GRO library to merge small packets into large ones.
-
-The GRO library assumes all input packets have correct checksums. In
-addition, the GRO library doesn't re-calculate checksums for merged
-packets. If input packets are IP fragmented, the GRO library assumes
-they are complete packets (i.e. with L4 headers).
-
-Currently, the GRO library implements TCP/IPv4 packet reassembly.
-
-Reassembly Modes
-----------------
-
-The GRO library provides two reassembly modes: lightweight and
-heavyweight mode. If applications want to merge packets in a simple way,
-they can use the lightweight mode API. If applications want more
-fine-grained controls, they can choose the heavyweight mode API.
-
-Lightweight Mode
-~~~~~~~~~~~~~~~~
-
-The ``rte_gro_reassemble_burst()`` function is used for reassembly in
-lightweight mode. It tries to merge N input packets at a time, where
-N should be less than or equal to ``RTE_GRO_MAX_BURST_ITEM_NUM``.
-
-In each invocation, ``rte_gro_reassemble_burst()`` allocates temporary
-reassembly tables for the desired GRO types. Note that the reassembly
-table is a table structure used to reassemble packets and different GRO
-types (e.g. TCP/IPv4 GRO and TCP/IPv6 GRO) have different reassembly table
-structures. The ``rte_gro_reassemble_burst()`` function uses the reassembly
-tables to merge the N input packets.
-
-For applications, performing GRO in lightweight mode is simple. They
-just need to invoke ``rte_gro_reassemble_burst()``. Applications can get
-GROed packets as soon as ``rte_gro_reassemble_burst()`` returns.
-
-Heavyweight Mode
-~~~~~~~~~~~~~~~~
-
-The ``rte_gro_reassemble()`` function is used for reassembly in heavyweight
-mode. Compared with the lightweight mode, performing GRO in heavyweight mode
-is relatively complicated.
-
-Before performing GRO, applications need to create a GRO context object
-by calling ``rte_gro_ctx_create()``. A GRO context object holds the
-reassembly tables of desired GRO types. Note that all update/lookup
-operations on the context object are not thread safe. So if different
-processes or threads want to access the same context object simultaneously,
-some external syncing mechanisms must be used.
-
-Once the GRO context is created, applications can then use the
-``rte_gro_reassemble()`` function to merge packets. In each invocation,
-``rte_gro_reassemble()`` tries to merge input packets with the packets
-in the reassembly tables. If an input packet is an unsupported GRO type,
-or other errors happen (e.g. SYN bit is set), ``rte_gro_reassemble()``
-returns the packet to applications. Otherwise, the input packet is either
-merged or inserted into a reassembly table.
-
-When applications want to get GRO processed packets, they need to use
-``rte_gro_timeout_flush()`` to flush them from the tables manually.
+technique to reduce per-packet processing overheads. By reassembling
+small packets into larger ones, GRO enables applications to process
+fewer large packets directly, thus reducing the number of packets to
+be processed. To benefit DPDK-based applications, like Open vSwitch,
+DPDK also provides own GRO implementation. In DPDK, GRO is implemented
+as a standalone library. Applications explicitly use the GRO library to
+reassemble packets.
+
+Overview
+--------
+
+In the GRO library, there are many GRO types which are defined by packet
+types. One GRO type is in charge of process one kind of packets. For
+example, TCP/IPv4 GRO processes TCP/IPv4 packets.
+
+Each GRO type has a reassembly function, which defines own algorithm and
+table structure to reassemble packets. We assign input packets to the
+corresponding GRO functions by MBUF->packet_type.
+
+The GRO library doesn't check if input packets have correct checksums and
+doesn't re-calculate checksums for merged packets. The GRO library
+assumes the packets are complete (i.e., MF==0 && frag_off==0), when IP
+fragmentation is possible (i.e., DF==0). Additionally, it requires IPv4
+ID to be increased by one.
 
-TCP/IPv4 GRO
-------------
+Currently, the GRO library provides GRO supports for TCP/IPv4 packets.
+
+Two Sets of API
+---------------
+
+For different usage scenarios, the GRO library provides two sets of API.
+The one is called the lightweight mode API, which enables applications to
+merge a small number of packets rapidly; the other is called the
+heavyweight mode API, which provides fine-grained controls to
+applications and supports to merge a large number of packets.
+
+Lightweight Mode API
+~~~~~~~~~~~~~~~~~~~~
+
+The lightweight mode only has one function ``rte_gro_reassemble_burst()``,
+which process N packets at a time. Using the lightweight mode API to
+merge packets is very simple. Calling ``rte_gro_reassemble_burst()`` is
+enough. The GROed packets are returned to applications as soon as it
+finishes.
+
+In ``rte_gro_reassemble_burst()``, table structures of different GRO
+types are allocated in the stack. This design simplifies applications'
+operations. However, limited by the stack size, the maximum number of
+packets that ``rte_gro_reassemble_burst()`` can process in an invocation
+should be less than or equal to ``RTE_GRO_MAX_BURST_ITEM_NUM``.
+
+Heavyweight Mode API
+~~~~~~~~~~~~~~~~~~~~
+
+Compared with the lightweight mode, using the heavyweight mode API is
+relatively complex. Firstly, applications need to create a GRO context
+by ``rte_gro_ctx_create()``. ``rte_gro_ctx_create()`` allocates tables
+structures in the heap and stores their pointers in the GRO context.
+Secondly, applications use ``rte_gro_reassemble()`` to merge packets.
+If input packets have invalid parameters, ``rte_gro_reassemble()``
+returns them to applications. For example, packets of unsupported GRO
+types or TCP SYN packets are returned. Otherwise, the input packets are
+either merged with the existed packets in the tables or inserted into the
+tables. Finally, applications use ``rte_gro_timeout_flush()`` to flush
+packets from the tables, when they want to get the GROed packets.
+
+Note that all update/lookup operations on the GRO context are not thread
+safe. So if different processes or threads want to access the same
+context object simultaneously, some external syncing mechanisms must be
+used.
+
+Reassembly Algorithm
+--------------------
 
-TCP/IPv4 GRO supports merging small TCP/IPv4 packets into large ones,
-using a table structure called the TCP/IPv4 reassembly table.
+The reassembly algorithm is used for reassembling packets. In the GRO
+library, different GRO types can use different algorithms. In this
+section, we will introduce an algorithm, which is used by TCP/IPv4 GRO.
 
-TCP/IPv4 Reassembly Table
-~~~~~~~~~~~~~~~~~~~~~~~~~
+Challenges
+~~~~~~~~~~
 
-A TCP/IPv4 reassembly table includes a "key" array and an "item" array.
-The key array keeps the criteria to merge packets and the item array
-keeps the packet information.
+The reassembly algorithm determines the efficiency of GRO. There are two
+challenges in the algorithm design:
 
-Each key in the key array points to an item group, which consists of
-packets which have the same criteria values but can't be merged. A key
-in the key array includes two parts:
+- a high cost algorithm/implementation would cause packet dropping in a
+  high speed network.
 
-* ``criteria``: the criteria to merge packets. If two packets can be
-  merged, they must have the same criteria values.
+- packet reordering makes it hard to merge packets. For example, Linux
+  GRO fails to merge packets when encounters packet reordering.
 
-* ``start_index``: the item array index of the first packet in the item
-  group.
+The above two challenges require our algorithm is:
 
-Each element in the item array keeps the information of a packet. An item
-in the item array mainly includes three parts:
+- lightweight enough to scale fast networking speed
 
-* ``firstseg``: the mbuf address of the first segment of the packet.
+- capable of handling packet reordering
 
-* ``lastseg``: the mbuf address of the last segment of the packet.
+In DPDK GRO, we use a key-based algorithm to address the two challenges.
 
-* ``next_pkt_index``: the item array index of the next packet in the same
-  item group. TCP/IPv4 GRO uses ``next_pkt_index`` to chain the packets
-  that have the same criteria value but can't be merged together.
+Key-based Reassembly Algorithm
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+:numref:`figure_gro-key-algorithm` illustrates the procedure of the
+key-based algorithm. Packets are classified into "flows" by some header
+fields (we call them as "key"). To process an input packet, the algorithm
+searches for a matched "flow" (i.e., the same value of key) for the
+packet first, then checks all packets in the "flow" and tries to find a
+"neighbor" for it. If find a "neighbor", merge the two packets together.
+If can't find a "neighbor", store the packet into its "flow". If can't
+find a matched "flow", insert a new "flow" and store the packet into the
+"flow".
+
+.. note::
+        Packets in the same "flow" that can't merge are always caused
+        by packet reordering.
+
+The key-based algorithm has two characters:
+
+- classifying packets into "flows" to accelerate packet aggregation is
+  simple (address challenge 1).
+
+- storing out-of-order packets makes it possible to merge later (address
+  challenge 2).
+
+.. _figure_gro-key-algorithm:
+
+.. figure:: img/gro-key-algorithm.*
+   :align: center
+
+   Key-based Reassembly Algorithm
+
+TCP/IPv4 GRO
+------------
 
-Procedure to Reassemble a Packet
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The table structure used by TCP/IPv4 GRO contains two arrays: flow array
+and item array. The flow array keeps flow information, and the item array
+keeps packet information.
 
-To reassemble an incoming packet needs three steps:
+Header fields used to define a TCP/IPv4 flow include:
 
-#. Check if the packet should be processed. Packets with one of the
-   following properties aren't processed and are returned immediately:
+- source and destination: Ethernet and IP address, TCP port
 
-   * FIN, SYN, RST, URG, PSH, ECE or CWR bit is set.
+- TCP acknowledge number
 
-   * L4 payload length is 0.
+TCP/IPv4 packets whose FIN, SYN, RST, URG, PSH, ECE or CWR bit is set
+won't be processed.
 
-#.  Traverse the key array to find a key which has the same criteria
-    value with the incoming packet. If found, go to the next step.
-    Otherwise, insert a new key and a new item for the packet.
+Header fields deciding if two packets are neighbors include:
 
-#. Locate the first packet in the item group via ``start_index``. Then
-   traverse all packets in the item group via ``next_pkt_index``. If a
-   packet is found which can be merged with the incoming one, merge them
-   together. If one isn't found, insert the packet into this item group.
-   Note that to merge two packets is to link them together via mbuf's
-   ``next`` field.
+- TCP sequence number
 
-When packets are flushed from the reassembly table, TCP/IPv4 GRO updates
-packet header fields for the merged packets. Note that before reassembling
-the packet, TCP/IPv4 GRO doesn't check if the checksums of packets are
-correct. Also, TCP/IPv4 GRO doesn't re-calculate checksums for merged
-packets.
+- IPv4 ID. The IPv4 ID fields of the packets should be increased by 1.
diff --git a/doc/guides/prog_guide/img/gro-key-algorithm.svg b/doc/guides/prog_guide/img/gro-key-algorithm.svg
new file mode 100644
index 0000000..94e42f5
--- /dev/null
+++ b/doc/guides/prog_guide/img/gro-key-algorithm.svg
@@ -0,0 +1,223 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
+<!-- Generated by Microsoft Visio 11.0, SVG Export, v1.0 gro-key-algorithm.svg Page-1 -->
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ev="http://www.w3.org/2001/xml-events"
+		xmlns:v="http://schemas.microsoft.com/visio/2003/SVGExtensions/" width="6.06163in" height="2.66319in"
+		viewBox="0 0 436.438 191.75" xml:space="preserve" color-interpolation-filters="sRGB" class="st10">
+	<v:documentProperties v:langID="1033" v:viewMarkup="false"/>
+
+	<style type="text/css">
+	<![CDATA[
+		.st1 {fill:url(#grad30-4);stroke:#404040;stroke-linecap:round;stroke-linejoin:round;stroke-width:0.25}
+		.st2 {fill:#000000;font-family:Calibri;font-size:1.00001em}
+		.st3 {font-size:1em;font-weight:bold}
+		.st4 {fill:#000000;font-family:Calibri;font-size:1.00001em;font-weight:bold}
+		.st5 {font-size:1em;font-weight:normal}
+		.st6 {marker-end:url(#mrkr5-38);stroke:#404040;stroke-linecap:round;stroke-linejoin:round;stroke-width:1}
+		.st7 {fill:#404040;fill-opacity:1;stroke:#404040;stroke-opacity:1;stroke-width:0.28409090909091}
+		.st8 {fill:none;stroke:none;stroke-linecap:round;stroke-linejoin:round;stroke-width:0.25}
+		.st9 {fill:#000000;font-family:Calibri;font-size:0.833336em}
+		.st10 {fill:none;fill-rule:evenodd;font-size:12px;overflow:visible;stroke-linecap:square;stroke-miterlimit:3}
+	]]>
+	</style>
+
+	<defs id="Patterns_And_Gradients">
+		<linearGradient id="grad30-4" v:fillPattern="30" v:foreground="#c6d09f" v:background="#d1dab4" x1="0" y1="1" x2="0" y2="0">
+			<stop offset="0" style="stop-color:#c6d09f;stop-opacity:1"/>
+			<stop offset="1" style="stop-color:#d1dab4;stop-opacity:1"/>
+		</linearGradient>
+		<linearGradient id="grad30-35" v:fillPattern="30" v:foreground="#f0f0f0" v:background="#ffffff" x1="0" y1="1" x2="0" y2="0">
+			<stop offset="0" style="stop-color:#f0f0f0;stop-opacity:1"/>
+			<stop offset="1" style="stop-color:#ffffff;stop-opacity:1"/>
+		</linearGradient>
+	</defs>
+	<defs id="Markers">
+		<g id="lend5">
+			<path d="M 2 1 L 0 0 L 1.98117 -0.993387 C 1.67173 -0.364515 1.67301 0.372641 1.98465 1.00043 " style="stroke:none"/>
+		</g>
+		<marker id="mrkr5-38" class="st7" v:arrowType="5" v:arrowSize="2" v:setback="6.16" refX="-6.16" orient="auto"
+				markerUnits="strokeWidth" overflow="visible">
+			<use xlink:href="#lend5" transform="scale(-3.52,-3.52) "/>
+		</marker>
+	</defs>
+	<g v:mID="0" v:index="1" v:groupContext="foregroundPage">
+		<title>Page-1</title>
+		<v:pageProperties v:drawingScale="1" v:pageScale="1" v:drawingUnits="0" v:shadowOffsetX="9" v:shadowOffsetY="-9"/>
+		<v:layer v:name="Connector" v:index="0"/>
+		<g id="shape1-1" v:mID="1" v:groupContext="shape" transform="translate(0.25,-117.25)">
+			<title>Rounded rectangle</title>
+			<desc>Categorize into an existed “flow”</desc>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="90" cy="173.75" width="180" height="36"/>
+			<path d="M171 191.75 A9.00007 9.00007 -180 0 0 180 182.75 L180 164.75 A9.00007 9.00007 -180 0 0 171 155.75 L9 155.75
+						 A9.00007 9.00007 -180 0 0 -0 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L171 191.75 Z"
+					class="st1"/>
+			<text x="8.91" y="177.35" class="st2" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Categorize into an <tspan
+						class="st3">existed</tspan><tspan class="st3" v:langID="2052"> </tspan>“<tspan class="st3">flow</tspan>”</text>		</g>
+		<g id="shape2-9" v:mID="2" v:groupContext="shape" transform="translate(0.25,-58.75)">
+			<title>Rounded rectangle.2</title>
+			<desc>Search for a “neighbor”</desc>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="90" cy="173.75" width="180" height="36"/>
+			<path d="M171 191.75 A9.00007 9.00007 -180 0 0 180 182.75 L180 164.75 A9.00007 9.00007 -180 0 0 171 155.75 L9 155.75
+						 A9.00007 9.00007 -180 0 0 -0 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L171 191.75 Z"
+					class="st1"/>
+			<text x="32.19" y="177.35" class="st2" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Search for a “<tspan
+						class="st3">neighbor</tspan>”</text>		</g>
+		<g id="shape3-14" v:mID="3" v:groupContext="shape" transform="translate(225.813,-117.25)">
+			<title>Rounded rectangle.3</title>
+			<desc>Insert a new “flow” and store the packet</desc>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="105.188" cy="173.75" width="210.38" height="36"/>
+			<path d="M201.37 191.75 A9.00007 9.00007 -180 0 0 210.37 182.75 L210.37 164.75 A9.00007 9.00007 -180 0 0 201.37 155.75
+						 L9 155.75 A9.00007 9.00007 -180 0 0 -0 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L201.37 191.75
+						 Z" class="st1"/>
+			<text x="5.45" y="177.35" class="st2" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Insert a <tspan
+						class="st3">new </tspan>“<tspan class="st3">flow</tspan>” and <tspan class="st3">store </tspan>the packet</text>		</g>
+		<g id="shape4-21" v:mID="4" v:groupContext="shape" transform="translate(225.25,-58.75)">
+			<title>Rounded rectangle.4</title>
+			<desc>Store the packet</desc>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="83.25" cy="173.75" width="166.5" height="36"/>
+			<path d="M157.5 191.75 A9.00007 9.00007 -180 0 0 166.5 182.75 L166.5 164.75 A9.00007 9.00007 -180 0 0 157.5 155.75 L9
+						 155.75 A9.00007 9.00007 -180 0 0 -0 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L157.5 191.75 Z"
+					class="st1"/>
+			<text x="42.81" y="177.35" class="st4" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Store <tspan
+						class="st5">the packet</tspan></text>		</g>
+		<g id="shape5-26" v:mID="5" v:groupContext="shape" transform="translate(0.25,-0.25)">
+			<title>Rounded rectangle.5</title>
+			<desc>Merge the packet</desc>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="90" cy="173.75" width="180" height="36"/>
+			<path d="M171 191.75 A9.00007 9.00007 -180 0 0 180 182.75 L180 164.75 A9.00007 9.00007 -180 0 0 171 155.75 L9 155.75
+						 A9.00007 9.00007 -180 0 0 -0 164.75 L0 182.75 A9.00007 9.00007 -180 0 0 9 191.75 L171 191.75 Z"
+					class="st1"/>
+			<text x="46.59" y="177.35" class="st4" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>Merge <tspan
+						class="st5">the packet</tspan></text>		</g>
+		<g id="shape6-31" v:mID="6" v:groupContext="shape" v:layerMember="0" transform="translate(81.25,-175.75)">
+			<title>Dynamic connector</title>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<path d="M9 191.75 L9 208.09" class="st6"/>
+		</g>
+		<g id="shape7-39" v:mID="7" v:groupContext="shape" v:layerMember="0" transform="translate(81.25,-117.25)">
+			<title>Dynamic connector.7</title>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<path d="M9 191.75 L9 208.09" class="st6"/>
+		</g>
+		<g id="shape8-45" v:mID="8" v:groupContext="shape" v:layerMember="0" transform="translate(81.25,-58.75)">
+			<title>Dynamic connector.8</title>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<path d="M9 191.75 L9 208.09" class="st6"/>
+		</g>
+		<g id="shape9-51" v:mID="9" v:groupContext="shape" v:layerMember="0" transform="translate(180.25,-126.25)">
+			<title>Dynamic connector.9</title>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<path d="M0 182.75 L39.4 182.75" class="st6"/>
+		</g>
+		<g id="shape10-57" v:mID="10" v:groupContext="shape" v:layerMember="0" transform="translate(180.25,-67.75)">
+			<title>Dynamic connector.10</title>
+			<v:userDefs>
+				<v:ud v:nameU="visVersion" v:val="VT0(14):26"/>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<path d="M0 182.75 L38.84 182.75" class="st6"/>
+		</g>
+		<g id="shape11-63" v:mID="11" v:groupContext="shape" transform="translate(65.5,-173.5)">
+			<title>Sheet.11</title>
+			<desc>packet</desc>
+			<v:userDefs>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="24.75" cy="182.75" width="49.5" height="18"/>
+			<rect x="0" y="173.75" width="49.5" height="18" class="st8"/>
+			<text x="8.46" y="186.35" class="st2" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>packet</text>		</g>
+		<g id="shape14-66" v:mID="14" v:groupContext="shape" transform="translate(98.125,-98.125)">
+			<title>Sheet.14</title>
+			<desc>find a “flow”</desc>
+			<v:userDefs>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="32.0625" cy="183.875" width="64.13" height="15.75"/>
+			<rect x="0" y="176" width="64.125" height="15.75" class="st8"/>
+			<text x="6.41" y="186.88" class="st9" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>find a “flow”</text>		</g>
+		<g id="shape15-69" v:mID="15" v:groupContext="shape" transform="translate(99.25,-39.625)">
+			<title>Sheet.15</title>
+			<desc>find a “neighbor”</desc>
+			<v:userDefs>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="40.5" cy="183.875" width="81" height="15.75"/>
+			<rect x="0" y="176" width="81" height="15.75" class="st8"/>
+			<text x="5.48" y="186.88" class="st9" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>find a “neighbor”</text>		</g>
+		<g id="shape13-72" v:mID="13" v:groupContext="shape" transform="translate(181.375,-79)">
+			<title>Sheet.13</title>
+			<desc>not find</desc>
+			<v:userDefs>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="21.375" cy="183.875" width="42.75" height="15.75"/>
+			<rect x="0" y="176" width="42.75" height="15.75" class="st8"/>
+			<text x="5.38" y="186.88" class="st9" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>not find</text>		</g>
+		<g id="shape12-75" v:mID="12" v:groupContext="shape" transform="translate(181.375,-137.5)">
+			<title>Sheet.12</title>
+			<desc>not find</desc>
+			<v:userDefs>
+				<v:ud v:nameU="msvThemeColors" v:val="VT0(36):26"/>
+				<v:ud v:nameU="msvThemeEffects" v:val="VT0(16):26"/>
+			</v:userDefs>
+			<v:textBlock v:margins="rect(4,4,4,4)"/>
+			<v:textRect cx="21.375" cy="183.875" width="42.75" height="15.75"/>
+			<rect x="0" y="176" width="42.75" height="15.75" class="st8"/>
+			<text x="5.38" y="186.88" class="st9" v:langID="1033"><v:paragraph v:horizAlign="1"/><v:tabList/>not find</text>		</g>
+	</g>
+</svg>
diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c
index 03e5ccf..a38a06e 100644
--- a/lib/librte_gro/gro_tcp4.c
+++ b/lib/librte_gro/gro_tcp4.c
@@ -44,20 +44,20 @@ gro_tcp4_tbl_create(uint16_t socket_id,
 	}
 	tbl->max_item_num = entries_num;
 
-	size = sizeof(struct gro_tcp4_key) * entries_num;
-	tbl->keys = rte_zmalloc_socket(__func__,
+	size = sizeof(struct gro_tcp4_flow) * entries_num;
+	tbl->flows = rte_zmalloc_socket(__func__,
 			size,
 			RTE_CACHE_LINE_SIZE,
 			socket_id);
-	if (tbl->keys == NULL) {
+	if (tbl->flows == NULL) {
 		rte_free(tbl->items);
 		rte_free(tbl);
 		return NULL;
 	}
-	/* INVALID_ARRAY_INDEX indicates empty key */
+	/* INVALID_ARRAY_INDEX indicates an empty flow */
 	for (i = 0; i < entries_num; i++)
-		tbl->keys[i].start_index = INVALID_ARRAY_INDEX;
-	tbl->max_key_num = entries_num;
+		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+	tbl->max_flow_num = entries_num;
 
 	return tbl;
 }
@@ -69,7 +69,7 @@ gro_tcp4_tbl_destroy(void *tbl)
 
 	if (tcp_tbl) {
 		rte_free(tcp_tbl->items);
-		rte_free(tcp_tbl->keys);
+		rte_free(tcp_tbl->flows);
 	}
 	rte_free(tcp_tbl);
 }
@@ -81,50 +81,46 @@ gro_tcp4_tbl_destroy(void *tbl)
  * the original packet.
  */
 static inline int
-merge_two_tcp4_packets(struct gro_tcp4_item *item_src,
+merge_two_tcp4_packets(struct gro_tcp4_item *item,
 		struct rte_mbuf *pkt,
-		uint16_t ip_id,
+		int cmp,
 		uint32_t sent_seq,
-		int cmp)
+		uint16_t ip_id)
 {
 	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
-	uint16_t tcp_datalen;
+	uint16_t hdr_len;
 
 	if (cmp > 0) {
-		pkt_head = item_src->firstseg;
+		pkt_head = item->firstseg;
 		pkt_tail = pkt;
 	} else {
 		pkt_head = pkt;
-		pkt_tail = item_src->firstseg;
+		pkt_tail = item->firstseg;
 	}
 
-	/* check if the packet length will be beyond the max value */
-	tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len -
-		pkt_tail->l3_len - pkt_tail->l4_len;
-	if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen >
-			TCP4_MAX_L3_LENGTH)
+	/* check if the IPv4 packet length is greater than the max value */
+	hdr_len = pkt_head->l2_len + pkt_head->l3_len + pkt_head->l4_len;
+	if (unlikely(pkt_head->pkt_len - pkt_head->l2_len + pkt_tail->pkt_len -
+				hdr_len > MAX_IPV4_PKT_LENGTH))
 		return 0;
 
-	/* remove packet header for the tail packet */
-	rte_pktmbuf_adj(pkt_tail,
-			pkt_tail->l2_len +
-			pkt_tail->l3_len +
-			pkt_tail->l4_len);
+	/* remove the packet header for the tail packet */
+	rte_pktmbuf_adj(pkt_tail, hdr_len);
 
 	/* chain two packets together */
 	if (cmp > 0) {
-		item_src->lastseg->next = pkt;
-		item_src->lastseg = rte_pktmbuf_lastseg(pkt);
+		item->lastseg->next = pkt;
+		item->lastseg = rte_pktmbuf_lastseg(pkt);
 		/* update IP ID to the larger value */
-		item_src->ip_id = ip_id;
+		item->ip_id = ip_id;
 	} else {
 		lastseg = rte_pktmbuf_lastseg(pkt);
-		lastseg->next = item_src->firstseg;
-		item_src->firstseg = pkt;
+		lastseg->next = item->firstseg;
+		item->firstseg = pkt;
 		/* update sent_seq to the smaller value */
-		item_src->sent_seq = sent_seq;
+		item->sent_seq = sent_seq;
 	}
-	item_src->nb_merged++;
+	item->nb_merged++;
 
 	/* update mbuf metadata for the merged packet */
 	pkt_head->nb_segs += pkt_tail->nb_segs;
@@ -133,45 +129,46 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item_src,
 	return 1;
 }
 
+/*
+ * Check if two TCP/IPv4 packets are neighbors.
+ */
 static inline int
 check_seq_option(struct gro_tcp4_item *item,
-		struct tcp_hdr *tcp_hdr,
-		uint16_t tcp_hl,
-		uint16_t tcp_dl,
+		struct tcp_hdr *tcph,
+		uint32_t sent_seq,
 		uint16_t ip_id,
-		uint32_t sent_seq)
+		uint16_t tcp_hl,
+		uint16_t tcp_dl)
 {
-	struct rte_mbuf *pkt0 = item->firstseg;
-	struct ipv4_hdr *ipv4_hdr0;
-	struct tcp_hdr *tcp_hdr0;
-	uint16_t tcp_hl0, tcp_dl0;
-	uint16_t len;
-
-	ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) +
-			pkt0->l2_len);
-	tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len);
-	tcp_hl0 = pkt0->l4_len;
-
-	/* check if TCP option fields equal. If not, return 0. */
-	len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr);
-	if ((tcp_hl != tcp_hl0) ||
-			((len > 0) && (memcmp(tcp_hdr + 1,
-					tcp_hdr0 + 1,
+	struct rte_mbuf *pkt_orig = item->firstseg;
+	struct ipv4_hdr *iph_orig;
+	struct tcp_hdr *tcph_orig;
+	uint16_t len, tcp_hl_orig;
+
+	iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
+			pkt_orig->l2_len);
+	tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
+	tcp_hl_orig = pkt_orig->l4_len;
+
+	/* Check if TCP option fields equal */
+	len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct tcp_hdr);
+	if ((tcp_hl != tcp_hl_orig) ||
+			((len > 0) && (memcmp(tcph + 1, tcph_orig + 1,
 					len) != 0)))
 		return 0;
 
 	/* check if the two packets are neighbors */
-	tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0;
-	if ((sent_seq == (item->sent_seq + tcp_dl0)) &&
-			(ip_id == (item->ip_id + 1)))
+	len = pkt_orig->pkt_len - pkt_orig->l2_len - pkt_orig->l3_len -
+		tcp_hl_orig;
+	if ((sent_seq == item->sent_seq + len) && (ip_id == item->ip_id + 1))
 		/* append the new packet */
 		return 1;
-	else if (((sent_seq + tcp_dl) == item->sent_seq) &&
-			((ip_id + item->nb_merged) == item->ip_id))
+	else if ((sent_seq + tcp_dl == item->sent_seq) &&
+			(ip_id + item->nb_merged == item->ip_id))
 		/* pre-pend the new packet */
 		return -1;
-	else
-		return 0;
+
+	return 0;
 }
 
 static inline uint32_t
@@ -187,13 +184,13 @@ find_an_empty_item(struct gro_tcp4_tbl *tbl)
 }
 
 static inline uint32_t
-find_an_empty_key(struct gro_tcp4_tbl *tbl)
+find_an_empty_flow(struct gro_tcp4_tbl *tbl)
 {
 	uint32_t i;
-	uint32_t max_key_num = tbl->max_key_num;
+	uint32_t max_flow_num = tbl->max_flow_num;
 
-	for (i = 0; i < max_key_num; i++)
-		if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX)
+	for (i = 0; i < max_flow_num; i++)
+		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
 			return i;
 	return INVALID_ARRAY_INDEX;
 }
@@ -201,10 +198,10 @@ find_an_empty_key(struct gro_tcp4_tbl *tbl)
 static inline uint32_t
 insert_new_item(struct gro_tcp4_tbl *tbl,
 		struct rte_mbuf *pkt,
-		uint16_t ip_id,
-		uint32_t sent_seq,
+		uint64_t start_time,
 		uint32_t prev_idx,
-		uint64_t start_time)
+		uint32_t sent_seq,
+		uint16_t ip_id)
 {
 	uint32_t item_idx;
 
@@ -221,7 +218,7 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
 	tbl->items[item_idx].nb_merged = 1;
 	tbl->item_num++;
 
-	/* if the previous packet exists, chain the new one with it */
+	/* if the previous packet exists, chain them together. */
 	if (prev_idx != INVALID_ARRAY_INDEX) {
 		tbl->items[item_idx].next_pkt_idx =
 			tbl->items[prev_idx].next_pkt_idx;
@@ -237,7 +234,7 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
 {
 	uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
 
-	/* set NULL to firstseg to indicate it's an empty item */
+	/* NULL indicates an empty item */
 	tbl->items[item_idx].firstseg = NULL;
 	tbl->item_num--;
 	if (prev_item_idx != INVALID_ARRAY_INDEX)
@@ -247,44 +244,42 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
 }
 
 static inline uint32_t
-insert_new_key(struct gro_tcp4_tbl *tbl,
-		struct tcp4_key *key_src,
+insert_new_flow(struct gro_tcp4_tbl *tbl,
+		struct tcp4_flow_key *src,
 		uint32_t item_idx)
 {
-	struct tcp4_key *key_dst;
-	uint32_t key_idx;
+	struct tcp4_flow_key *dst;
+	uint32_t flow_idx;
 
-	key_idx = find_an_empty_key(tbl);
-	if (key_idx == INVALID_ARRAY_INDEX)
+	flow_idx = find_an_empty_flow(tbl);
+	if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
 		return INVALID_ARRAY_INDEX;
 
-	key_dst = &(tbl->keys[key_idx].key);
+	dst = &(tbl->flows[flow_idx].key);
 
-	ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr));
-	ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr));
-	key_dst->ip_src_addr = key_src->ip_src_addr;
-	key_dst->ip_dst_addr = key_src->ip_dst_addr;
-	key_dst->recv_ack = key_src->recv_ack;
-	key_dst->src_port = key_src->src_port;
-	key_dst->dst_port = key_src->dst_port;
+	ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr));
+	ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr));
+	dst->ip_src_addr = src->ip_src_addr;
+	dst->ip_dst_addr = src->ip_dst_addr;
+	dst->recv_ack = src->recv_ack;
+	dst->src_port = src->src_port;
+	dst->dst_port = src->dst_port;
 
-	/* non-INVALID_ARRAY_INDEX value indicates this key is valid */
-	tbl->keys[key_idx].start_index = item_idx;
-	tbl->key_num++;
+	tbl->flows[flow_idx].start_index = item_idx;
+	tbl->flow_num++;
 
-	return key_idx;
+	return flow_idx;
 }
 
+/*
+ * Check if two TCP/IPv4 packets belong to the same flow.
+ */
 static inline int
-is_same_key(struct tcp4_key k1, struct tcp4_key k2)
+is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
 {
-	if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0)
-		return 0;
-
-	if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0)
-		return 0;
-
-	return ((k1.ip_src_addr == k2.ip_src_addr) &&
+	return (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) &&
+			is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) &&
+			(k1.ip_src_addr == k2.ip_src_addr) &&
 			(k1.ip_dst_addr == k2.ip_dst_addr) &&
 			(k1.recv_ack == k2.recv_ack) &&
 			(k1.src_port == k2.src_port) &&
@@ -292,7 +287,7 @@ is_same_key(struct tcp4_key k1, struct tcp4_key k2)
 }
 
 /*
- * update packet length for the flushed packet.
+ * update the packet length for the flushed packet.
  */
 static inline void
 update_header(struct gro_tcp4_item *item)
@@ -315,27 +310,31 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	struct ipv4_hdr *ipv4_hdr;
 	struct tcp_hdr *tcp_hdr;
 	uint32_t sent_seq;
-	uint16_t tcp_dl, ip_id;
+	uint16_t tcp_dl, ip_id, hdr_len;
 
-	struct tcp4_key key;
+	struct tcp4_flow_key key;
 	uint32_t cur_idx, prev_idx, item_idx;
-	uint32_t i, max_key_num;
+	uint32_t i, max_flow_num, remaining_flow_num;
 	int cmp;
+	uint8_t find;
 
 	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
 	ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
 	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+	hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
 
 	/*
-	 * if FIN, SYN, RST, PSH, URG, ECE or
-	 * CWR is set, return immediately.
+	 * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
+	 * or CWR set.
 	 */
 	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
 		return -1;
-	/* if payload length is 0, return immediately */
-	tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len -
-		pkt->l4_len;
-	if (tcp_dl == 0)
+	/*
+	 * Don't process the packet whose payload length is less than or
+	 * equal to 0.
+	 */
+	tcp_dl = pkt->pkt_len - hdr_len;
+	if (tcp_dl <= 0)
 		return -1;
 
 	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
@@ -349,25 +348,34 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	key.dst_port = tcp_hdr->dst_port;
 	key.recv_ack = tcp_hdr->recv_ack;
 
-	/* search for a key */
-	max_key_num = tbl->max_key_num;
-	for (i = 0; i < max_key_num; i++) {
-		if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) &&
-				is_same_key(tbl->keys[i].key, key))
-			break;
+	/* Search for a matched flow. */
+	max_flow_num = tbl->max_flow_num;
+	remaining_flow_num = tbl->flow_num;
+	find = 0;
+	for (i = 0; i < max_flow_num && remaining_flow_num; i++) {
+		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) {
+			if (is_same_tcp4_flow(tbl->flows[i].key, key)) {
+				find = 1;
+				break;
+			}
+			remaining_flow_num--;
+		}
 	}
 
-	/* can't find a key, so insert a new key and a new item. */
-	if (i == tbl->max_key_num) {
-		item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq,
-				INVALID_ARRAY_INDEX, start_time);
+	/*
+	 * Fail to find a matched flow. Insert a new flow and store the
+	 * packet into the flow.
+	 */
+	if (find == 0) {
+		item_idx = insert_new_item(tbl, pkt, start_time,
+				INVALID_ARRAY_INDEX, sent_seq, ip_id);
 		if (item_idx == INVALID_ARRAY_INDEX)
 			return -1;
-		if (insert_new_key(tbl, &key, item_idx) ==
+		if (insert_new_flow(tbl, &key, item_idx) ==
 				INVALID_ARRAY_INDEX) {
 			/*
-			 * fail to insert a new key, so
-			 * delete the inserted item
+			 * Fail to insert a new flow, so delete the
+			 * stored packet.
 			 */
 			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
 			return -1;
@@ -375,24 +383,26 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		return 0;
 	}
 
-	/* traverse all packets in the item group to find one to merge */
-	cur_idx = tbl->keys[i].start_index;
+	/*
+	 * Check all packets in the flow and try to find a neighbor for
+	 * the input packet.
+	 */
+	cur_idx = tbl->flows[i].start_index;
 	prev_idx = cur_idx;
 	do {
 		cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
-				pkt->l4_len, tcp_dl, ip_id, sent_seq);
+				sent_seq, ip_id, pkt->l4_len, tcp_dl);
 		if (cmp) {
 			if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
-						pkt, ip_id,
-						sent_seq, cmp))
+						pkt, cmp, sent_seq, ip_id))
 				return 1;
 			/*
-			 * fail to merge two packets since the packet
-			 * length will be greater than the max value.
-			 * So insert the packet into the item group.
+			 * Fail to merge the two packets, as the packet
+			 * length is greater than the max value. Store
+			 * the packet into the flow.
 			 */
-			if (insert_new_item(tbl, pkt, ip_id, sent_seq,
-						prev_idx, start_time) ==
+			if (insert_new_item(tbl, pkt, start_time, prev_idx,
+						sent_seq, ip_id) ==
 					INVALID_ARRAY_INDEX)
 				return -1;
 			return 0;
@@ -401,12 +411,9 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		cur_idx = tbl->items[cur_idx].next_pkt_idx;
 	} while (cur_idx != INVALID_ARRAY_INDEX);
 
-	/*
-	 * can't find a packet in the item group to merge,
-	 * so insert the packet into the item group.
-	 */
-	if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx,
-				start_time) == INVALID_ARRAY_INDEX)
+	/* Fail to find a neighbor, so store the packet into the flow. */
+	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
+				ip_id) == INVALID_ARRAY_INDEX)
 		return -1;
 
 	return 0;
@@ -420,44 +427,33 @@ gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
 {
 	uint16_t k = 0;
 	uint32_t i, j;
-	uint32_t max_key_num = tbl->max_key_num;
+	uint32_t max_flow_num = tbl->max_flow_num;
 
-	for (i = 0; i < max_key_num; i++) {
-		/* all keys have been checked, return immediately */
-		if (tbl->key_num == 0)
+	for (i = 0; i < max_flow_num; i++) {
+		if (unlikely(tbl->flow_num == 0))
 			return k;
 
-		j = tbl->keys[i].start_index;
+		j = tbl->flows[i].start_index;
 		while (j != INVALID_ARRAY_INDEX) {
 			if (tbl->items[j].start_time <= flush_timestamp) {
 				out[k++] = tbl->items[j].firstseg;
 				if (tbl->items[j].nb_merged > 1)
 					update_header(&(tbl->items[j]));
 				/*
-				 * delete the item and get
-				 * the next packet index
+				 * Delete the packet and get the next
+				 * packet in the flow.
 				 */
-				j = delete_item(tbl, j,
-						INVALID_ARRAY_INDEX);
+				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+				tbl->flows[i].start_index = j;
+				if (j == INVALID_ARRAY_INDEX)
+					tbl->flow_num--;
 
-				/*
-				 * delete the key as all of
-				 * packets are flushed
-				 */
-				if (j == INVALID_ARRAY_INDEX) {
-					tbl->keys[i].start_index =
-						INVALID_ARRAY_INDEX;
-					tbl->key_num--;
-				} else
-					/* update start_index of the key */
-					tbl->keys[i].start_index = j;
-
-				if (k == nb_out)
+				if (unlikely(k == nb_out))
 					return k;
 			} else
 				/*
-				 * left packets of this key won't be
-				 * timeout, so go to check other keys.
+				 * The left packets in this flow won't be
+				 * timeout. Go to check other flows.
 				 */
 				break;
 		}
diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h
index d129523..49e03b4 100644
--- a/lib/librte_gro/gro_tcp4.h
+++ b/lib/librte_gro/gro_tcp4.h
@@ -9,13 +9,13 @@
 #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
 
 /*
- * the max L3 length of a TCP/IPv4 packet. The L3 length
- * is the sum of ipv4 header, tcp header and L4 payload.
+ * The max length of an IPv4 packet, which includes the length of the L3
+ * header, the L4 header and the data payload.
  */
-#define TCP4_MAX_L3_LENGTH UINT16_MAX
+#define MAX_IPV4_PKT_LENGTH UINT16_MAX
 
-/* criteria of mergeing packets */
-struct tcp4_key {
+/* Header fields representing a TCP/IPv4 flow */
+struct tcp4_flow_key {
 	struct ether_addr eth_saddr;
 	struct ether_addr eth_daddr;
 	uint32_t ip_src_addr;
@@ -26,41 +26,38 @@ struct tcp4_key {
 	uint16_t dst_port;
 };
 
-struct gro_tcp4_key {
-	struct tcp4_key key;
+struct gro_tcp4_flow {
+	struct tcp4_flow_key key;
 	/*
-	 * the index of the first packet in the item group.
-	 * If the value is INVALID_ARRAY_INDEX, it means
-	 * the key is empty.
+	 * The index of the first packet in the flow.
+	 * INVALID_ARRAY_INDEX indicates an empty flow.
 	 */
 	uint32_t start_index;
 };
 
 struct gro_tcp4_item {
 	/*
-	 * first segment of the packet. If the value
+	 * The first MBUF segment of the packet. If the value
 	 * is NULL, it means the item is empty.
 	 */
 	struct rte_mbuf *firstseg;
-	/* last segment of the packet */
+	/* The last MBUF segment of the packet */
 	struct rte_mbuf *lastseg;
 	/*
-	 * the time when the first packet is inserted
-	 * into the table. If a packet in the table is
-	 * merged with an incoming packet, this value
-	 * won't be updated. We set this value only
-	 * when the first packet is inserted into the
-	 * table.
+	 * The time when the first packet is inserted into the table.
+	 * This value won't be updated, even if the packet is merged
+	 * with other packets.
 	 */
 	uint64_t start_time;
 	/*
-	 * we use next_pkt_idx to chain the packets that
-	 * have same key value but can't be merged together.
+	 * next_pkt_idx is used to chain the packets that
+	 * are in the same flow but can't be merged together
+	 * (e.g. caused by packet reordering).
 	 */
 	uint32_t next_pkt_idx;
-	/* the sequence number of the packet */
+	/* TCP sequence number of the packet */
 	uint32_t sent_seq;
-	/* the IP ID of the packet */
+	/* IPv4 ID of the packet */
 	uint16_t ip_id;
 	/* the number of merged packets */
 	uint16_t nb_merged;
@@ -72,31 +69,31 @@ struct gro_tcp4_item {
 struct gro_tcp4_tbl {
 	/* item array */
 	struct gro_tcp4_item *items;
-	/* key array */
-	struct gro_tcp4_key *keys;
+	/* flow array */
+	struct gro_tcp4_flow *flows;
 	/* current item number */
 	uint32_t item_num;
-	/* current key num */
-	uint32_t key_num;
+	/* current flow num */
+	uint32_t flow_num;
 	/* item array size */
 	uint32_t max_item_num;
-	/* key array size */
-	uint32_t max_key_num;
+	/* flow array size */
+	uint32_t max_flow_num;
 };
 
 /**
  * This function creates a TCP/IPv4 reassembly table.
  *
  * @param socket_id
- *  socket index for allocating TCP/IPv4 reassemble table
+ *  Socket index for allocating the TCP/IPv4 reassemble table
  * @param max_flow_num
- *  the maximum number of flows in the TCP/IPv4 GRO table
+ *  The maximum number of flows in the TCP/IPv4 GRO table
  * @param max_item_per_flow
- *  the maximum packet number per flow.
+ *  The maximum number of packets per flow
  *
  * @return
- *  if create successfully, return a pointer which points to the
- *  created TCP/IPv4 GRO table. Otherwise, return NULL.
+ *  - Return the table pointer on success.
+ *  - Return NULL on failure.
  */
 void *gro_tcp4_tbl_create(uint16_t socket_id,
 		uint16_t max_flow_num,
@@ -106,62 +103,56 @@ void *gro_tcp4_tbl_create(uint16_t socket_id,
  * This function destroys a TCP/IPv4 reassembly table.
  *
  * @param tbl
- *  a pointer points to the TCP/IPv4 reassembly table.
+ *  Pointer pointing to the TCP/IPv4 reassembly table.
  */
 void gro_tcp4_tbl_destroy(void *tbl);
 
 /**
- * This function searches for a packet in the TCP/IPv4 reassembly table
- * to merge with the inputted one. To merge two packets is to chain them
- * together and update packet headers. Packets, whose SYN, FIN, RST, PSH
- * CWR, ECE or URG bit is set, are returned immediately. Packets which
- * only have packet headers (i.e. without data) are also returned
- * immediately. Otherwise, the packet is either merged, or inserted into
- * the table. Besides, if there is no available space to insert the
- * packet, this function returns immediately too.
+ * This function merges a TCP/IPv4 packet. It doesn't process a packet
+ * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or which doesn't
+ * have a payload.
  *
- * This function assumes the inputted packet is with correct IPv4 and
- * TCP checksums. And if two packets are merged, it won't re-calculate
- * IPv4 and TCP checksums. Besides, if the inputted packet is IP
- * fragmented, it assumes the packet is complete (with TCP header).
+ * This function doesn't check if the packet has correct checksums and
+ * doesn't re-calculate checksums for the merged packet. Additionally,
+ * it assumes the packets are complete (i.e., MF==0 && frag_off==0),
+ * when IP fragmentation is possible (i.e., DF==0). It returns the
+ * packet, if the packet has invalid parameters (e.g. SYN bit is set)
+ * or there is no available space in the table.
  *
  * @param pkt
- *  packet to reassemble.
+ *  Packet to reassemble
  * @param tbl
- *  a pointer that points to a TCP/IPv4 reassembly table.
+ *  Pointer pointing to the TCP/IPv4 reassembly table
  * @start_time
- *  the start time that the packet is inserted into the table
+ *  The time when the packet is inserted into the table
  *
  * @return
- *  if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE
- *  or URG bit is set, or there is no available space in the table to
- *  insert a new item or a new key, return a negative value. If the
- *  packet is merged successfully, return an positive value. If the
- *  packet is inserted into the table, return 0.
+ *  - Return a positive value if the packet is merged.
+ *  - Return zero if the packet isn't merged but stored in the table.
+ *  - Return a negative value for invalid parameters or no available
+ *    space in the table.
  */
 int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		struct gro_tcp4_tbl *tbl,
 		uint64_t start_time);
 
 /**
- * This function flushes timeout packets in a TCP/IPv4 reassembly table
- * to applications, and without updating checksums for merged packets.
- * The max number of flushed timeout packets is the element number of
- * the array which is used to keep flushed packets.
+ * This function flushes timeout packets in a TCP/IPv4 reassembly table,
+ * and without updating checksums.
  *
  * @param tbl
- *  a pointer that points to a TCP GRO table.
+ *  TCP/IPv4 reassembly table pointer
  * @param flush_timestamp
- *  this function flushes packets which are inserted into the table
- *  before or at the flush_timestamp.
+ *  Flush packets which are inserted into the table before or at the
+ *  flush_timestamp.
  * @param out
- *  pointer array which is used to keep flushed packets.
+ *  Pointer array used to keep flushed packets
  * @param nb_out
- *  the element number of out. It's also the max number of timeout
+ *  The element number in 'out'. It also determines the maximum number of
  *  packets that can be flushed finally.
  *
  * @return
- *  the number of packets that are returned.
+ *  The number of flushed packets
  */
 uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
 		uint64_t flush_timestamp,
@@ -173,10 +164,10 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
  * reassembly table.
  *
  * @param tbl
- *  pointer points to a TCP/IPv4 reassembly table.
+ *  TCP/IPv4 reassembly table pointer
  *
  * @return
- *  the number of packets in the table
+ *  The number of packets in the table
  */
 uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
 #endif
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index d6b8cd1..0b64866 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -23,11 +23,14 @@ static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
 static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
 			gro_tcp4_tbl_pkt_count, NULL};
 
+#define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
+		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
+
 /*
- * GRO context structure, which is used to merge packets. It keeps
- * many reassembly tables of desired GRO types. Applications need to
- * create GRO context objects before using rte_gro_reassemble to
- * perform GRO.
+ * GRO context structure. It keeps the table structures, which are
+ * used to merge packets, for different GRO types. Before using
+ * rte_gro_reassemble(), applications need to create the GRO context
+ * first.
  */
 struct gro_ctx {
 	/* GRO types to perform */
@@ -85,8 +88,6 @@ rte_gro_ctx_destroy(void *ctx)
 	uint64_t gro_type_flag;
 	uint8_t i;
 
-	if (gro_ctx == NULL)
-		return;
 	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
 		gro_type_flag = 1ULL << i;
 		if ((gro_ctx->gro_types & gro_type_flag) == 0)
@@ -103,62 +104,54 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
 		const struct rte_gro_param *param)
 {
-	uint16_t i;
-	uint16_t nb_after_gro = nb_pkts;
-	uint32_t item_num;
-
 	/* allocate a reassembly table for TCP/IPv4 GRO */
 	struct gro_tcp4_tbl tcp_tbl;
-	struct gro_tcp4_key tcp_keys[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
 	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
 
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
-	uint16_t unprocess_num = 0;
+	uint32_t item_num;
 	int32_t ret;
-	uint64_t current_time;
+	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
 
-	if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0)
+	if (unlikely((param->gro_types & RTE_GRO_TCP_IPV4) == 0))
 		return nb_pkts;
 
-	/* get the actual number of packets */
+	/* Get the maximum number of packets */
 	item_num = RTE_MIN(nb_pkts, (param->max_flow_num *
-			param->max_item_per_flow));
+				param->max_item_per_flow));
 	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
 
 	for (i = 0; i < item_num; i++)
-		tcp_keys[i].start_index = INVALID_ARRAY_INDEX;
+		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
 
-	tcp_tbl.keys = tcp_keys;
+	tcp_tbl.flows = tcp_flows;
 	tcp_tbl.items = tcp_items;
-	tcp_tbl.key_num = 0;
+	tcp_tbl.flow_num = 0;
 	tcp_tbl.item_num = 0;
-	tcp_tbl.max_key_num = item_num;
+	tcp_tbl.max_flow_num = item_num;
 	tcp_tbl.max_item_num = item_num;
 
-	current_time = rte_rdtsc();
-
 	for (i = 0; i < nb_pkts; i++) {
-		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
-					RTE_PTYPE_L4_TCP)) ==
-				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
-			ret = gro_tcp4_reassemble(pkts[i],
-					&tcp_tbl,
-					current_time);
+		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+			/*
+			 * The timestamp is ignored, since all packets
+			 * will be flushed from the tables.
+			 */
+			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0);
 			if (ret > 0)
 				/* merge successfully */
 				nb_after_gro--;
-			else if (ret < 0) {
-				unprocess_pkts[unprocess_num++] =
-					pkts[i];
-			}
+			else if (ret < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else
 			unprocess_pkts[unprocess_num++] = pkts[i];
 	}
 
-	/* re-arrange GROed packets */
 	if (nb_after_gro < nb_pkts) {
-		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time,
-				pkts, nb_pkts);
+		/* Flush all packets from the tables */
+		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, pkts, nb_pkts);
+		/* Copy unprocessed packets */
 		if (unprocess_num > 0) {
 			memcpy(&pkts[i], unprocess_pkts,
 					sizeof(struct rte_mbuf *) *
@@ -174,31 +167,28 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
 		void *ctx)
 {
-	uint16_t i, unprocess_num = 0;
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	struct gro_ctx *gro_ctx = ctx;
+	void *tcp_tbl;
 	uint64_t current_time;
+	uint16_t i, unprocess_num = 0;
 
-	if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0)
+	if (unlikely((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0))
 		return nb_pkts;
 
+	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
-					RTE_PTYPE_L4_TCP)) ==
-				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
-			if (gro_tcp4_reassemble(pkts[i],
-						gro_ctx->tbls
-						[RTE_GRO_TCP_IPV4_INDEX],
+		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+			if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
 						current_time) < 0)
 				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else
 			unprocess_pkts[unprocess_num++] = pkts[i];
 	}
 	if (unprocess_num > 0) {
-		memcpy(pkts, unprocess_pkts,
-				sizeof(struct rte_mbuf *) *
+		memcpy(pkts, unprocess_pkts, sizeof(struct rte_mbuf *) *
 				unprocess_num);
 	}
 
@@ -224,6 +214,7 @@ rte_gro_timeout_flush(void *ctx,
 				flush_timestamp,
 				out, max_nb_out);
 	}
+
 	return 0;
 }
 
@@ -232,19 +223,20 @@ rte_gro_get_pkt_count(void *ctx)
 {
 	struct gro_ctx *gro_ctx = ctx;
 	gro_tbl_pkt_count_fn pkt_count_fn;
+	uint64_t gro_types = gro_ctx->gro_types, flag;
 	uint64_t item_num = 0;
-	uint64_t gro_type_flag;
 	uint8_t i;
 
-	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
-		gro_type_flag = 1ULL << i;
-		if ((gro_ctx->gro_types & gro_type_flag) == 0)
+	for (i = 0; i < RTE_GRO_TYPE_MAX_NUM && gro_types; i++) {
+		flag = 1ULL << i;
+		if ((gro_types & flag) == 0)
 			continue;
 
+		gro_types ^= flag;
 		pkt_count_fn = tbl_pkt_count_fn[i];
-		if (pkt_count_fn == NULL)
-			continue;
-		item_num += pkt_count_fn(gro_ctx->tbls[i]);
+		if (pkt_count_fn)
+			item_num += pkt_count_fn(gro_ctx->tbls[i]);
 	}
+
 	return item_num;
 }
diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
index 81a2eac..85d8143 100644
--- a/lib/librte_gro/rte_gro.h
+++ b/lib/librte_gro/rte_gro.h
@@ -31,8 +31,8 @@ extern "C" {
 /**< TCP/IPv4 GRO flag */
 
 /**
- * A structure which is used to create GRO context objects or tell
- * rte_gro_reassemble_burst() what reassembly rules are demanded.
+ * Structure used to create GRO context objects or used to pass
+ * application-determined parameters to rte_gro_reassemble_burst().
  */
 struct rte_gro_param {
 	uint64_t gro_types;
@@ -78,26 +78,23 @@ void rte_gro_ctx_destroy(void *ctx);
 
 /**
  * This is one of the main reassembly APIs, which merges numbers of
- * packets at a time. It assumes that all inputted packets are with
- * correct checksums. That is, applications should guarantee all
- * inputted packets are correct. Besides, it doesn't re-calculate
- * checksums for merged packets. If inputted packets are IP fragmented,
- * this function assumes them are complete (i.e. with L4 header). After
- * finishing processing, it returns all GROed packets to applications
- * immediately.
+ * packets at a time. It doesn't check if input packets have correct
+ * checksums and doesn't re-calculate checksums for merged packets.
+ * It assumes the packets are complete (i.e., MF==0 && frag_off==0),
+ * when IP fragmentation is possible (i.e., DF==0). The GROed packets
+ * are returned as soon as the function finishes.
  *
  * @param pkts
- *  a pointer array which points to the packets to reassemble. Besides,
- *  it keeps mbuf addresses for the GROed packets.
+ *  Pointer array pointing to the packets to reassemble. Besides, it
+ *  keeps MBUF addresses for the GROed packets.
  * @param nb_pkts
- *  the number of packets to reassemble.
+ *  The number of packets to reassemble
  * @param param
- *  applications use it to tell rte_gro_reassemble_burst() what rules
- *  are demanded.
+ *  Application-determined parameters for reassembling packets.
  *
  * @return
- *  the number of packets after been GROed. If no packets are merged,
- *  the returned value is nb_pkts.
+ *  The number of packets after being GROed. If no packets are merged,
+ *  the return value equals nb_pkts.
  */
 uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
@@ -107,32 +104,28 @@ uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts,
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
  *
- * Reassembly function, which tries to merge inputted packets with
- * the packets in the reassembly tables of a given GRO context. This
- * function assumes all inputted packets are with correct checksums.
- * And it won't update checksums if two packets are merged. Besides,
- * if inputted packets are IP fragmented, this function assumes they
- * are complete packets (i.e. with L4 header).
+ * Reassembly function, which tries to merge input packets with the
+ * existed packets in the reassembly tables of a given GRO context.
+ * It doesn't check if input packets have correct checksums and doesn't
+ * re-calculate checksums for merged packets. Additionally, it assumes
+ * the packets are complete (i.e., MF==0 && frag_off==0), when IP
+ * fragmentation is possible (i.e., DF==0).
  *
- * If the inputted packets don't have data or are with unsupported GRO
- * types etc., they won't be processed and are returned to applications.
- * Otherwise, the inputted packets are either merged or inserted into
- * the table. If applications want get packets in the table, they need
- * to call flush API.
+ * If the input packets have invalid parameters (e.g. no data payload,
+ * unsupported GRO types), they are returned to applications. Otherwise,
+ * they are either merged or inserted into the table. Applications need
+ * to flush packets from the tables by flush API, if they want to get the
+ * GROed packets.
  *
  * @param pkts
- *  packet to reassemble. Besides, after this function finishes, it
- *  keeps the unprocessed packets (e.g. without data or unsupported
- *  GRO types).
+ *  Packets to reassemble. It's also used to store the unprocessed packets.
  * @param nb_pkts
- *  the number of packets to reassemble.
+ *  The number of packets to reassemble
  * @param ctx
- *  a pointer points to a GRO context object.
+ *  GRO context object pointer
  *
  * @return
- *  return the number of unprocessed packets (e.g. without data or
- *  unsupported GRO types). If all packets are processed (merged or
- *  inserted into the table), return 0.
+ *  The number of unprocessed packets.
  */
 uint16_t rte_gro_reassemble(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
@@ -142,29 +135,28 @@ uint16_t rte_gro_reassemble(struct rte_mbuf **pkts,
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
  *
- * This function flushes the timeout packets from reassembly tables of
- * desired GRO types. The max number of flushed timeout packets is the
- * element number of the array which is used to keep the flushed packets.
+ * This function flushes the timeout packets from the reassembly tables
+ * of desired GRO types. The max number of flushed packets is the
+ * element number of 'out'.
  *
- * Besides, this function won't re-calculate checksums for merged
- * packets in the tables. That is, the returned packets may be with
- * wrong checksums.
+ * Additionally, the flushed packets may have incorrect checksums, since
+ * this function doesn't re-calculate checksums for merged packets.
  *
  * @param ctx
- *  a pointer points to a GRO context object.
+ *  GRO context object pointer.
  * @param timeout_cycles
- *  max TTL for packets in reassembly tables, measured in nanosecond.
+ *  The max TTL for packets in reassembly tables, measured in nanosecond.
  * @param gro_types
- *  this function only flushes packets which belong to the GRO types
- *  specified by gro_types.
+ *  This function flushes packets whose GRO types are specified by
+ *  gro_types.
  * @param out
- *  a pointer array that is used to keep flushed timeout packets.
+ *  Pointer array used to keep flushed packets.
  * @param max_nb_out
- *  the element number of out. It's also the max number of timeout
+ *  The element number of 'out'. It's also the max number of timeout
  *  packets that can be flushed finally.
  *
  * @return
- *  the number of flushed packets. If no packets are flushed, return 0.
+ *  The number of flushed packets.
  */
 uint16_t rte_gro_timeout_flush(void *ctx,
 		uint64_t timeout_cycles,
@@ -180,10 +172,10 @@ uint16_t rte_gro_timeout_flush(void *ctx,
  * of a given GRO context.
  *
  * @param ctx
- *  pointer points to a GRO context object.
+ *  GRO context object pointer.
  *
  * @return
- *  the number of packets in all reassembly tables.
+ *  The number of packets in the tables.
  */
 uint64_t rte_gro_get_pkt_count(void *ctx);
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v5 2/3] gro: comply RFC 6864 to process IPv4 ID
  2018-01-10 14:03       ` [PATCH v5 0/3] Support " Jiayu Hu
  2018-01-10 14:03         ` [PATCH v5 1/3] gro: codes cleanup Jiayu Hu
@ 2018-01-10 14:03         ` Jiayu Hu
  2018-01-10 14:03         ` [PATCH v5 3/3] gro: support VxLAN GRO Jiayu Hu
  2018-01-11 22:15         ` [PATCH v5 0/3] Support " Thomas Monjalon
  3 siblings, 0 replies; 31+ messages in thread
From: Jiayu Hu @ 2018-01-10 14:03 UTC (permalink / raw)
  To: dev; +Cc: thomas, junjie.j.chen, jianfeng.tan, lei.a.yao, Jiayu Hu

This patch complies with RFC 6864 to process IPv4 ID fields. Specifically,
GRO ignores IPv4 ID fields for the packets whose DF bit is 1, and checks
IPv4 ID fields for the packets whose DF bit is 0.

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
Reviewed-by: Junjie Chen <junjie.j.chen@intel.com>
---
 .../prog_guide/generic_receive_offload_lib.rst     | 14 ++++++--
 lib/librte_gro/gro_tcp4.c                          | 39 ++++++++++++++++------
 lib/librte_gro/gro_tcp4.h                          |  2 ++
 3 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst b/doc/guides/prog_guide/generic_receive_offload_lib.rst
index 1652e64..c2d7a41 100644
--- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
+++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
@@ -54,8 +54,8 @@ corresponding GRO functions by MBUF->packet_type.
 The GRO library doesn't check if input packets have correct checksums and
 doesn't re-calculate checksums for merged packets. The GRO library
 assumes the packets are complete (i.e., MF==0 && frag_off==0), when IP
-fragmentation is possible (i.e., DF==0). Additionally, it requires IPv4
-ID to be increased by one.
+fragmentation is possible (i.e., DF==0). Additionally, it complies with
+RFC 6864 to process the IPv4 ID field.
 
 Currently, the GRO library provides GRO supports for TCP/IPv4 packets.
 
@@ -182,4 +182,12 @@ Header fields deciding if two packets are neighbors include:
 
 - TCP sequence number
 
-- IPv4 ID. The IPv4 ID fields of the packets should be increased by 1.
+- IPv4 ID. The IPv4 ID fields of the packets, whose DF bit is 0, should
+  be increased by 1.
+
+.. note::
+        We comply with RFC 6864 to process the IPv4 ID field. Specifically,
+        we check IPv4 ID fields for the packets whose DF bit is 0 and
+        ignore IPv4 ID fields for the packets whose DF bit is 1.
+        Additionally, packets which have different values of the DF bit
+        can't be merged.
diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c
index a38a06e..309cdc7 100644
--- a/lib/librte_gro/gro_tcp4.c
+++ b/lib/librte_gro/gro_tcp4.c
@@ -138,7 +138,8 @@ check_seq_option(struct gro_tcp4_item *item,
 		uint32_t sent_seq,
 		uint16_t ip_id,
 		uint16_t tcp_hl,
-		uint16_t tcp_dl)
+		uint16_t tcp_dl,
+		uint8_t is_atomic)
 {
 	struct rte_mbuf *pkt_orig = item->firstseg;
 	struct ipv4_hdr *iph_orig;
@@ -157,14 +158,19 @@ check_seq_option(struct gro_tcp4_item *item,
 					len) != 0)))
 		return 0;
 
+	/* Don't merge packets whose DF bits are different */
+	if (unlikely(item->is_atomic ^ is_atomic))
+		return 0;
+
 	/* check if the two packets are neighbors */
 	len = pkt_orig->pkt_len - pkt_orig->l2_len - pkt_orig->l3_len -
 		tcp_hl_orig;
-	if ((sent_seq == item->sent_seq + len) && (ip_id == item->ip_id + 1))
+	if ((sent_seq == item->sent_seq + len) && (is_atomic ||
+				(ip_id == item->ip_id + 1)))
 		/* append the new packet */
 		return 1;
-	else if ((sent_seq + tcp_dl == item->sent_seq) &&
-			(ip_id + item->nb_merged == item->ip_id))
+	else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
+			(ip_id + item->nb_merged == item->ip_id)))
 		/* pre-pend the new packet */
 		return -1;
 
@@ -201,7 +207,8 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
 		uint64_t start_time,
 		uint32_t prev_idx,
 		uint32_t sent_seq,
-		uint16_t ip_id)
+		uint16_t ip_id,
+		uint8_t is_atomic)
 {
 	uint32_t item_idx;
 
@@ -216,6 +223,7 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
 	tbl->items[item_idx].sent_seq = sent_seq;
 	tbl->items[item_idx].ip_id = ip_id;
 	tbl->items[item_idx].nb_merged = 1;
+	tbl->items[item_idx].is_atomic = is_atomic;
 	tbl->item_num++;
 
 	/* if the previous packet exists, chain them together. */
@@ -310,7 +318,8 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	struct ipv4_hdr *ipv4_hdr;
 	struct tcp_hdr *tcp_hdr;
 	uint32_t sent_seq;
-	uint16_t tcp_dl, ip_id, hdr_len;
+	uint16_t tcp_dl, ip_id, hdr_len, frag_off;
+	uint8_t is_atomic;
 
 	struct tcp4_flow_key key;
 	uint32_t cur_idx, prev_idx, item_idx;
@@ -337,7 +346,13 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	if (tcp_dl <= 0)
 		return -1;
 
-	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+	/*
+	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
+	 * whose DF bit is 1, IPv4 ID is ignored.
+	 */
+	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+	is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
+	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
 	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
 
 	ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr));
@@ -368,7 +383,8 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	 */
 	if (find == 0) {
 		item_idx = insert_new_item(tbl, pkt, start_time,
-				INVALID_ARRAY_INDEX, sent_seq, ip_id);
+				INVALID_ARRAY_INDEX, sent_seq, ip_id,
+				is_atomic);
 		if (item_idx == INVALID_ARRAY_INDEX)
 			return -1;
 		if (insert_new_flow(tbl, &key, item_idx) ==
@@ -391,7 +407,8 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	prev_idx = cur_idx;
 	do {
 		cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
-				sent_seq, ip_id, pkt->l4_len, tcp_dl);
+				sent_seq, ip_id, pkt->l4_len, tcp_dl,
+				is_atomic);
 		if (cmp) {
 			if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
 						pkt, cmp, sent_seq, ip_id))
@@ -402,7 +419,7 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 			 * the packet into the flow.
 			 */
 			if (insert_new_item(tbl, pkt, start_time, prev_idx,
-						sent_seq, ip_id) ==
+						sent_seq, ip_id, is_atomic) ==
 					INVALID_ARRAY_INDEX)
 				return -1;
 			return 0;
@@ -413,7 +430,7 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 
 	/* Fail to find a neighbor, so store the packet into the flow. */
 	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
-				ip_id) == INVALID_ARRAY_INDEX)
+				ip_id, is_atomic) == INVALID_ARRAY_INDEX)
 		return -1;
 
 	return 0;
diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h
index 49e03b4..af128c9 100644
--- a/lib/librte_gro/gro_tcp4.h
+++ b/lib/librte_gro/gro_tcp4.h
@@ -61,6 +61,8 @@ struct gro_tcp4_item {
 	uint16_t ip_id;
 	/* the number of merged packets */
 	uint16_t nb_merged;
+	/* Indicate if IPv4 ID can be ignored */
+	uint8_t is_atomic;
 };
 
 /*
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v5 3/3] gro: support VxLAN GRO
  2018-01-10 14:03       ` [PATCH v5 0/3] Support " Jiayu Hu
  2018-01-10 14:03         ` [PATCH v5 1/3] gro: codes cleanup Jiayu Hu
  2018-01-10 14:03         ` [PATCH v5 2/3] gro: comply RFC 6864 to process IPv4 ID Jiayu Hu
@ 2018-01-10 14:03         ` Jiayu Hu
  2018-01-11 22:15         ` [PATCH v5 0/3] Support " Thomas Monjalon
  3 siblings, 0 replies; 31+ messages in thread
From: Jiayu Hu @ 2018-01-10 14:03 UTC (permalink / raw)
  To: dev; +Cc: thomas, junjie.j.chen, jianfeng.tan, lei.a.yao, Jiayu Hu

This patch adds a framework that allows GRO on tunneled packets.
Furthermore, it leverages that framework to provide GRO support for
VxLAN-encapsulated packets. Supported VxLAN packets must have an outer
IPv4 header, and contain an inner TCP/IPv4 packet.

VxLAN GRO doesn't check if input packets have correct checksums and
doesn't update checksums for output packets. Additionally, it assumes
the packets are complete (i.e., MF==0 && frag_off==0), when IP
fragmentation is possible (i.e., DF==0).

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
Reviewed-by: Junjie Chen <junjie.j.chen@intel.com>
Tested-by: Lei Yao <lei.a.yao@intel.com>
---
 .../prog_guide/generic_receive_offload_lib.rst     |  31 +-
 lib/librte_gro/Makefile                            |   1 +
 lib/librte_gro/gro_tcp4.c                          | 124 +-----
 lib/librte_gro/gro_tcp4.h                          | 126 ++++++
 lib/librte_gro/gro_vxlan_tcp4.c                    | 494 +++++++++++++++++++++
 lib/librte_gro/gro_vxlan_tcp4.h                    | 156 +++++++
 lib/librte_gro/rte_gro.c                           | 129 ++++--
 lib/librte_gro/rte_gro.h                           |   5 +-
 8 files changed, 916 insertions(+), 150 deletions(-)
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.c
 create mode 100644 lib/librte_gro/gro_vxlan_tcp4.h

diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst b/doc/guides/prog_guide/generic_receive_offload_lib.rst
index c2d7a41..078bec0 100644
--- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
+++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
@@ -57,7 +57,9 @@ assumes the packets are complete (i.e., MF==0 && frag_off==0), when IP
 fragmentation is possible (i.e., DF==0). Additionally, it complies with
 RFC 6864 to process the IPv4 ID field.
 
-Currently, the GRO library provides GRO supports for TCP/IPv4 packets.
+Currently, the GRO library provides GRO support for TCP/IPv4 packets and
+VxLAN packets which contain an outer IPv4 header and an inner TCP/IPv4
+packet.
 
 Two Sets of API
 ---------------
@@ -108,7 +110,8 @@ Reassembly Algorithm
 
 The reassembly algorithm is used for reassembling packets. In the GRO
 library, different GRO types can use different algorithms. In this
-section, we will introduce an algorithm, which is used by TCP/IPv4 GRO.
+section, we will introduce an algorithm, which is used by TCP/IPv4 GRO
+and VxLAN GRO.
 
 Challenges
 ~~~~~~~~~~
@@ -185,6 +188,30 @@ Header fields deciding if two packets are neighbors include:
 - IPv4 ID. The IPv4 ID fields of the packets, whose DF bit is 0, should
   be increased by 1.
 
+VxLAN GRO
+---------
+
+The table structure used by VxLAN GRO, which is in charge of processing
+VxLAN packets with an outer IPv4 header and inner TCP/IPv4 packet, is
+similar to that of TCP/IPv4 GRO. However, the header fields used
+to define a VxLAN flow include:
+
+- outer source and destination: Ethernet and IP address, UDP port
+
+- VxLAN header (VNI and flag)
+
+- inner source and destination: Ethernet and IP address, TCP port
+
+Header fields deciding if packets are neighbors include:
+
+- outer IPv4 ID. The IPv4 ID fields of the packets, whose DF bit in the
+  outer IPv4 header is 0, should be increased by 1.
+
+- inner TCP sequence number
+
+- inner IPv4 ID. The IPv4 ID fields of the packets, whose DF bit in the
+  inner IPv4 header is 0, should be increased by 1.
+
 .. note::
        We comply with RFC 6864 to process the IPv4 ID field. Specifically,
         we check IPv4 ID fields for the packets whose DF bit is 0 and
diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile
index 63df236..bec248f 100644
--- a/lib/librte_gro/Makefile
+++ b/lib/librte_gro/Makefile
@@ -17,6 +17,7 @@ LIBABIVER := 1
 # source files
 SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
 SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c
+SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_vxlan_tcp4.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h
diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c
index 309cdc7..2c0f35c 100644
--- a/lib/librte_gro/gro_tcp4.c
+++ b/lib/librte_gro/gro_tcp4.c
@@ -6,8 +6,6 @@
 #include <rte_mbuf.h>
 #include <rte_cycles.h>
 #include <rte_ethdev.h>
-#include <rte_ip.h>
-#include <rte_tcp.h>
 
 #include "gro_tcp4.h"
 
@@ -74,109 +72,6 @@ gro_tcp4_tbl_destroy(void *tbl)
 	rte_free(tcp_tbl);
 }
 
-/*
- * merge two TCP/IPv4 packets without updating checksums.
- * If cmp is larger than 0, append the new packet to the
- * original packet. Otherwise, pre-pend the new packet to
- * the original packet.
- */
-static inline int
-merge_two_tcp4_packets(struct gro_tcp4_item *item,
-		struct rte_mbuf *pkt,
-		int cmp,
-		uint32_t sent_seq,
-		uint16_t ip_id)
-{
-	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
-	uint16_t hdr_len;
-
-	if (cmp > 0) {
-		pkt_head = item->firstseg;
-		pkt_tail = pkt;
-	} else {
-		pkt_head = pkt;
-		pkt_tail = item->firstseg;
-	}
-
-	/* check if the IPv4 packet length is greater than the max value */
-	hdr_len = pkt_head->l2_len + pkt_head->l3_len + pkt_head->l4_len;
-	if (unlikely(pkt_head->pkt_len - pkt_head->l2_len + pkt_tail->pkt_len -
-				hdr_len > MAX_IPV4_PKT_LENGTH))
-		return 0;
-
-	/* remove the packet header for the tail packet */
-	rte_pktmbuf_adj(pkt_tail, hdr_len);
-
-	/* chain two packets together */
-	if (cmp > 0) {
-		item->lastseg->next = pkt;
-		item->lastseg = rte_pktmbuf_lastseg(pkt);
-		/* update IP ID to the larger value */
-		item->ip_id = ip_id;
-	} else {
-		lastseg = rte_pktmbuf_lastseg(pkt);
-		lastseg->next = item->firstseg;
-		item->firstseg = pkt;
-		/* update sent_seq to the smaller value */
-		item->sent_seq = sent_seq;
-	}
-	item->nb_merged++;
-
-	/* update mbuf metadata for the merged packet */
-	pkt_head->nb_segs += pkt_tail->nb_segs;
-	pkt_head->pkt_len += pkt_tail->pkt_len;
-
-	return 1;
-}
-
-/*
- * Check if two TCP/IPv4 packets are neighbors.
- */
-static inline int
-check_seq_option(struct gro_tcp4_item *item,
-		struct tcp_hdr *tcph,
-		uint32_t sent_seq,
-		uint16_t ip_id,
-		uint16_t tcp_hl,
-		uint16_t tcp_dl,
-		uint8_t is_atomic)
-{
-	struct rte_mbuf *pkt_orig = item->firstseg;
-	struct ipv4_hdr *iph_orig;
-	struct tcp_hdr *tcph_orig;
-	uint16_t len, tcp_hl_orig;
-
-	iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
-			pkt_orig->l2_len);
-	tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
-	tcp_hl_orig = pkt_orig->l4_len;
-
-	/* Check if TCP option fields equal */
-	len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct tcp_hdr);
-	if ((tcp_hl != tcp_hl_orig) ||
-			((len > 0) && (memcmp(tcph + 1, tcph_orig + 1,
-					len) != 0)))
-		return 0;
-
-	/* Don't merge packets whose DF bits are different */
-	if (unlikely(item->is_atomic ^ is_atomic))
-		return 0;
-
-	/* check if the two packets are neighbors */
-	len = pkt_orig->pkt_len - pkt_orig->l2_len - pkt_orig->l3_len -
-		tcp_hl_orig;
-	if ((sent_seq == item->sent_seq + len) && (is_atomic ||
-				(ip_id == item->ip_id + 1)))
-		/* append the new packet */
-		return 1;
-	else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
-			(ip_id + item->nb_merged == item->ip_id)))
-		/* pre-pend the new packet */
-		return -1;
-
-	return 0;
-}
-
 static inline uint32_t
 find_an_empty_item(struct gro_tcp4_tbl *tbl)
 {
@@ -280,21 +175,6 @@ insert_new_flow(struct gro_tcp4_tbl *tbl,
 }
 
 /*
- * Check if two TCP/IPv4 packets belong to the same flow.
- */
-static inline int
-is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
-{
-	return (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) &&
-			is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) &&
-			(k1.ip_src_addr == k2.ip_src_addr) &&
-			(k1.ip_dst_addr == k2.ip_dst_addr) &&
-			(k1.recv_ack == k2.recv_ack) &&
-			(k1.src_port == k2.src_port) &&
-			(k1.dst_port == k2.dst_port));
-}
-
-/*
  * update the packet length for the flushed packet.
  */
 static inline void
@@ -407,11 +287,11 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	prev_idx = cur_idx;
 	do {
 		cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
-				sent_seq, ip_id, pkt->l4_len, tcp_dl,
+				sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
 				is_atomic);
 		if (cmp) {
 			if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
-						pkt, cmp, sent_seq, ip_id))
+						pkt, cmp, sent_seq, ip_id, 0))
 				return 1;
 			/*
 			 * Fail to merge the two packets, as the packet
diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h
index af128c9..6bb30cd 100644
--- a/lib/librte_gro/gro_tcp4.h
+++ b/lib/librte_gro/gro_tcp4.h
@@ -5,6 +5,9 @@
 #ifndef _GRO_TCP4_H_
 #define _GRO_TCP4_H_
 
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
 #define INVALID_ARRAY_INDEX 0xffffffffUL
 #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
 
@@ -172,4 +175,127 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
  *  The number of packets in the table
  */
 uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
+
+/*
+ * Check if two TCP/IPv4 packets belong to the same flow.
+ */
+static inline int
+is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
+{
+	return (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) &&
+			is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) &&
+			(k1.ip_src_addr == k2.ip_src_addr) &&
+			(k1.ip_dst_addr == k2.ip_dst_addr) &&
+			(k1.recv_ack == k2.recv_ack) &&
+			(k1.src_port == k2.src_port) &&
+			(k1.dst_port == k2.dst_port));
+}
+
+/*
+ * Merge two TCP/IPv4 packets without updating checksums.
+ * If cmp is larger than 0, append the new packet to the
+ * original packet. Otherwise, pre-pend the new packet to
+ * the original packet.
+ */
+static inline int
+merge_two_tcp4_packets(struct gro_tcp4_item *item,
+		struct rte_mbuf *pkt,
+		int cmp,
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint16_t l2_offset)
+{
+	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
+	uint16_t hdr_len, l2_len;
+
+	if (cmp > 0) {
+		pkt_head = item->firstseg;
+		pkt_tail = pkt;
+	} else {
+		pkt_head = pkt;
+		pkt_tail = item->firstseg;
+	}
+
+	/* check if the IPv4 packet length is greater than the max value */
+	hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
+		pkt_head->l4_len;
+	l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
+	if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
+				hdr_len > MAX_IPV4_PKT_LENGTH))
+		return 0;
+
+	/* remove the packet header for the tail packet */
+	rte_pktmbuf_adj(pkt_tail, hdr_len);
+
+	/* chain two packets together */
+	if (cmp > 0) {
+		item->lastseg->next = pkt;
+		item->lastseg = rte_pktmbuf_lastseg(pkt);
+		/* update IP ID to the larger value */
+		item->ip_id = ip_id;
+	} else {
+		lastseg = rte_pktmbuf_lastseg(pkt);
+		lastseg->next = item->firstseg;
+		item->firstseg = pkt;
+		/* update sent_seq to the smaller value */
+		item->sent_seq = sent_seq;
+		item->ip_id = ip_id;
+	}
+	item->nb_merged++;
+
+	/* update MBUF metadata for the merged packet */
+	pkt_head->nb_segs += pkt_tail->nb_segs;
+	pkt_head->pkt_len += pkt_tail->pkt_len;
+
+	return 1;
+}
+
+/*
+ * Check if two TCP/IPv4 packets are neighbors.
+ */
+static inline int
+check_seq_option(struct gro_tcp4_item *item,
+		struct tcp_hdr *tcph,
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint16_t tcp_hl,
+		uint16_t tcp_dl,
+		uint16_t l2_offset,
+		uint8_t is_atomic)
+{
+	struct rte_mbuf *pkt_orig = item->firstseg;
+	struct ipv4_hdr *iph_orig;
+	struct tcp_hdr *tcph_orig;
+	uint16_t len, tcp_hl_orig;
+
+	iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
+			l2_offset + pkt_orig->l2_len);
+	tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
+	tcp_hl_orig = pkt_orig->l4_len;
+
+	/* Check if TCP option fields equal */
+	len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct tcp_hdr);
+	if ((tcp_hl != tcp_hl_orig) || ((len > 0) &&
+				(memcmp(tcph + 1, tcph_orig + 1,
+					len) != 0)))
+		return 0;
+
+	/* Don't merge packets whose DF bits are different */
+	if (unlikely(item->is_atomic ^ is_atomic))
+		return 0;
+
+	/* check if the two packets are neighbors */
+	len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
+		pkt_orig->l3_len - tcp_hl_orig;
+	if ((sent_seq == item->sent_seq + len) && (is_atomic ||
+				(ip_id == item->ip_id + 1)))
+		/* append the new packet */
+		return 1;
+	else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
+				(ip_id + item->nb_merged == item->ip_id)))
+		/* pre-pend the new packet */
+		return -1;
+
+	return 0;
+}
 #endif
diff --git a/lib/librte_gro/gro_vxlan_tcp4.c b/lib/librte_gro/gro_vxlan_tcp4.c
new file mode 100644
index 0000000..ca86f01
--- /dev/null
+++ b/lib/librte_gro/gro_vxlan_tcp4.c
@@ -0,0 +1,494 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_udp.h>
+
+#include "gro_vxlan_tcp4.h"
+
+void *
+gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow)
+{
+	struct gro_vxlan_tcp4_tbl *tbl;
+	size_t size;
+	uint32_t entries_num, i;
+
+	entries_num = max_flow_num * max_item_per_flow;
+	entries_num = RTE_MIN(entries_num, GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM);
+
+	if (entries_num == 0)
+		return NULL;
+
+	tbl = rte_zmalloc_socket(__func__,
+			sizeof(struct gro_vxlan_tcp4_tbl),
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl == NULL)
+		return NULL;
+
+	size = sizeof(struct gro_vxlan_tcp4_item) * entries_num;
+	tbl->items = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->items == NULL) {
+		rte_free(tbl);
+		return NULL;
+	}
+	tbl->max_item_num = entries_num;
+
+	size = sizeof(struct gro_vxlan_tcp4_flow) * entries_num;
+	tbl->flows = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->flows == NULL) {
+		rte_free(tbl->items);
+		rte_free(tbl);
+		return NULL;
+	}
+
+	for (i = 0; i < entries_num; i++)
+		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+	tbl->max_flow_num = entries_num;
+
+	return tbl;
+}
+
+void
+gro_vxlan_tcp4_tbl_destroy(void *tbl)
+{
+	struct gro_vxlan_tcp4_tbl *vxlan_tbl = tbl;
+
+	if (vxlan_tbl) {
+		rte_free(vxlan_tbl->items);
+		rte_free(vxlan_tbl->flows);
+	}
+	rte_free(vxlan_tbl);
+}
+
+static inline uint32_t
+find_an_empty_item(struct gro_vxlan_tcp4_tbl *tbl)
+{
+	uint32_t max_item_num = tbl->max_item_num, i;
+
+	for (i = 0; i < max_item_num; i++)
+		if (tbl->items[i].inner_item.firstseg == NULL)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+find_an_empty_flow(struct gro_vxlan_tcp4_tbl *tbl)
+{
+	uint32_t max_flow_num = tbl->max_flow_num, i;
+
+	for (i = 0; i < max_flow_num; i++)
+		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+insert_new_item(struct gro_vxlan_tcp4_tbl *tbl,
+		struct rte_mbuf *pkt,
+		uint64_t start_time,
+		uint32_t prev_idx,
+		uint32_t sent_seq,
+		uint16_t outer_ip_id,
+		uint16_t ip_id,
+		uint8_t outer_is_atomic,
+		uint8_t is_atomic)
+{
+	uint32_t item_idx;
+
+	item_idx = find_an_empty_item(tbl);
+	if (unlikely(item_idx == INVALID_ARRAY_INDEX))
+		return INVALID_ARRAY_INDEX;
+
+	tbl->items[item_idx].inner_item.firstseg = pkt;
+	tbl->items[item_idx].inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
+	tbl->items[item_idx].inner_item.start_time = start_time;
+	tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
+	tbl->items[item_idx].inner_item.sent_seq = sent_seq;
+	tbl->items[item_idx].inner_item.ip_id = ip_id;
+	tbl->items[item_idx].inner_item.nb_merged = 1;
+	tbl->items[item_idx].inner_item.is_atomic = is_atomic;
+	tbl->items[item_idx].outer_ip_id = outer_ip_id;
+	tbl->items[item_idx].outer_is_atomic = outer_is_atomic;
+	tbl->item_num++;
+
+	/* If the previous packet exists, chain the new one with it. */
+	if (prev_idx != INVALID_ARRAY_INDEX) {
+		tbl->items[item_idx].inner_item.next_pkt_idx =
+			tbl->items[prev_idx].inner_item.next_pkt_idx;
+		tbl->items[prev_idx].inner_item.next_pkt_idx = item_idx;
+	}
+
+	return item_idx;
+}
+
+static inline uint32_t
+delete_item(struct gro_vxlan_tcp4_tbl *tbl,
+		uint32_t item_idx,
+		uint32_t prev_item_idx)
+{
+	uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
+
+	/* NULL indicates an empty item. */
+	tbl->items[item_idx].inner_item.firstseg = NULL;
+	tbl->item_num--;
+	if (prev_item_idx != INVALID_ARRAY_INDEX)
+		tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
+
+	return next_idx;
+}
+
+static inline uint32_t
+insert_new_flow(struct gro_vxlan_tcp4_tbl *tbl,
+		struct vxlan_tcp4_flow_key *src,
+		uint32_t item_idx)
+{
+	struct vxlan_tcp4_flow_key *dst;
+	uint32_t flow_idx;
+
+	flow_idx = find_an_empty_flow(tbl);
+	if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
+		return INVALID_ARRAY_INDEX;
+
+	dst = &(tbl->flows[flow_idx].key);
+
+	ether_addr_copy(&(src->inner_key.eth_saddr),
+			&(dst->inner_key.eth_saddr));
+	ether_addr_copy(&(src->inner_key.eth_daddr),
+			&(dst->inner_key.eth_daddr));
+	dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
+	dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
+	dst->inner_key.recv_ack = src->inner_key.recv_ack;
+	dst->inner_key.src_port = src->inner_key.src_port;
+	dst->inner_key.dst_port = src->inner_key.dst_port;
+
+	dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
+	dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
+	ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
+	ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
+	dst->outer_ip_src_addr = src->outer_ip_src_addr;
+	dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
+	dst->outer_src_port = src->outer_src_port;
+	dst->outer_dst_port = src->outer_dst_port;
+
+	tbl->flows[flow_idx].start_index = item_idx;
+	tbl->flow_num++;
+
+	return flow_idx;
+}
+
+static inline int
+is_same_vxlan_tcp4_flow(struct vxlan_tcp4_flow_key k1,
+		struct vxlan_tcp4_flow_key k2)
+{
+	return (is_same_ether_addr(&k1.outer_eth_saddr, &k2.outer_eth_saddr) &&
+			is_same_ether_addr(&k1.outer_eth_daddr,
+				&k2.outer_eth_daddr) &&
+			(k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
+			(k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
+			(k1.outer_src_port == k2.outer_src_port) &&
+			(k1.outer_dst_port == k2.outer_dst_port) &&
+			(k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
+			(k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
+			is_same_tcp4_flow(k1.inner_key, k2.inner_key));
+}
+
+static inline int
+check_vxlan_seq_option(struct gro_vxlan_tcp4_item *item,
+		struct tcp_hdr *tcp_hdr,
+		uint32_t sent_seq,
+		uint16_t outer_ip_id,
+		uint16_t ip_id,
+		uint16_t tcp_hl,
+		uint16_t tcp_dl,
+		uint8_t outer_is_atomic,
+		uint8_t is_atomic)
+{
+	struct rte_mbuf *pkt = item->inner_item.firstseg;
+	int cmp;
+	uint16_t l2_offset;
+
+	/* Don't merge packets whose outer DF bits are different. */
+	if (unlikely(item->outer_is_atomic ^ outer_is_atomic))
+		return 0;
+
+	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
+	cmp = check_seq_option(&item->inner_item, tcp_hdr, sent_seq, ip_id,
+			tcp_hl, tcp_dl, l2_offset, is_atomic);
+	if ((cmp > 0) && (outer_is_atomic ||
+				(outer_ip_id == item->outer_ip_id + 1)))
+		/* Append the new packet. */
+		return 1;
+	else if ((cmp < 0) && (outer_is_atomic ||
+				(outer_ip_id + item->inner_item.nb_merged ==
+				 item->outer_ip_id)))
+		/* Prepend the new packet. */
+		return -1;
+
+	return 0;
+}
+
+static inline int
+merge_two_vxlan_tcp4_packets(struct gro_vxlan_tcp4_item *item,
+		struct rte_mbuf *pkt,
+		int cmp,
+		uint32_t sent_seq,
+		uint16_t outer_ip_id,
+		uint16_t ip_id)
+{
+	if (merge_two_tcp4_packets(&item->inner_item, pkt, cmp, sent_seq,
+				ip_id, pkt->outer_l2_len +
+				pkt->outer_l3_len)) {
+		/* Update the outer IPv4 ID to the larger value. */
+		item->outer_ip_id = cmp > 0 ? outer_ip_id : item->outer_ip_id;
+		return 1;
+	}
+
+	return 0;
+}
+
+static inline void
+update_vxlan_header(struct gro_vxlan_tcp4_item *item)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct udp_hdr *udp_hdr;
+	struct rte_mbuf *pkt = item->inner_item.firstseg;
+	uint16_t len;
+
+	/* Update the outer IPv4 header. */
+	len = pkt->pkt_len - pkt->outer_l2_len;
+	ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			pkt->outer_l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
+
+	/* Update the outer UDP header. */
+	len -= pkt->outer_l3_len;
+	udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + pkt->outer_l3_len);
+	udp_hdr->dgram_len = rte_cpu_to_be_16(len);
+
+	/* Update the inner IPv4 header. */
+	len -= pkt->l2_len;
+	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
+}
+
+int32_t
+gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
+		struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t start_time)
+{
+	struct ether_hdr *outer_eth_hdr, *eth_hdr;
+	struct ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
+	struct tcp_hdr *tcp_hdr;
+	struct udp_hdr *udp_hdr;
+	struct vxlan_hdr *vxlan_hdr;
+	uint32_t sent_seq;
+	uint16_t tcp_dl, frag_off, outer_ip_id, ip_id;
+	uint8_t outer_is_atomic, is_atomic;
+
+	struct vxlan_tcp4_flow_key key;
+	uint32_t cur_idx, prev_idx, item_idx;
+	uint32_t i, max_flow_num, remaining_flow_num;
+	int cmp;
+	uint16_t hdr_len;
+	uint8_t find;
+
+	outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+	outer_ipv4_hdr = (struct ipv4_hdr *)((char *)outer_eth_hdr +
+			pkt->outer_l2_len);
+	udp_hdr = (struct udp_hdr *)((char *)outer_ipv4_hdr +
+			pkt->outer_l3_len);
+	vxlan_hdr = (struct vxlan_hdr *)((char *)udp_hdr +
+			sizeof(struct udp_hdr));
+	eth_hdr = (struct ether_hdr *)((char *)vxlan_hdr +
+			sizeof(struct vxlan_hdr));
+	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
+	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+
+	/*
+	 * Don't process a packet which has the FIN, SYN, RST, PSH, URG,
+	 * ECE or CWR bit set.
+	 */
+	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
+		return -1;
+
+	hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
+		pkt->l3_len + pkt->l4_len;
+	/*
+	 * Don't process the packet whose payload length is less than or
+	 * equal to 0.
+	 */
+	tcp_dl = pkt->pkt_len - hdr_len;
+	if (tcp_dl <= 0)
+		return -1;
+
+	/*
+	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
+	 * whose DF bit is 1, IPv4 ID is ignored.
+	 */
+	frag_off = rte_be_to_cpu_16(outer_ipv4_hdr->fragment_offset);
+	outer_is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
+	outer_ip_id = outer_is_atomic ? 0 :
+		rte_be_to_cpu_16(outer_ipv4_hdr->packet_id);
+	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+	is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
+	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);
+
+	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
+
+	ether_addr_copy(&(eth_hdr->s_addr), &(key.inner_key.eth_saddr));
+	ether_addr_copy(&(eth_hdr->d_addr), &(key.inner_key.eth_daddr));
+	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
+	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
+	key.inner_key.recv_ack = tcp_hdr->recv_ack;
+	key.inner_key.src_port = tcp_hdr->src_port;
+	key.inner_key.dst_port = tcp_hdr->dst_port;
+
+	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
+	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;
+	ether_addr_copy(&(outer_eth_hdr->s_addr), &(key.outer_eth_saddr));
+	ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
+	key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
+	key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
+	key.outer_src_port = udp_hdr->src_port;
+	key.outer_dst_port = udp_hdr->dst_port;
+
+	/* Search for a matched flow. */
+	max_flow_num = tbl->max_flow_num;
+	remaining_flow_num = tbl->flow_num;
+	find = 0;
+	for (i = 0; i < max_flow_num && remaining_flow_num; i++) {
+		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) {
+			if (is_same_vxlan_tcp4_flow(tbl->flows[i].key, key)) {
+				find = 1;
+				break;
+			}
+			remaining_flow_num--;
+		}
+	}
+
+	/*
+	 * Can't find a matched flow. Insert a new flow and store the
+	 * packet into the flow.
+	 */
+	if (find == 0) {
+		item_idx = insert_new_item(tbl, pkt, start_time,
+				INVALID_ARRAY_INDEX, sent_seq, outer_ip_id,
+				ip_id, outer_is_atomic, is_atomic);
+		if (item_idx == INVALID_ARRAY_INDEX)
+			return -1;
+		if (insert_new_flow(tbl, &key, item_idx) ==
+				INVALID_ARRAY_INDEX) {
+			/*
+			 * Fail to insert a new flow, so
+			 * delete the inserted packet.
+			 */
+			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
+			return -1;
+		}
+		return 0;
+	}
+
+	/* Check all packets in the flow and try to find a neighbor. */
+	cur_idx = tbl->flows[i].start_index;
+	prev_idx = cur_idx;
+	do {
+		cmp = check_vxlan_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
+				sent_seq, outer_ip_id, ip_id, pkt->l4_len,
+				tcp_dl, outer_is_atomic, is_atomic);
+		if (cmp) {
+			if (merge_two_vxlan_tcp4_packets(&(tbl->items[cur_idx]),
+						pkt, cmp, sent_seq,
+						outer_ip_id, ip_id))
+				return 1;
+			/*
+			 * Can't merge two packets, as the packet
+			 * length will be greater than the max value.
+			 * Insert the packet into the flow.
+			 */
+			if (insert_new_item(tbl, pkt, start_time, prev_idx,
+						sent_seq, outer_ip_id,
+						ip_id, outer_is_atomic,
+						is_atomic) ==
+					INVALID_ARRAY_INDEX)
+				return -1;
+			return 0;
+		}
+		prev_idx = cur_idx;
+		cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx;
+	} while (cur_idx != INVALID_ARRAY_INDEX);
+
+	/* Can't find neighbor. Insert the packet into the flow. */
+	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
+				outer_ip_id, ip_id, outer_is_atomic,
+				is_atomic) == INVALID_ARRAY_INDEX)
+		return -1;
+
+	return 0;
+}
+
+uint16_t
+gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out)
+{
+	uint16_t k = 0;
+	uint32_t i, j;
+	uint32_t max_flow_num = tbl->max_flow_num;
+
+	for (i = 0; i < max_flow_num; i++) {
+		if (unlikely(tbl->flow_num == 0))
+			return k;
+
+		j = tbl->flows[i].start_index;
+		while (j != INVALID_ARRAY_INDEX) {
+			if (tbl->items[j].inner_item.start_time <=
+					flush_timestamp) {
+				out[k++] = tbl->items[j].inner_item.firstseg;
+				if (tbl->items[j].inner_item.nb_merged > 1)
+					update_vxlan_header(&(tbl->items[j]));
+				/*
+				 * Delete the item and get the next packet
+				 * index.
+				 */
+				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+				tbl->flows[i].start_index = j;
+				if (j == INVALID_ARRAY_INDEX)
+					tbl->flow_num--;
+
+				if (unlikely(k == nb_out))
+					return k;
+			} else
+				/*
+				 * The remaining packets in the flow won't
+				 * time out. Go on to check other flows.
+				 */
+				break;
+		}
+	}
+	return k;
+}
+
+uint32_t
+gro_vxlan_tcp4_tbl_pkt_count(void *tbl)
+{
+	struct gro_vxlan_tcp4_tbl *gro_tbl = tbl;
+
+	if (gro_tbl)
+		return gro_tbl->item_num;
+
+	return 0;
+}
diff --git a/lib/librte_gro/gro_vxlan_tcp4.h b/lib/librte_gro/gro_vxlan_tcp4.h
new file mode 100644
index 0000000..0cafb92
--- /dev/null
+++ b/lib/librte_gro/gro_vxlan_tcp4.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _GRO_VXLAN_TCP4_H_
+#define _GRO_VXLAN_TCP4_H_
+
+#include "gro_tcp4.h"
+
+#define GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
+
+/* Header fields representing a VxLAN flow */
+struct vxlan_tcp4_flow_key {
+	struct tcp4_flow_key inner_key;
+	struct vxlan_hdr vxlan_hdr;
+
+	struct ether_addr outer_eth_saddr;
+	struct ether_addr outer_eth_daddr;
+
+	uint32_t outer_ip_src_addr;
+	uint32_t outer_ip_dst_addr;
+
+	/* Outer UDP ports */
+	uint16_t outer_src_port;
+	uint16_t outer_dst_port;
+
+};
+
+struct gro_vxlan_tcp4_flow {
+	struct vxlan_tcp4_flow_key key;
+	/*
+	 * The index of the first packet in the flow. INVALID_ARRAY_INDEX
+	 * indicates an empty flow.
+	 */
+	uint32_t start_index;
+};
+
+struct gro_vxlan_tcp4_item {
+	struct gro_tcp4_item inner_item;
+	/* IPv4 ID in the outer IPv4 header */
+	uint16_t outer_ip_id;
+	/* Indicate if outer IPv4 ID can be ignored */
+	uint8_t outer_is_atomic;
+};
+
+/*
+ * VxLAN (with an outer IPv4 header and an inner TCP/IPv4 packet)
+ * reassembly table structure
+ */
+struct gro_vxlan_tcp4_tbl {
+	/* item array */
+	struct gro_vxlan_tcp4_item *items;
+	/* flow array */
+	struct gro_vxlan_tcp4_flow *flows;
+	/* current item number */
+	uint32_t item_num;
+	/* current flow number */
+	uint32_t flow_num;
+	/* the maximum item number */
+	uint32_t max_item_num;
+	/* the maximum flow number */
+	uint32_t max_flow_num;
+};
+
+/**
+ * This function creates a VxLAN reassembly table for VxLAN packets
+ * which have an outer IPv4 header and an inner TCP/IPv4 packet.
+ *
+ * @param socket_id
+ *  Socket index for allocating the table
+ * @param max_flow_num
+ *  The maximum number of flows in the table
+ * @param max_item_per_flow
+ *  The maximum number of packets per flow
+ *
+ * @return
+ *  - Return the table pointer on success.
+ *  - Return NULL on failure.
+ */
+void *gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow);
+
+/**
+ * This function destroys a VxLAN reassembly table.
+ *
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ */
+void gro_vxlan_tcp4_tbl_destroy(void *tbl);
+
+/**
+ * This function merges a VxLAN packet which has an outer IPv4 header and
+ * an inner TCP/IPv4 packet. It doesn't process a packet whose TCP
+ * header has the SYN, FIN, RST, PSH, CWR, ECE or URG bit set, or
+ * which doesn't have a payload.
+ *
+ * This function doesn't check if the packet has correct checksums and
+ * doesn't re-calculate checksums for the merged packet. Additionally,
+ * it assumes the packets are complete (i.e., MF==0 && frag_off==0), when
+ * IP fragmentation is possible (i.e., DF==0). It returns the packet, if
+ * the packet has invalid parameters (e.g. SYN bit is set) or there is no
+ * available space in the table.
+ *
+ * @param pkt
+ *  Packet to reassemble
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ * @param start_time
+ *  The time when the packet is inserted into the table
+ *
+ * @return
+ *  - Return a positive value if the packet is merged.
+ *  - Return zero if the packet isn't merged but stored in the table.
+ *  - Return a negative value for invalid parameters or no available
+ *    space in the table.
+ */
+int32_t gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
+		struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t start_time);
+
+/**
+ * This function flushes timed-out packets from the VxLAN reassembly
+ * table without updating checksums.
+ *
+ * @param tbl
+ *  Pointer pointing to a VxLAN GRO table
+ * @param flush_timestamp
+ *  This function flushes packets which are inserted into the table
+ *  before or at the flush_timestamp.
+ * @param out
+ *  Pointer array used to keep flushed packets
+ * @param nb_out
+ *  The element number in 'out'. It also determines the maximum number of
+ *  packets that can be flushed finally.
+ *
+ * @return
+ *  The number of flushed packets
+ */
+uint16_t gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out);
+
+/**
+ * This function returns the number of the packets in a VxLAN
+ * reassembly table.
+ *
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ *
+ * @return
+ *  The number of packets in the table
+ */
+uint32_t gro_vxlan_tcp4_tbl_pkt_count(void *tbl);
+#endif
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index 0b64866..6618f4d 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -9,6 +9,7 @@
 
 #include "rte_gro.h"
 #include "gro_tcp4.h"
+#include "gro_vxlan_tcp4.h"
 
 typedef void *(*gro_tbl_create_fn)(uint16_t socket_id,
 		uint16_t max_flow_num,
@@ -17,15 +18,28 @@ typedef void (*gro_tbl_destroy_fn)(void *tbl);
 typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl);
 
 static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = {
-		gro_tcp4_tbl_create, NULL};
+		gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, NULL};
 static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
-			gro_tcp4_tbl_destroy, NULL};
+			gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy,
+			NULL};
 static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
-			gro_tcp4_tbl_pkt_count, NULL};
+			gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count,
+			NULL};
 
 #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
 		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
 
+#define IS_IPV4_VXLAN_TCP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
+		((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \
+		((ptype & RTE_PTYPE_TUNNEL_VXLAN) == \
+		 RTE_PTYPE_TUNNEL_VXLAN) && \
+		 ((ptype & RTE_PTYPE_INNER_L4_TCP) == \
+		  RTE_PTYPE_INNER_L4_TCP) && \
+		  (((ptype & RTE_PTYPE_INNER_L3_MASK) & \
+		    (RTE_PTYPE_INNER_L3_IPV4 | \
+		     RTE_PTYPE_INNER_L3_IPV4_EXT | \
+		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0))
+
 /*
  * GRO context structure. It keeps the table structures, which are
  * used to merge packets, for different GRO types. Before using
@@ -109,12 +123,20 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
 	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
 
+	/* Allocate a reassembly table for VXLAN GRO */
+	struct gro_vxlan_tcp4_tbl vxlan_tbl;
+	struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {
+		{{0}, 0, 0} };
+
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	uint32_t item_num;
 	int32_t ret;
 	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
+	uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0;
 
-	if (unlikely((param->gro_types & RTE_GRO_TCP_IPV4) == 0))
+	if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
+					RTE_GRO_TCP_IPV4)) == 0))
 		return nb_pkts;
 
 	/* Get the maximum number of packets */
@@ -122,22 +144,47 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 				param->max_item_per_flow));
 	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
 
-	for (i = 0; i < item_num; i++)
-		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
+	if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
+		for (i = 0; i < item_num; i++)
+			vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		vxlan_tbl.flows = vxlan_flows;
+		vxlan_tbl.items = vxlan_items;
+		vxlan_tbl.flow_num = 0;
+		vxlan_tbl.item_num = 0;
+		vxlan_tbl.max_flow_num = item_num;
+		vxlan_tbl.max_item_num = item_num;
+		do_vxlan_gro = 1;
+	}
 
-	tcp_tbl.flows = tcp_flows;
-	tcp_tbl.items = tcp_items;
-	tcp_tbl.flow_num = 0;
-	tcp_tbl.item_num = 0;
-	tcp_tbl.max_flow_num = item_num;
-	tcp_tbl.max_item_num = item_num;
+	if (param->gro_types & RTE_GRO_TCP_IPV4) {
+		for (i = 0; i < item_num; i++)
+			tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		tcp_tbl.flows = tcp_flows;
+		tcp_tbl.items = tcp_items;
+		tcp_tbl.flow_num = 0;
+		tcp_tbl.item_num = 0;
+		tcp_tbl.max_flow_num = item_num;
+		tcp_tbl.max_item_num = item_num;
+		do_tcp4_gro = 1;
+	}
 
 	for (i = 0; i < nb_pkts; i++) {
-		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
-			/*
-			 * The timestamp is ignored, since all packets
-			 * will be flushed from the tables.
-			 */
+		/*
+		 * The timestamp is ignored, since all packets
+		 * will be flushed from the tables.
+		 */
+		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
+				do_vxlan_gro) {
+			ret = gro_vxlan_tcp4_reassemble(pkts[i], &vxlan_tbl, 0);
+			if (ret > 0)
+				/* Merge successfully */
+				nb_after_gro--;
+			else if (ret < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
+				do_tcp4_gro) {
 			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0);
 			if (ret > 0)
 				/* merge successfully */
@@ -149,8 +196,16 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 	}
 
 	if (nb_after_gro < nb_pkts) {
+		i = 0;
 		/* Flush all packets from the tables */
-		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, pkts, nb_pkts);
+		if (do_vxlan_gro) {
+			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
+					0, pkts, nb_pkts);
+		}
+		if (do_tcp4_gro) {
+			i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
+					&pkts[i], nb_pkts - i);
+		}
 		/* Copy unprocessed packets */
 		if (unprocess_num > 0) {
 			memcpy(&pkts[i], unprocess_pkts,
@@ -169,18 +224,33 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
 {
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	struct gro_ctx *gro_ctx = ctx;
-	void *tcp_tbl;
+	void *tcp_tbl, *vxlan_tbl;
 	uint64_t current_time;
 	uint16_t i, unprocess_num = 0;
+	uint8_t do_tcp4_gro, do_vxlan_gro;
 
-	if (unlikely((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0))
+	if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
+					RTE_GRO_TCP_IPV4)) == 0))
 		return nb_pkts;
 
 	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
+	vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
+
+	do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
+		RTE_GRO_TCP_IPV4;
+	do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
+		RTE_GRO_IPV4_VXLAN_TCP_IPV4;
+
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
+				do_vxlan_gro) {
+			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
+						current_time) < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
+				do_tcp4_gro) {
 			if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
 						current_time) < 0)
 				unprocess_pkts[unprocess_num++] = pkts[i];
@@ -204,18 +274,27 @@ rte_gro_timeout_flush(void *ctx,
 {
 	struct gro_ctx *gro_ctx = ctx;
 	uint64_t flush_timestamp;
+	uint16_t num = 0;
 
 	gro_types = gro_types & gro_ctx->gro_types;
 	flush_timestamp = rte_rdtsc() - timeout_cycles;
 
-	if (gro_types & RTE_GRO_TCP_IPV4) {
-		return gro_tcp4_tbl_timeout_flush(
+	if (gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
+		num = gro_vxlan_tcp4_tbl_timeout_flush(gro_ctx->tbls[
+				RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX],
+				flush_timestamp, out, max_nb_out);
+		max_nb_out -= num;
+	}
+
+	/* If no available space in 'out', stop flushing. */
+	if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
+		num += gro_tcp4_tbl_timeout_flush(
 				gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX],
 				flush_timestamp,
-				out, max_nb_out);
+				&out[num], max_nb_out);
 	}
 
-	return 0;
+	return num;
 }
 
 uint64_t
diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
index 85d8143..8d781b5 100644
--- a/lib/librte_gro/rte_gro.h
+++ b/lib/librte_gro/rte_gro.h
@@ -23,12 +23,15 @@ extern "C" {
  */
 #define RTE_GRO_TYPE_MAX_NUM 64
 /**< the max number of supported GRO types */
-#define RTE_GRO_TYPE_SUPPORT_NUM 1
+#define RTE_GRO_TYPE_SUPPORT_NUM 2
 /**< the number of currently supported GRO types */
 
 #define RTE_GRO_TCP_IPV4_INDEX 0
 #define RTE_GRO_TCP_IPV4 (1ULL << RTE_GRO_TCP_IPV4_INDEX)
 /**< TCP/IPv4 GRO flag */
+#define RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX 1
+#define RTE_GRO_IPV4_VXLAN_TCP_IPV4 (1ULL << RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX)
+/**< VxLAN GRO flag. */
 
 /**
  * Structure used to create GRO context objects or used to pass
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v5 0/3] Support VxLAN GRO
  2018-01-10 14:03       ` [PATCH v5 0/3] Support " Jiayu Hu
                           ` (2 preceding siblings ...)
  2018-01-10 14:03         ` [PATCH v5 3/3] gro: support VxLAN GRO Jiayu Hu
@ 2018-01-11 22:15         ` Thomas Monjalon
  3 siblings, 0 replies; 31+ messages in thread
From: Thomas Monjalon @ 2018-01-11 22:15 UTC (permalink / raw)
  To: Jiayu Hu; +Cc: dev, junjie.j.chen, jianfeng.tan, lei.a.yao

10/01/2018 15:03, Jiayu Hu:
> Jiayu Hu (3):
>   gro: codes cleanup
>   gro: comply RFC 6864 to process IPv4 ID
>   gro: support VxLAN GRO

Applied, thanks

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2018-01-11 22:16 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-25  3:17 [PATCH 0/2] Support VxLAN GRO Jiayu Hu
2017-11-25  3:17 ` [PATCH 1/2] gro: TCP/IPV4 GRO codes cleanup Jiayu Hu
2017-11-25  3:17 ` [PATCH 2/2] gro: support VxLAN GRO Jiayu Hu
2017-12-14  2:49 ` [PATCH v2 0/2] Support " Jiayu Hu
2017-12-14  2:49   ` [PATCH v2 1/2] gro: code cleanup Jiayu Hu
2017-12-14  2:49   ` [PATCH v2 2/2] gro: support VxLAN GRO Jiayu Hu
2017-12-14  2:58     ` Stephen Hemminger
2017-12-14  3:02     ` Stephen Hemminger
2017-12-14  4:37       ` Hu, Jiayu
2017-12-22  7:25   ` [PATCH v3 0/2] Support " Jiayu Hu
2017-12-22  7:25     ` [PATCH v3 1/2] gro: code cleanup Jiayu Hu
2017-12-29  3:53       ` Chen, Junjie J
2018-01-02 11:26       ` Bruce Richardson
2018-01-03  1:07         ` Tan, Jianfeng
2018-01-03  1:27           ` Stephen Hemminger
2017-12-22  7:25     ` [PATCH v3 2/2] gro: support VxLAN GRO Jiayu Hu
2017-12-22  8:17       ` Chen, Junjie J
2017-12-25  6:36         ` Jiayu Hu
2017-12-29  3:53       ` Chen, Junjie J
2017-12-29  3:52     ` [PATCH v3 0/2] Support " Chen, Junjie J
2018-01-05  6:12     ` [PATCH v4 " Jiayu Hu
2018-01-05  6:12       ` [PATCH v4 1/2] gro: code cleanup Jiayu Hu
2018-01-08  1:15         ` Yao, Lei A
2018-01-10  0:09         ` Thomas Monjalon
2018-01-10  1:55           ` Hu, Jiayu
2018-01-05  6:12       ` [PATCH v4 2/2] gro: support VxLAN GRO Jiayu Hu
2018-01-10 14:03       ` [PATCH v5 0/3] Support " Jiayu Hu
2018-01-10 14:03         ` [PATCH v5 1/3] gro: codes cleanup Jiayu Hu
2018-01-10 14:03         ` [PATCH v5 2/3] gro: comply RFC 6864 to process IPv4 ID Jiayu Hu
2018-01-10 14:03         ` [PATCH v5 3/3] gro: support VxLAN GRO Jiayu Hu
2018-01-11 22:15         ` [PATCH v5 0/3] Support " Thomas Monjalon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.