* [PATCH 7/7] cxgb3 - Add SW LRO support
@ 2007-02-22 11:59 divy
  0 siblings, 0 replies; 9+ messages in thread
From: divy @ 2007-02-22 11:59 UTC
  To: jeff; +Cc: netdev, linux-kernel, swise

From: Divy Le Ray <divy@chelsio.com>

Add all-in-software LRO support.

Signed-off-by: Divy Le Ray <divy@chelsio.com>
---

 drivers/net/cxgb3/adapter.h     |   21 ++
 drivers/net/cxgb3/common.h      |    1 
 drivers/net/cxgb3/cxgb3_ioctl.h |    1 
 drivers/net/cxgb3/cxgb3_main.c  |   16 ++
 drivers/net/cxgb3/sge.c         |  341 ++++++++++++++++++++++++++++++++++++++-
 drivers/net/cxgb3/t3_cpl.h      |   10 +
 6 files changed, 384 insertions(+), 6 deletions(-)
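
For orientation before the diff: the heart of the patch is a small
per-queue-set session table, indexed by the RSS hash and searched
linearly from the hinted slot.  A minimal standalone sketch of that
lookup/append cycle (plain C with illustrative names, not part of the
patch itself):

#include <stdint.h>

#define MAX_LRO_PER_QSET 8	/* power of two, as in the patch */

struct lro_session_model {
	int	 in_use;
	uint32_t saddr, daddr;	/* IPv4 source/destination address */
	uint32_t ports;		/* source+dest TCP ports as one word */
	uint32_t next_seq;	/* next expected TCP sequence number */
};

/* Walk the table from the hash-hinted slot, wrapping modulo the
 * table size, much like lro_find_session() below. */
static struct lro_session_model *
model_find(struct lro_session_model *tbl, uint32_t hash,
	   uint32_t saddr, uint32_t daddr, uint32_t ports)
{
	unsigned int i, idx = hash & (MAX_LRO_PER_QSET - 1);

	for (i = 0; i < MAX_LRO_PER_QSET; i++) {
		struct lro_session_model *s = &tbl[idx];

		if (s->in_use && s->saddr == saddr &&
		    s->daddr == daddr && s->ports == ports)
			return s;
		idx = (idx + 1) & (MAX_LRO_PER_QSET - 1);
	}
	return NULL;	/* caller starts a new session or delivers as-is */
}

/* A segment may extend a session only if it is exactly in order;
 * anything else makes the caller flush, as in lro_update_session(). */
static int model_append(struct lro_session_model *s,
			uint32_t seq, uint32_t payload_len)
{
	if (seq != s->next_seq)
		return -1;
	s->next_seq += payload_len;
	return 0;
}

The real code additionally bounds the walk by the number of active
sessions and reads the match key out of the headers stored in the
coalesced skb itself.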

diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 80c3d8f..576db4a 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -95,6 +95,23 @@ struct sge_fl {			/* SGE per free-buffer
 	unsigned long alloc_failed; /* # of times buffer allocation failed */
 };
 
+/* Max active LRO sessions per queue set */
+#define MAX_LRO_PER_QSET 8
+
+struct sge_lro_session {
+	struct sk_buff *skb;
+	struct sk_buff *skb_last_frag;
+	u32 seq;
+	u16 iplen;
+};
+
+struct sge_lro {
+	unsigned int enabled;
+	unsigned int num_active;
+	struct sge_lro_session *last_s;
+	struct sge_lro_session s[MAX_LRO_PER_QSET];
+};
+
 /*
  * Bundle size for grouping offload RX packets for delivery to the stack.
  * Don't make this too big as we do prefetch on each packet in a bundle.
@@ -164,6 +181,9 @@ enum {				/* per port SGE statistics */
 	SGE_PSTAT_TX_CSUM,	/* # of TX checksum offloads */
 	SGE_PSTAT_VLANEX,	/* # of VLAN tag extractions */
 	SGE_PSTAT_VLANINS,	/* # of VLAN tag insertions */
+	SGE_PSTATS_LRO_QUEUED,	/* # of LRO appended packets */
+	SGE_PSTATS_LRO_FLUSHED,	/* # of LRO flushed packets */
+	SGE_PSTATS_LRO_X_STREAMS,	/* # of exceeded LRO contexts */
 
 	SGE_PSTAT_MAX		/* must be last */
 };
@@ -171,6 +191,7 @@ enum {				/* per port SGE statistics */
 struct sge_qset {		/* an SGE queue set */
 	struct sge_rspq rspq;
 	struct sge_fl fl[SGE_RXQ_PER_SET];
+	struct sge_lro lro;
 	struct sge_txq txq[SGE_TXQ_PER_SET];
 	struct net_device *netdev;	/* associated net device */
 	unsigned long txq_stopped;	/* which Tx queues are stopped */
diff --git a/drivers/net/cxgb3/common.h b/drivers/net/cxgb3/common.h
index e23deeb..1031ad0 100644
--- a/drivers/net/cxgb3/common.h
+++ b/drivers/net/cxgb3/common.h
@@ -322,6 +322,7 @@ struct tp_params {
 
 struct qset_params {		/* SGE queue set parameters */
 	unsigned int polling;	/* polling/interrupt service for rspq */
+	unsigned int lro;	/* large receive offload */
 	unsigned int coalesce_usecs;	/* irq coalescing timer */
 	unsigned int rspq_size;	/* # of entries in response queue */
 	unsigned int fl_size;	/* # of entries in regular free list */
diff --git a/drivers/net/cxgb3/cxgb3_ioctl.h b/drivers/net/cxgb3/cxgb3_ioctl.h
index 42d4bf6..52bb2be 100644
--- a/drivers/net/cxgb3/cxgb3_ioctl.h
+++ b/drivers/net/cxgb3/cxgb3_ioctl.h
@@ -90,6 +90,7 @@ struct ch_qset_params {
 	int32_t fl_size[2];
 	int32_t intr_lat;
 	int32_t polling;
+	int32_t lro;
 	int32_t cong_thres;
 };
 
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index 7ff834e..b78eefb 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -1031,7 +1031,11 @@ static char stats_strings[][ETH_GSTRING_
 	"VLANinsertions     ",
 	"TxCsumOffload      ",
 	"RxCsumGood         ",
-	"RxDrops            "
+	"RxDrops            ",
+
+	"LroQueued          ",
+	"LroFlushed         ",
+	"LroExceededSessions"
 };
 
 static int get_stats_count(struct net_device *dev)
@@ -1145,6 +1149,9 @@ static void get_stats(struct net_device
 	*data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_TX_CSUM);
 	*data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_RX_CSUM_GOOD);
 	*data++ = s->rx_cong_drops;
+	*data++ = collect_sge_port_stats(adapter, pi, SGE_PSTATS_LRO_QUEUED);
+	*data++ = collect_sge_port_stats(adapter, pi, SGE_PSTATS_LRO_FLUSHED);
+	*data++ = collect_sge_port_stats(adapter, pi, SGE_PSTATS_LRO_X_STREAMS);
 }
 
 static inline void reg_block_dump(struct adapter *ap, void *buf,
@@ -1624,6 +1631,12 @@ static int cxgb_extension_ioctl(struct n
 				}
 			}
 		}
+		if (t.lro >= 0) {
+			struct sge_qset *qs = &adapter->sge.qs[t.qset_idx];
+			
+			q->lro = t.lro;
+			qs->lro.enabled = t.lro;
+		}
 		break;
 	}
 	case CHELSIO_GET_QSET_PARAMS:{
@@ -1643,6 +1656,7 @@ static int cxgb_extension_ioctl(struct n
 		t.fl_size[0] = q->fl_size;
 		t.fl_size[1] = q->jumbo_size;
 		t.polling = q->polling;
+		t.lro = q->lro;
 		t.intr_lat = q->coalesce_usecs;
 		t.cong_thres = q->cong_thres;
 
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 850bcc0..c4f8c6a 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -35,6 +35,7 @@
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
+#include <net/tcp.h>
 #include <linux/dma-mapping.h>
 #include "common.h"
 #include "regs.h"
@@ -1710,6 +1711,324 @@ static void rx_eth(struct adapter *adap,
 		netif_rx(skb);
 }
 
+#define IPH_OFFSET (2 + sizeof (struct cpl_rx_pkt) + ETH_HLEN)
+#define SKB_HASHVAL(skb) (skb->priority)
+#define LRO_SESSION_IDX_HINT(skb) (SKB_HASHVAL(skb) & (MAX_LRO_PER_QSET - 1))
+#define LRO_SESSION_IDX_HINT_HASH(hash) (hash & (MAX_LRO_PER_QSET - 1))
+#define LRO_IDX_INC(idx) idx = (idx + 1) & (MAX_LRO_PER_QSET - 1)
+
+static inline struct sge_lro_session *lro_session(struct sge_lro *l, int idx)
+{
+	return l->s + idx;
+}
+
+static inline int lro_match_session(struct sge_lro_session *s,
+				    struct iphdr *iph, struct tcphdr *tcph)
+{
+	struct iphdr *s_iph = (struct iphdr *)(s->skb->data + IPH_OFFSET);
+	struct tcphdr *s_tcph = (struct tcphdr *)(s_iph + 1);
+
+	return *(u32 *) & tcph->source == *(u32 *) & s_tcph->source &&
+	    iph->saddr == s_iph->saddr && iph->daddr == s_iph->daddr;
+}
+
+static inline struct sge_lro_session *lro_find_session(struct sge_lro *l,
+						       int idx,
+						       struct iphdr *iph,
+						       struct tcphdr *tcph)
+{
+	struct sge_lro_session *s;
+	int active = 0;
+
+	while (active < l->num_active) {
+		s = lro_session(l, idx);
+		if (s->skb) {
+			if (lro_match_session(s, iph, tcph)) {
+				l->last_s = s;
+				return s;
+			}
+			active++;
+		}
+		LRO_IDX_INC(idx);
+	}
+
+	return NULL;
+}
+
+static inline void lro_new_session_init(struct sge_lro_session *s,
+					struct sk_buff *skb)
+{
+	struct iphdr *ih = (struct iphdr *)(skb->data + IPH_OFFSET);
+	struct tcphdr *th = (struct tcphdr *)(ih + 1);
+	int iplen = ntohs(ih->tot_len);
+
+	s->skb = skb;
+	s->iplen = iplen;
+	s->seq = ntohl(th->seq) + iplen - sizeof(*ih) - (th->doff << 2);
+}
+
+static void lro_flush_session(struct adapter *adap, struct sge_qset *qs,
+			      struct sge_lro_session *s, struct sk_buff *skb)
+{
+	struct sge_lro *l = &qs->lro;
+	struct iphdr *ih = (struct iphdr *)(s->skb->data + IPH_OFFSET);
+
+	ih->tot_len = htons(s->iplen);
+	ih->check = 0;
+	ih->check = ip_fast_csum((unsigned char *)ih, ih->ihl);
+
+	rx_eth(adap, &qs->rspq, s->skb, 2);
+
+	s->skb = skb;
+	if (skb)
+		lro_new_session_init(s, skb);
+	else
+		l->num_active--;
+
+	qs->port_stats[SGE_PSTATS_LRO_FLUSHED]++;
+}
+
+static inline struct sge_lro_session *lro_new_session(struct adapter *adap,
+						      struct sge_qset *qs,
+						      struct sk_buff *skb)
+{
+	struct sge_lro *l = &qs->lro;
+	int idx = LRO_SESSION_IDX_HINT(skb);
+	struct sge_lro_session *s = lro_session(l, idx);
+
+	if (likely(!s->skb))
+		goto done;
+
+	BUG_ON(l->num_active > MAX_LRO_PER_QSET);
+	if (l->num_active == MAX_LRO_PER_QSET) {
+		lro_flush_session(adap, qs, s, skb);
+		qs->port_stats[SGE_PSTATS_LRO_X_STREAMS]++;
+		return s;
+	}
+
+	while (1) {
+		LRO_IDX_INC(idx);
+		s = lro_session(l, idx);
+		if (!s->skb)
+			break;
+	}
+
+done:
+	lro_new_session_init(s, skb);
+
+	l->num_active++;
+	return s;
+}
+
+static inline void sge_lro_flush_all(struct adapter *adap, struct sge_qset *qs)
+{
+	struct sge_lro *l = &qs->lro;
+	struct sge_lro_session *s = l->last_s;
+	int active = 0, idx = 0, num_active = l->num_active;
+
+	if (unlikely(!s))
+		s = lro_session(l, idx);
+
+	while (active < num_active) {
+		if (s->skb) {
+			lro_flush_session(adap, qs, s, NULL);
+			active++;
+		}
+		LRO_IDX_INC(idx);
+		s = lro_session(l, idx);
+	}
+}
+
+static inline int can_lro_packet(struct cpl_rx_pkt *cpl, unsigned int rss_hi)
+{
+	struct ethhdr *eh = (struct ethhdr *)(cpl + 1);
+	struct iphdr *ih = (struct iphdr *)(eh + 1);
+
+	if (unlikely(G_HASHTYPE(ntohl(rss_hi)) != RSS_HASH_4_TUPLE ||
+		     (*((u8 *) cpl + 1) & 0x90) != 0x10 ||
+		     cpl->csum != 0xffff || eh->h_proto != ntohs(ETH_P_IP) ||
+		     ih->ihl != (sizeof(*ih) >> 2))) {
+		return 0;
+	}
+
+	return 1;
+}
+
+static inline int can_lro_tcpsegment(struct tcphdr *th)
+{
+	int olen = (th->doff << 2) - sizeof(*th);
+	u8 control_bits = *((u8 *) th + 13);
+
+	if (unlikely((control_bits & 0xB7) != 0x10))
+		goto no_lro;
+
+	if (olen) {
+		u32 *ptr = (u32 *) (th + 1);
+		if (unlikely(olen != TCPOLEN_TSTAMP_ALIGNED ||
+			     *ptr != ntohl((TCPOPT_NOP << 24) |
+					   (TCPOPT_NOP << 16) |
+					   (TCPOPT_TIMESTAMP << 8) |
+					   TCPOLEN_TIMESTAMP)))
+			goto no_lro;
+	}
+
+	return 1;
+
+no_lro:
+	return 0;
+}
+
+static inline int lro_update_session(struct sge_lro_session *s,
+				     unsigned char *va,
+				     struct skb_frag_struct *frag,
+				     struct sk_buff *skb)
+{
+	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(s->skb->data + 2);
+	struct cpl_rx_pkt *ncpl = (struct cpl_rx_pkt *)(va + 2);
+	struct iphdr *nih = (struct iphdr *)(va + IPH_OFFSET);
+	struct tcphdr *th, *nth = (struct tcphdr *)(nih + 1);
+	u32 seq = ntohl(nth->seq);
+	int plen, tcpiphlen, olen = (nth->doff << 2) - sizeof(*nth);
+
+	if (cpl->vlan_valid && cpl->vlan != ncpl->vlan)
+		return -1;
+
+	if (unlikely(seq != s->seq))
+		return -1;
+
+	th = (struct tcphdr *)(s->skb->data + IPH_OFFSET +
+			       sizeof(struct iphdr));
+
+	if (olen) {
+		u32 *ptr = (u32 *) (th + 1), *nptr = (u32 *) (nth + 1);
+
+		if (unlikely(ntohl(*(ptr + 1)) > ntohl(*(nptr + 1)) ||
+			     !*(nptr + 2)))
+			return -1;
+
+		*(ptr + 1) = *(nptr + 1);
+		*(ptr + 2) = *(nptr + 2);
+	}
+	th->ack_seq = nth->ack_seq;
+	th->window = nth->window;
+
+	tcpiphlen = (nth->doff << 2) + sizeof(*nih);
+	plen = ntohs(nih->tot_len) - tcpiphlen;
+	s->seq += plen;
+	s->iplen += plen;
+	s->skb->data_len += plen;
+	s->skb->len += plen;
+	s->skb->truesize += plen;
+
+	if (plen > skb_shinfo(s->skb)->gso_size)
+		skb_shinfo(s->skb)->gso_size = plen;
+
+	if (unlikely(skb)) {
+		skb_pull(skb, skb->len - plen);
+		if (unlikely(!skb_shinfo(s->skb)->frag_list))
+			skb_shinfo(s->skb)->frag_list = skb;
+		else
+			s->skb_last_frag->next = skb;
+		s->skb_last_frag = skb;
+	} else {
+		int nr = skb_shinfo(s->skb)->nr_frags;
+		skb_shinfo(s->skb)->frags[nr].page = frag->page;
+		skb_shinfo(s->skb)->frags[nr].page_offset =
+		    frag->page_offset + IPH_OFFSET + tcpiphlen;
+		skb_shinfo(s->skb)->frags[nr].size = plen;
+		skb_shinfo(s->skb)->nr_frags = ++nr;
+	}
+
+	return 0;
+}
+
+static inline int rx_eth_lro_page(struct adapter *adap, struct sge_qset *qs,
+				  struct sge_fl_page *p, u32 hash, u32 csum,
+				  int *lro)
+{
+	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(p->va + 2);
+	struct iphdr *ih;
+	struct tcphdr *th;
+	struct sge_lro_session *s = NULL;
+
+	if (!can_lro_packet(cpl, csum)) {
+		*lro = 0;
+		goto no_lro;
+	}
+
+	ih = (struct iphdr *)(p->va + IPH_OFFSET);
+	th = (struct tcphdr *)(ih + 1);
+	s = lro_find_session(&qs->lro, LRO_SESSION_IDX_HINT_HASH(hash), ih, th);
+	if (unlikely(!s))
+		goto no_lro;
+
+	/* If we already started LRO via chaining skbs, keep doing it that way.
+	 */
+	if (unlikely(skb_shinfo(s->skb)->frag_list))
+		return -1;
+
+	if (unlikely(!can_lro_tcpsegment(th)))
+		goto no_lro;
+
+	if (lro_update_session(s, p->va, &p->frag, NULL))
+		goto no_lro;
+
+	if (unlikely(skb_shinfo(s->skb)->nr_frags == MAX_SKB_FRAGS ||
+		     s->skb->len + qs->netdev->mtu > 65535))
+		lro_flush_session(adap, qs, s, NULL);
+
+	qs->port_stats[SGE_PSTATS_LRO_QUEUED]++;
+
+	return 0;
+
+no_lro:
+	if (s)
+		lro_flush_session(adap, qs, s, NULL);
+
+	return -1;
+}
+
+static void rx_eth_lro_skb(struct adapter *adap, struct sge_rspq *rq,
+			   struct sk_buff *skb, int ethpad)
+{
+	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(skb->data + ethpad);
+	struct sge_qset *qs = rspq_to_qset(rq);
+	struct iphdr *ih;
+	struct tcphdr *th;
+	struct sge_lro_session *s = NULL;
+
+	if (!can_lro_packet(cpl, skb->csum))
+		goto no_lro;
+
+	ih = (struct iphdr *)(skb->data + IPH_OFFSET);
+	th = (struct tcphdr *)(ih + 1);
+	s = lro_find_session(&qs->lro,
+			     LRO_SESSION_IDX_HINT_HASH(skb->priority), ih, th);
+
+	if (unlikely(!can_lro_tcpsegment(th)))
+		goto no_lro;
+	else if (unlikely(!s))
+		s = lro_new_session(adap, qs, skb);
+	else {
+		if (lro_update_session(s, skb->data, NULL, skb)) {
+			lro_flush_session(adap, qs, s, skb);
+			return;
+		}
+
+		if (unlikely(s->skb->len + qs->netdev->mtu > 65535))
+			lro_flush_session(adap, qs, s, NULL);
+	}
+
+	qs->port_stats[SGE_PSTATS_LRO_QUEUED]++;
+	return;
+
+no_lro:
+	if (s)
+		lro_flush_session(adap, qs, s, NULL);
+
+	rx_eth(adap, rq, skb, ethpad);
+}
+
 #define SKB_DATA_SIZE 128
 
 static void skb_data_init(struct sk_buff *skb, struct sge_fl_page *p,
@@ -1911,7 +2230,7 @@ static int process_responses(struct adap
 	q->next_holdoff = q->holdoff_tmr;
 
 	while (likely(budget_left && is_new_response(r, q))) {
-		int eth, ethpad = 2;
+		int eth, ethpad = 2, lro = qs->lro.enabled;
 		struct sk_buff *skb = NULL;
 		u32 len, flags = ntohl(r->flags);
 		u32 rss_hi = *(const u32 *)r, rss_lo = r->rss_hdr.rss_hash_val;
@@ -1961,6 +2280,13 @@ static int process_responses(struct adap
 					if (unlikely(fl->credits <
 						     SGE_RX_DROP_THRES))
 						goto eth_recycle;
+					
+					if (likely(lro &&
+						   !rx_eth_lro_page(adap, qs,
+								    p, rss_lo,
+								    rss_hi,
+								    &lro)))
+						goto eth_done;
 
 					skb = alloc_skb(SKB_DATA_SIZE,
 							GFP_ATOMIC);
@@ -2016,9 +2342,12 @@ eth_done:
 			skb->csum = rss_hi;
 			skb->priority = rss_lo;
 
-			if (eth)
-				rx_eth(adap, q, skb, ethpad);
-			else {
+			if (eth) {
+				if (likely(lro))
+					rx_eth_lro_skb(adap, q, skb, ethpad);
+				else
+					rx_eth(adap, q, skb, ethpad);
+			} else {
 				if (unlikely(r->rss_hdr.opcode ==
 					     CPL_TRACE_PKT))
 					__skb_pull(skb, ethpad);
@@ -2030,7 +2359,7 @@ eth_done:
 		}
 		--budget_left;
 	}
-
+	sge_lro_flush_all(adap, qs);
 	deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
 	if (sleeping)
 		check_ring_db(adap, qs, sleeping);
@@ -2698,6 +3027,7 @@ int t3_sge_alloc_qset(struct adapter *ad
 	spin_unlock(&adapter->sge.reg_lock);
 	q->netdev = netdev;
 	t3_update_qset_coalesce(q, p);
+	q->lro.enabled = p->lro;
 
 	/*
 	 * We use atalk_ptr as a backpointer to a qset.  In case a device is
@@ -2839,6 +3169,7 @@ void __devinit t3_sge_prep(struct adapte
 
 		q->polling = adap->params.rev > 0;
 		q->coalesce_usecs = 5;
+		q->lro = 1;
 		q->rspq_size = 1024;
 		q->fl_size = 1024;
 		q->jumbo_size = 512;
diff --git a/drivers/net/cxgb3/t3_cpl.h b/drivers/net/cxgb3/t3_cpl.h
index b7a1a31..0f9f67d 100644
--- a/drivers/net/cxgb3/t3_cpl.h
+++ b/drivers/net/cxgb3/t3_cpl.h
@@ -174,6 +174,12 @@ enum {				/* TCP congestion control algo
 	CONG_ALG_HIGHSPEED
 };
 
+enum {				/* RSS hash type */
+	RSS_HASH_NONE = 0,
+	RSS_HASH_2_TUPLE = 1 << 0,
+	RSS_HASH_4_TUPLE = 1 << 1
+};
+
 union opcode_tid {
 	__be32 opcode_tid;
 	__u8 opcode;
@@ -184,6 +190,10 @@ union opcode_tid {
 #define G_OPCODE(x) (((x) >> S_OPCODE) & 0xFF)
 #define G_TID(x)    ((x) & 0xFFFFFF)
 
+#define S_HASHTYPE 22
+#define M_HASHTYPE 0x3
+#define G_HASHTYPE(x) (((x) >> S_HASHTYPE) & M_HASHTYPE)
+
 /* tid is assumed to be 24-bits */
 #define MK_OPCODE_TID(opcode, tid) (V_OPCODE(opcode) | (tid))
 

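The eligibility tests above (can_lro_packet()/can_lro_tcpsegment())
only coalesce plain IPv4 ACK segments whose sole TCP option, if any,
is an aligned timestamp.  A standalone rendering of the TCP-side
check, assuming the caller extracts the flags byte and option bytes
(the function name is ours):

#include <stdint.h>

#define TCPOPT_NOP		1
#define TCPOPT_TIMESTAMP	8
#define TCPOLEN_TIMESTAMP	10
#define TCPOLEN_TSTAMP_ALIGNED	12

/* Byte 13 of the TCP header holds the flags:
 * FIN=0x01 SYN=0x02 RST=0x04 PSH=0x08 ACK=0x10 URG=0x20 ECE=0x40 CWR=0x80.
 * The 0xB7 mask in the patch demands ACK set with FIN/SYN/RST/URG/CWR
 * clear, while ignoring PSH and ECE. */
static int tcp_segment_is_lro_eligible(uint8_t flags, int doff_bytes,
				       const uint8_t *opts)
{
	int olen = doff_bytes - 20;	/* option bytes past the base header */

	if ((flags & 0xB7) != 0x10)
		return 0;

	if (olen == 0)
		return 1;		/* no options at all */

	/* Otherwise accept exactly NOP, NOP, TIMESTAMP, length 10. */
	return olen == TCPOLEN_TSTAMP_ALIGNED &&
	       opts[0] == TCPOPT_NOP && opts[1] == TCPOPT_NOP &&
	       opts[2] == TCPOPT_TIMESTAMP && opts[3] == TCPOLEN_TIMESTAMP;
}

On the IP side, can_lro_packet() additionally requires a 4-tuple RSS
hash, a hardware-verified checksum (cpl->csum == 0xffff), and a bare
20-byte IPv4 header.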

* Re: [PATCH 7/7] cxgb3 - Add SW LRO support
  2007-02-27 16:29         ` Brice Goglin
@ 2007-06-01 20:37           ` Jeff Garzik
  0 siblings, 0 replies; 9+ messages in thread
From: Jeff Garzik @ 2007-06-01 20:37 UTC
  To: Brice Goglin; +Cc: Christoph Hellwig, Steve Wise, divy, netdev, linux-kernel

Brice Goglin wrote:
> Steve Wise wrote:
>> You're right.  But cxgb3 has it now in the driver (tested and working).
>> Shouldn't it be pulled in?  When the network stack design gets done
>> (which could take a few releases to finalize), all the drivers can be
>> updated to use it.  It doesn't seem reasonable to allow some drivers to
>> support LRO and others to not support it...
>>   
> 
> I have to agree with Steve here. We have been requesting the inclusion
> of myri10ge LRO for 5 months now (before 2.6.19). I could understand
> that duplicating LRO code between s2io and myri10ge was not a good idea
> at this point. But, I now see that Netxen got merged way later (in
> 2.6.20) but it got its LRO merged immediately. I guess the LRO is
> already duplicated between s2io and netxen then. It does not look fair
> to me at all.

It was an error on my part, one that should not be compounded.  Three
wrongs don't make a right, etc., etc.

The normal way this stuff works in Linux is that people work together to 
create common code that everybody uses.  I would gladly accept patches 
to rip out the code from NetXen.

	Jeff





* Re: [PATCH 7/7] cxgb3 - Add SW LRO support
  2007-02-27 14:57       ` Steve Wise
@ 2007-02-27 16:29         ` Brice Goglin
  2007-06-01 20:37           ` Jeff Garzik
  0 siblings, 1 reply; 9+ messages in thread
From: Brice Goglin @ 2007-02-27 16:29 UTC
  To: Jeff Garzik, Christoph Hellwig; +Cc: Steve Wise, divy, netdev, linux-kernel

Steve Wise wrote:
> You're right.  But cxgb3 has it now in the driver (tested and working).
> Shouldn't it be pulled in?  When the network stack design gets done
> (which could take a few releases to finalize), all the drivers can be
> updated to use it.  It doesn't seem reasonable to allow some drivers to
> support LRO and others to not support it...
>   

I have to agree with Steve here. We have been requesting the inclusion
of myri10ge LRO for 5 months now (before 2.6.19). I could understand
that duplicating LRO code between s2io and myri10ge was not a good idea
at this point. But, I now see that Netxen got merged way later (in
2.6.20) but it got its LRO merged immediately. I guess the LRO is
already duplicated between s2io and netxen then. It does not look fair
to me at all.

Thank you for understanding our concerns.
Brice



* Re: [PATCH 7/7] cxgb3 - Add SW LRO support
  2007-02-27 14:53     ` Jeff Garzik
@ 2007-02-27 14:57       ` Steve Wise
  2007-02-27 16:29         ` Brice Goglin
  0 siblings, 1 reply; 9+ messages in thread
From: Steve Wise @ 2007-02-27 14:57 UTC
  To: Jeff Garzik; +Cc: Christoph Hellwig, divy, netdev, linux-kernel

On Tue, 2007-02-27 at 09:53 -0500, Jeff Garzik wrote:
> Steve Wise wrote:
> > On Mon, 2007-02-26 at 05:13 +0000, Christoph Hellwig wrote:
> >> On Sat, Feb 24, 2007 at 04:44:23PM -0800, divy@chelsio.com wrote:
> >>> From: Divy Le Ray <divy@chelsio.com>
> >>>
> >>> Add all-in-sw lro support.
> >> Doing this in a LLDD doesn't sound like a good idea.  Have you
> >> tried doing this in the core networking code instead?
> > 
> > It looks like both Netxen and Neterion drivers already have this.  So I
> > think for now the LLDD implements LRO.  The stack supports it by virtue
> 
> That's an argument for not compounding the problem further by
> reinventing the wheel for each driver.

You're right.  But cxgb3 has it now in the driver (tested and working).
Shouldn't it be pulled in?  When the network stack design gets done
(which could take a few releases to finalize), all the drivers can be
updated to use it.  It doesn't seem reasonable to allow some drivers to
support LRO and others to not support it...


Steve.





* Re: [PATCH 7/7] cxgb3 - Add SW LRO support
  2007-02-27 14:17   ` Steve Wise
@ 2007-02-27 14:53     ` Jeff Garzik
  2007-02-27 14:57       ` Steve Wise
  0 siblings, 1 reply; 9+ messages in thread
From: Jeff Garzik @ 2007-02-27 14:53 UTC
  To: Steve Wise; +Cc: Christoph Hellwig, divy, netdev, linux-kernel

Steve Wise wrote:
> On Mon, 2007-02-26 at 05:13 +0000, Christoph Hellwig wrote:
>> On Sat, Feb 24, 2007 at 04:44:23PM -0800, divy@chelsio.com wrote:
>>> From: Divy Le Ray <divy@chelsio.com>
>>>
>>> Add all-in-sw lro support.
>> Doing this in a LLDD doesn't sound like a good idea.  Have you
>> tried doing this in the core networking code instead?
> 
> It looks like both Netxen and Neterion drivers already have this.  So I
> think for now the LLDD implements LRO.  The stack supports it by virtue

That's an argument for not compounding the problem further by
reinventing the wheel for each driver.

	Jeff





* Re: [PATCH 7/7] cxgb3 - Add SW LRO support
  2007-02-26  5:13 ` Christoph Hellwig
  2007-02-27  0:55   ` Divy Le Ray
@ 2007-02-27 14:17   ` Steve Wise
  2007-02-27 14:53     ` Jeff Garzik
  1 sibling, 1 reply; 9+ messages in thread
From: Steve Wise @ 2007-02-27 14:17 UTC
  To: Christoph Hellwig; +Cc: divy, jeff, netdev, linux-kernel

On Mon, 2007-02-26 at 05:13 +0000, Christoph Hellwig wrote:
> On Sat, Feb 24, 2007 at 04:44:23PM -0800, divy@chelsio.com wrote:
> > From: Divy Le Ray <divy@chelsio.com>
> > 
> > Add all-in-sw lro support.
> 
> Doing this in a LLDD doesn't sound like a good idea.  Have you
> tried doing this in the core networking code instead?

It looks like both Netxen and Neterion drivers already have this.  So I
think for now the LLDD implements LRO.  The stack supports it by virtue
of allowing fragmented skbs that are bigger than the MTU.

Other drivers implementing LRO:

drivers/net/s2io.c
drivers/net/netxen/

Steve.
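
A hedged kernel-style sketch of the mechanism Steve is pointing at:
nothing in the receive path caps an skb at the device MTU, so a driver
can attach coalesced payload pages as fragments and hand up one
oversized skb (illustrative function, error handling omitted):

#include <linux/skbuff.h>

static struct sk_buff *build_coalesced_skb(struct page **pages, int n,
					   unsigned int frag_len)
{
	/* Small linear area for the headers; the payload stays in pages. */
	struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC);
	int i;

	if (!skb)
		return NULL;

	for (i = 0; i < n; i++) {
		/* Attach each page of coalesced payload as a fragment. */
		skb_fill_page_desc(skb, i, pages[i], 0, frag_len);
		skb->len      += frag_len;
		skb->data_len += frag_len;
		skb->truesize += frag_len;
	}
	return skb;	/* may legitimately exceed netdev->mtu */
}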



* Re: [PATCH 7/7] cxgb3 - Add SW LRO support
  2007-02-26  5:13 ` Christoph Hellwig
@ 2007-02-27  0:55   ` Divy Le Ray
  2007-02-27 14:17   ` Steve Wise
  1 sibling, 0 replies; 9+ messages in thread
From: Divy Le Ray @ 2007-02-27  0:55 UTC
  To: Christoph Hellwig, divy, jeff, netdev, linux-kernel, swise

Christoph Hellwig wrote:
> On Sat, Feb 24, 2007 at 04:44:23PM -0800, divy@chelsio.com wrote:
>   
>> From: Divy Le Ray <divy@chelsio.com>
>>
>> Add all-in-sw lro support.
>>     
>
> Doing this in a LLDD doesn't sound like a good idea.  Have you
> tried doing this in the core networking code instead?
>   

We have not tried to implement LRO in the core networking code.  Today,
the available API for a driver to pass packets up is
netif_receive_skb()/netif_rx().  Our implementation coalesces pages,
then gets an skb to point at those pages and passes it up to the stack.
LRO in the core networking code would require adding methods to pass up
data segments in addition to skbs.  Hardware might also be able to
assist LRO by identifying the session an incoming packet belongs to,
thus avoiding the session lookup; the API should also allow the driver
to indicate such information.

Cheers,
Divy
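
One hypothetical shape for the core-stack hooks Divy describes:
letting a driver hand the core raw data segments plus an optional
hardware session hint instead of fully formed skbs.  None of these
names exist in the kernel; this is purely a sketch of the proposal:

#include <linux/netdevice.h>

struct lro_frag {
	struct page	*page;		/* payload held in pages, not skbs */
	unsigned int	offset;
	unsigned int	len;
};

struct lro_ops {
	/* Feed one received TCP segment to the core; hw_session < 0
	 * means "no hardware hint, look the session up in the core". */
	int	(*rx_frag)(struct net_device *dev,
			   const struct lro_frag *frag, int hw_session);
	/* Flush any coalesced state, e.g. at the end of a NAPI poll. */
	void	(*rx_flush)(struct net_device *dev);
};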


* Re: [PATCH 7/7] cxgb3 - Add SW LRO support
  2007-02-25  0:44 divy
@ 2007-02-26  5:13 ` Christoph Hellwig
  2007-02-27  0:55   ` Divy Le Ray
  2007-02-27 14:17   ` Steve Wise
  0 siblings, 2 replies; 9+ messages in thread
From: Christoph Hellwig @ 2007-02-26  5:13 UTC
  To: divy; +Cc: jeff, netdev, linux-kernel, swise

On Sat, Feb 24, 2007 at 04:44:23PM -0800, divy@chelsio.com wrote:
> From: Divy Le Ray <divy@chelsio.com>
> 
> Add all-in-sw lro support.

Doing this in a LLDD doesn't sound like a good idea.  Have you
tried doing this in the core networking code instead?


Thread overview: 9+ messages
2007-02-22 11:59 [PATCH 7/7] cxgb3 - Add SW LRO support divy
2007-02-25  0:44 divy
2007-02-26  5:13 ` Christoph Hellwig
2007-02-27  0:55   ` Divy Le Ray
2007-02-27 14:17   ` Steve Wise
2007-02-27 14:53     ` Jeff Garzik
2007-02-27 14:57       ` Steve Wise
2007-02-27 16:29         ` Brice Goglin
2007-06-01 20:37           ` Jeff Garzik
