All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] fragment: add fast path
@ 2010-06-13 23:16 Changli Gao
  2010-06-14  1:18 ` David Miller
  2010-06-14  5:35 ` Eric Dumazet
  0 siblings, 2 replies; 13+ messages in thread
From: Changli Gao @ 2010-06-13 23:16 UTC (permalink / raw)
  To: David S. Miller
  Cc: Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev,
	Changli Gao

add fast path

As the fragments are usually in order, it is likely the new fragments are
at the end of the inet_frag_queue. In the fast path, we check if the skb at the
end of the inet_frag_queue is the prev we expect.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/inet_frag.h |    1 +
 net/ipv4/ip_fragment.c  |   17 +++++++++++++++++
 net/ipv6/reassembly.c   |   16 ++++++++++++++++
 3 files changed, 34 insertions(+)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 39f2dc9..16ff29a 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -20,6 +20,7 @@ struct inet_frag_queue {
 	atomic_t		refcnt;
 	struct timer_list	timer;      /* when will this queue expire? */
 	struct sk_buff		*fragments; /* list of received fragments */
+	struct sk_buff		*fragments_tail;
 	ktime_t			stamp;
 	int			len;        /* total length of orig datagram */
 	int			meat;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 75347ea..d8c36d4 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -317,6 +317,7 @@ static int ip_frag_reinit(struct ipq *qp)
 	qp->q.len = 0;
 	qp->q.meat = 0;
 	qp->q.fragments = NULL;
+	qp->q.fragments_tail = NULL;
 	qp->iif = 0;
 
 	return 0;
@@ -389,6 +390,16 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	 * in the chain of fragments so far.  We must know where to put
 	 * this fragment, right?
 	 */
+	prev = qp->q.fragments_tail;
+	if (prev) {
+		if (FRAG_CB(prev)->offset < offset) {
+			next = NULL;
+			goto found;
+		}
+	} else {
+		next = NULL;
+		goto found;
+	}
 	prev = NULL;
 	for (next = qp->q.fragments; next != NULL; next = next->next) {
 		if (FRAG_CB(next)->offset >= offset)
@@ -396,6 +407,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		prev = next;
 	}
 
+found:
 	/* We found where to put this one.  Check for overlap with
 	 * preceding fragment, and, if needed, align things so that
 	 * any overlaps are eliminated.
@@ -454,6 +466,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 
 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
+	if (!next)
+		qp->q.fragments_tail = skb;
 	if (prev)
 		prev->next = skb;
 	else
@@ -507,6 +521,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 			goto out_nomem;
 
 		fp->next = head->next;
+		if (!fp->next)
+			qp->q.fragments_tail = fp;
 		prev->next = fp;
 
 		skb_morph(head, qp->q.fragments);
@@ -578,6 +594,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	iph->tot_len = htons(len);
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
 	qp->q.fragments = NULL;
+	qp->q.fragments_tail = NULL;
 	return 0;
 
 out_nomem:
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 6d4292f..dc15624 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -336,6 +336,16 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	 * in the chain of fragments so far.  We must know where to put
 	 * this fragment, right?
 	 */
+	prev = fq->q.fragments_tail;
+	if (prev) {
+		if (FRAG6_CB(prev)->offset < offset) {
+			next = NULL;
+			goto found;
+		}
+	} else {
+		next = NULL;
+		goto found;
+	}
 	prev = NULL;
 	for(next = fq->q.fragments; next != NULL; next = next->next) {
 		if (FRAG6_CB(next)->offset >= offset)
@@ -343,6 +353,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 		prev = next;
 	}
 
+found:
 	/* We found where to put this one.  Check for overlap with
 	 * preceding fragment, and, if needed, align things so that
 	 * any overlaps are eliminated.
@@ -400,6 +411,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 
 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
+	if (!next)
+		fq->q.fragments_tail = skb;
 	if (prev)
 		prev->next = skb;
 	else
@@ -466,6 +479,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 			goto out_oom;
 
 		fp->next = head->next;
+		if (!fp->next)
+			fq->q.fragments_tail = fp;
 		prev->next = fp;
 
 		skb_morph(head, fq->q.fragments);
@@ -553,6 +568,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
 	rcu_read_unlock();
 	fq->q.fragments = NULL;
+	fq->q.fragments_tail = NULL;
 	return 1;
 
 out_oversize:

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] fragment: add fast path
  2010-06-13 23:16 [PATCH] fragment: add fast path Changli Gao
@ 2010-06-14  1:18 ` David Miller
  2010-06-14  2:03   ` Changli Gao
  2010-06-14  5:15   ` YOSHIFUJI Hideaki
  2010-06-14  5:35 ` Eric Dumazet
  1 sibling, 2 replies; 13+ messages in thread
From: David Miller @ 2010-06-14  1:18 UTC (permalink / raw)
  To: xiaosuo; +Cc: kuznet, pekkas, jmorris, yoshfuji, kaber, netdev

From: Changli Gao <xiaosuo@gmail.com>
Date: Mon, 14 Jun 2010 07:16:35 +0800

> As the fragments are usually in order,

In what universe does this happen "usually"?

Linux has been outputting fragments in reverse order for more than 10
years.

I'm not applying this patch.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] fragment: add fast path
  2010-06-14  1:18 ` David Miller
@ 2010-06-14  2:03   ` Changli Gao
  2010-06-14  2:24     ` YOSHIFUJI Hideaki
  2010-06-14  2:55     ` Changli Gao
  2010-06-14  5:15   ` YOSHIFUJI Hideaki
  1 sibling, 2 replies; 13+ messages in thread
From: Changli Gao @ 2010-06-14  2:03 UTC (permalink / raw)
  To: David Miller; +Cc: kuznet, pekkas, jmorris, yoshfuji, kaber, netdev

On Mon, Jun 14, 2010 at 9:18 AM, David Miller <davem@davemloft.net> wrote:
> From: Changli Gao <xiaosuo@gmail.com>
> Date: Mon, 14 Jun 2010 07:16:35 +0800
>
>> As the fragments are usually in order,
>
> In what universe does this happen "usually"?
>
> Linux has been outputting fragments in reverse order for more than 10
> years.
>

I have tested next-next-2.6 and darwin, and found they are both send
fragments in order:

Darwin:

Darwin localhost 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15
16:55:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_I386 i386 i386

09:53:26.891820 IP (tos 0x0, ttl 64, id 19628, offset 0, flags [+],
proto UDP (17), length 1500) 10.13.3.10.52189 > 10.13.150.1.8888: UDP,
length 8192
09:53:26.892048 IP (tos 0x0, ttl 64, id 19628, offset 1480, flags [+],
proto UDP (17), length 1500) 10.13.3.10 > 10.13.150.1: udp
09:53:26.892229 IP (tos 0x0, ttl 64, id 19628, offset 2960, flags [+],
proto UDP (17), length 1500) 10.13.3.10 > 10.13.150.1: udp
09:53:26.892397 IP (tos 0x0, ttl 64, id 19628, offset 4440, flags [+],
proto UDP (17), length 1500) 10.13.3.10 > 10.13.150.1: udp
09:53:26.892529 IP (tos 0x0, ttl 64, id 19628, offset 5920, flags [+],
proto UDP (17), length 1500) 10.13.3.10 > 10.13.150.1: udp
09:53:26.892670 IP (tos 0x0, ttl 64, id 19628, offset 7400, flags
[none], proto UDP (17), length 820) 10.13.3.10 > 10.13.150.1: udp

Linux:

Linux localhost 2.6.35-rc1 #88 SMP Sun Jun 13 14:25:07 CST 2010 x86_64
Intel(R) Core(TM)2 Duo CPU E6550 @ 2.33GHz GenuineIntel GNU/Linux

08:01:53.730902 IP (tos 0x0, ttl 64, id 1263, offset 0, flags [+],
proto UDP (17), length 1500) 10.13.150.50.45295 > 10.13.150.1.8888:
UDP, length 8192
08:01:53.730955 IP (tos 0x0, ttl 64, id 1263, offset 1480, flags [+],
proto UDP (17), length 1500) 10.13.150.50 > 10.13.150.1: udp
08:01:53.731113 IP (tos 0x0, ttl 64, id 1263, offset 2960, flags [+],
proto UDP (17), length 1500) 10.13.150.50 > 10.13.150.1: udp
08:01:53.731139 IP (tos 0x0, ttl 64, id 1263, offset 4440, flags [+],
proto UDP (17), length 1500) 10.13.150.50 > 10.13.150.1: udp
08:01:53.731280 IP (tos 0x0, ttl 64, id 1263, offset 5920, flags [+],
proto UDP (17), length 1500) 10.13.150.50 > 10.13.150.1: udp
08:01:53.731306 IP (tos 0x0, ttl 64, id 1263, offset 7400, flags
[none], proto UDP (17), length 820) 10.13.150.50 > 10.13.150.1: udp

Later I'll test Windows.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] fragment: add fast path
  2010-06-14  2:03   ` Changli Gao
@ 2010-06-14  2:24     ` YOSHIFUJI Hideaki
  2010-06-14  2:52       ` Changli Gao
  2010-06-14  2:55     ` Changli Gao
  1 sibling, 1 reply; 13+ messages in thread
From: YOSHIFUJI Hideaki @ 2010-06-14  2:24 UTC (permalink / raw)
  To: Changli Gao
  Cc: David Miller, kuznet, pekkas, jmorris, kaber, netdev, YOSHIFUJI Hideaki

NIC?

--yoshfuji

(2010/06/14 11:03), Changli Gao wrote:
> On Mon, Jun 14, 2010 at 9:18 AM, David Miller<davem@davemloft.net>  wrote:
>> From: Changli Gao<xiaosuo@gmail.com>
>> Date: Mon, 14 Jun 2010 07:16:35 +0800
>>
>>> As the fragments are usually in order,
>>
>> In what universe does this happen "usually"?
>>
>> Linux has been outputting fragments in reverse order for more than 10
>> years.
>>
>
> I have tested next-next-2.6 and darwin, and found they are both send
> fragments in order:
>
> Darwin:
>
> Darwin localhost 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15
> 16:55:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_I386 i386 i386
>
> 09:53:26.891820 IP (tos 0x0, ttl 64, id 19628, offset 0, flags [+],
> proto UDP (17), length 1500) 10.13.3.10.52189>  10.13.150.1.8888: UDP,
> length 8192
> 09:53:26.892048 IP (tos 0x0, ttl 64, id 19628, offset 1480, flags [+],
> proto UDP (17), length 1500) 10.13.3.10>  10.13.150.1: udp
> 09:53:26.892229 IP (tos 0x0, ttl 64, id 19628, offset 2960, flags [+],
> proto UDP (17), length 1500) 10.13.3.10>  10.13.150.1: udp
> 09:53:26.892397 IP (tos 0x0, ttl 64, id 19628, offset 4440, flags [+],
> proto UDP (17), length 1500) 10.13.3.10>  10.13.150.1: udp
> 09:53:26.892529 IP (tos 0x0, ttl 64, id 19628, offset 5920, flags [+],
> proto UDP (17), length 1500) 10.13.3.10>  10.13.150.1: udp
> 09:53:26.892670 IP (tos 0x0, ttl 64, id 19628, offset 7400, flags
> [none], proto UDP (17), length 820) 10.13.3.10>  10.13.150.1: udp
>
> Linux:
>
> Linux localhost 2.6.35-rc1 #88 SMP Sun Jun 13 14:25:07 CST 2010 x86_64
> Intel(R) Core(TM)2 Duo CPU E6550 @ 2.33GHz GenuineIntel GNU/Linux
>
> 08:01:53.730902 IP (tos 0x0, ttl 64, id 1263, offset 0, flags [+],
> proto UDP (17), length 1500) 10.13.150.50.45295>  10.13.150.1.8888:
> UDP, length 8192
> 08:01:53.730955 IP (tos 0x0, ttl 64, id 1263, offset 1480, flags [+],
> proto UDP (17), length 1500) 10.13.150.50>  10.13.150.1: udp
> 08:01:53.731113 IP (tos 0x0, ttl 64, id 1263, offset 2960, flags [+],
> proto UDP (17), length 1500) 10.13.150.50>  10.13.150.1: udp
> 08:01:53.731139 IP (tos 0x0, ttl 64, id 1263, offset 4440, flags [+],
> proto UDP (17), length 1500) 10.13.150.50>  10.13.150.1: udp
> 08:01:53.731280 IP (tos 0x0, ttl 64, id 1263, offset 5920, flags [+],
> proto UDP (17), length 1500) 10.13.150.50>  10.13.150.1: udp
> 08:01:53.731306 IP (tos 0x0, ttl 64, id 1263, offset 7400, flags
> [none], proto UDP (17), length 820) 10.13.150.50>  10.13.150.1: udp
>
> Later I'll test Windows.
>


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] fragment: add fast path
  2010-06-14  2:24     ` YOSHIFUJI Hideaki
@ 2010-06-14  2:52       ` Changli Gao
  0 siblings, 0 replies; 13+ messages in thread
From: Changli Gao @ 2010-06-14  2:52 UTC (permalink / raw)
  To: YOSHIFUJI Hideaki; +Cc: David Miller, kuznet, pekkas, jmorris, kaber, netdev

On Mon, Jun 14, 2010 at 10:24 AM, YOSHIFUJI Hideaki
<yoshfuji@linux-ipv6.org> wrote:
> NIC?
>

Linux: e1000
Darwin: tun

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] fragment: add fast path
  2010-06-14  2:03   ` Changli Gao
  2010-06-14  2:24     ` YOSHIFUJI Hideaki
@ 2010-06-14  2:55     ` Changli Gao
  1 sibling, 0 replies; 13+ messages in thread
From: Changli Gao @ 2010-06-14  2:55 UTC (permalink / raw)
  To: David Miller; +Cc: kuznet, pekkas, jmorris, yoshfuji, kaber, netdev

On Mon, Jun 14, 2010 at 10:03 AM, Changli Gao <xiaosuo@gmail.com> wrote:
>
> Later I'll test Windows.
>

Windows also sends fragments in order.

10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 0, flags [+],
proto UDP (17), length 1452) 221.238.33.71.4194 > 202.113.29.4.8888:
UDP, length 32768
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 1432, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 2864, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 4296, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 5728, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 7160, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 8592, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 10024, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 11456, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 12888, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 14320, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 15752, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 17184, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 18616, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 20048, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 21480, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 22912, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 24344, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 25776, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 27208, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 28640, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 30072, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 31504, flags
[none], proto UDP (17), length 1292) 221.238.33.71 > 202.113.29.4: udp

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] fragment: add fast path
  2010-06-14  1:18 ` David Miller
  2010-06-14  2:03   ` Changli Gao
@ 2010-06-14  5:15   ` YOSHIFUJI Hideaki
  2010-06-14  6:19     ` Mitchell Erblich
  1 sibling, 1 reply; 13+ messages in thread
From: YOSHIFUJI Hideaki @ 2010-06-14  5:15 UTC (permalink / raw)
  To: David Miller
  Cc: xiaosuo, kuznet, pekkas, jmorris, kaber, netdev, YOSHIFUJI Hideaki

David Miller wrote:
>> As the fragments are usually in order,
> 
> In what universe does this happen "usually"?
> 
> Linux has been outputting fragments in reverse order for more than 10
> years.
> 
> I'm not applying this patch.

Dave,  I know we've been sending in reverse order, of course.
And, as far as I know, it seems Linux is the only implementation
which sends fragments in reverse order.

This is receiving side.  I think we should accept the fact
that Linux is not the only implementation, no?

--yoshfuji

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] fragment: add fast path
  2010-06-13 23:16 [PATCH] fragment: add fast path Changli Gao
  2010-06-14  1:18 ` David Miller
@ 2010-06-14  5:35 ` Eric Dumazet
  2010-06-14  5:59   ` Changli Gao
  2010-06-14  6:40   ` Eric Dumazet
  1 sibling, 2 replies; 13+ messages in thread
From: Eric Dumazet @ 2010-06-14  5:35 UTC (permalink / raw)
  To: Changli Gao
  Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev

Le lundi 14 juin 2010 à 07:16 +0800, Changli Gao a écrit :
> add fast path
> 
> As the fragments are usually in order, it is likely the new fragments are
> at the end of the inet_frag_queue. In the fast path, we check if the skb at the
> end of the inet_frag_queue is the prev we expect.
> 
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ----
>  include/net/inet_frag.h |    1 +
>  net/ipv4/ip_fragment.c  |   17 +++++++++++++++++
>  net/ipv6/reassembly.c   |   16 ++++++++++++++++
>  3 files changed, 34 insertions(+)
> diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
> index 39f2dc9..16ff29a 100644
> --- a/include/net/inet_frag.h
> +++ b/include/net/inet_frag.h
> @@ -20,6 +20,7 @@ struct inet_frag_queue {
>  	atomic_t		refcnt;
>  	struct timer_list	timer;      /* when will this queue expire? */
>  	struct sk_buff		*fragments; /* list of received fragments */
> +	struct sk_buff		*fragments_tail;
>  	ktime_t			stamp;
>  	int			len;        /* total length of orig datagram */
>  	int			meat;
> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
> index 75347ea..d8c36d4 100644
> --- a/net/ipv4/ip_fragment.c
> +++ b/net/ipv4/ip_fragment.c
> @@ -317,6 +317,7 @@ static int ip_frag_reinit(struct ipq *qp)
>  	qp->q.len = 0;
>  	qp->q.meat = 0;
>  	qp->q.fragments = NULL;
> +	qp->q.fragments_tail = NULL;
>  	qp->iif = 0;
>  
>  	return 0;
> @@ -389,6 +390,16 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
>  	 * in the chain of fragments so far.  We must know where to put
>  	 * this fragment, right?
>  	 */
> +	prev = qp->q.fragments_tail;
> +	if (prev) {
> +		if (FRAG_CB(prev)->offset < offset) {
> +			next = NULL;
> +			goto found;
> +		}
> +	} else {
> +		next = NULL;

How can this chunk be a win ? queue is empty anyway.
You add tests and slow the 'other path'

> +		goto found;
> +	}

Quite frankly, one easy way to speedup things would be to move 'offset'
from  ipfrag_skb_cb close to skb->next field so that only one cache line
per frag is used during lookup.

I am not sure why we need "struct inet_skb_parm h;" field in struct
ipfrag_skb_cb... 

I probably need to wakeup this monday morning ?

Untested patch follows, only compiled.

[PATCH] frags: Remove unecessary bits

While trying to move 'offset' to the beginning of frag CB, I found
inet_skb_parm field was unused.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 75347ea..0f51ae0 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -55,7 +55,6 @@ static int sysctl_ipfrag_max_dist __read_mostly = 64;
 
 struct ipfrag_skb_cb
 {
-	struct inet_skb_parm	h;
 	int			offset;
 };
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 6d4292f..122e0be 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -57,7 +57,6 @@
 
 struct ip6frag_skb_cb
 {
-	struct inet6_skb_parm	h;
 	int			offset;
 };
 



^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] fragment: add fast path
  2010-06-14  5:35 ` Eric Dumazet
@ 2010-06-14  5:59   ` Changli Gao
  2010-06-14  7:04     ` Eric Dumazet
  2010-06-14  6:40   ` Eric Dumazet
  1 sibling, 1 reply; 13+ messages in thread
From: Changli Gao @ 2010-06-14  5:59 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev

On Mon, Jun 14, 2010 at 1:35 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le lundi 14 juin 2010 à 07:16 +0800, Changli Gao a écrit :
>> add fast path
>>
>> As the fragments are usually in order, it is likely the new fragments are
>> at the end of the inet_frag_queue. In the fast path, we check if the skb at the
>> end of the inet_frag_queue is the prev we expect.
>>
>> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
>> ----
>>  include/net/inet_frag.h |    1 +
>>  net/ipv4/ip_fragment.c  |   17 +++++++++++++++++
>>  net/ipv6/reassembly.c   |   16 ++++++++++++++++
>>  3 files changed, 34 insertions(+)
>> diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
>> index 39f2dc9..16ff29a 100644
>> --- a/include/net/inet_frag.h
>> +++ b/include/net/inet_frag.h
>> @@ -20,6 +20,7 @@ struct inet_frag_queue {
>>       atomic_t                refcnt;
>>       struct timer_list       timer;      /* when will this queue expire? */
>>       struct sk_buff          *fragments; /* list of received fragments */
>> +     struct sk_buff          *fragments_tail;
>>       ktime_t                 stamp;
>>       int                     len;        /* total length of orig datagram */
>>       int                     meat;
>> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
>> index 75347ea..d8c36d4 100644
>> --- a/net/ipv4/ip_fragment.c
>> +++ b/net/ipv4/ip_fragment.c
>> @@ -317,6 +317,7 @@ static int ip_frag_reinit(struct ipq *qp)
>>       qp->q.len = 0;
>>       qp->q.meat = 0;
>>       qp->q.fragments = NULL;
>> +     qp->q.fragments_tail = NULL;
>>       qp->iif = 0;
>>
>>       return 0;
>> @@ -389,6 +390,16 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
>>        * in the chain of fragments so far.  We must know where to put
>>        * this fragment, right?
>>        */
>> +     prev = qp->q.fragments_tail;
>> +     if (prev) {
>> +             if (FRAG_CB(prev)->offset < offset) {
>> +                     next = NULL;
>> +                     goto found;
>> +             }
>> +     } else {
>> +             next = NULL;
>
> How can this chunk be a win ? queue is empty anyway.
> You add tests and slow the 'other path'
>
>> +             goto found;
>> +     }

Without this branch. prev needs to be initialized to zero again(of
course, we can avoid this by moving prev = NULL in the previous
branch). next needs an assignment, and a duplicate check if the the
queue is empty, which is already known in the above branch. Sorry, but
I can't see which path I slow.

        prev = NULL;
        for (next = qp->q.fragments; next != NULL; next = next->next) {
                if (FRAG_CB(next)->offset >= offset)
                        break;  /* bingo! */
                prev = next;
        }

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] fragment: add fast path
  2010-06-14  5:15   ` YOSHIFUJI Hideaki
@ 2010-06-14  6:19     ` Mitchell Erblich
  0 siblings, 0 replies; 13+ messages in thread
From: Mitchell Erblich @ 2010-06-14  6:19 UTC (permalink / raw)
  To: YOSHIFUJI Hideaki
  Cc: David Miller, xiaosuo, kuznet, pekkas, jmorris, kaber, netdev

On Jun 13, 2010, at 10:15 PM, YOSHIFUJI Hideaki wrote:

> David Miller wrote:
>>> As the fragments are usually in order,
>> In what universe does this happen "usually"?
>> Linux has been outputting fragments in reverse order for more than 10
>> years.
>> I'm not applying this patch.
> 
> Dave,  I know we've been sending in reverse order, of course.
> And, as far as I know, it seems Linux is the only implementation
> which sends fragments in reverse order.
> 
> This is receiving side.  I think we should accept the fact
> that Linux is not the only implementation, no?
> 
> --yoshfuji


Group,

With respect to IPv4 and PATH MTU, aren't we
unlikely to generate packet frags?

With respect to IPv6, aren't  frags are less likely 
(intermediate nodes are not allowed to frag 
(rfc 2460, sect 4.5 Frag header)) as I would
expect the source to use the PATH MTU?

Thus, a faster path is to support the largest Jumbo
MTUs (MTU >=9000: 16,110 : 14,336 : 10,240)
where possible for bulk data transfers with
proper page orders, IMO.

Mitchell Erblich


> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] fragment: add fast path
  2010-06-14  5:35 ` Eric Dumazet
  2010-06-14  5:59   ` Changli Gao
@ 2010-06-14  6:40   ` Eric Dumazet
  1 sibling, 0 replies; 13+ messages in thread
From: Eric Dumazet @ 2010-06-14  6:40 UTC (permalink / raw)
  To: Changli Gao
  Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev


> I am not sure why we need "struct inet_skb_parm h;" field in struct
> ipfrag_skb_cb... 
> 
> I probably need to wakeup this monday morning ?
> 
> Untested patch follows, only compiled.
> 

Oh well, I see light now I woke up ;)

> [PATCH] frags: Remove unecessary bits
> 
> While trying to move 'offset' to the beginning of frag CB, I found
> inet_skb_parm field was unused.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---

Wrong patch of course.

We need some comment or document better why its there..




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] fragment: add fast path
  2010-06-14  5:59   ` Changli Gao
@ 2010-06-14  7:04     ` Eric Dumazet
  2010-06-14 10:15       ` Changli Gao
  0 siblings, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2010-06-14  7:04 UTC (permalink / raw)
  To: Changli Gao
  Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev


> 
> Without this branch. prev needs to be initialized to zero again(of
> course, we can avoid this by moving prev = NULL in the previous
> branch). next needs an assignment, and a duplicate check if the the
> queue is empty, which is already known in the above branch. Sorry, but
> I can't see which path I slow.
> 
>         prev = NULL;
>         for (next = qp->q.fragments; next != NULL; next = next->next) {
>                 if (FRAG_CB(next)->offset >= offset)
>                         break;  /* bingo! */
>                 prev = next;
>         }
> 

Concept of 'fast path' has changed over years. It used to be cpu
instructions and cycles, its now number of memory transactions.

The only thing we need to address are the cache lines we must bring into
cpu caches, and keep code short.

These days, one cache line miss -> more than one hundred instructions
that could be done during cpu stall. cpu cycles are cheap if code
already in instruction cache.

Adding a test to avoid entering a NULL loop (no fragment is stored yet)
just bloats the code, making it larger than necessary.

You dont need the else branch :

if (prev) {
	if (FRAG_CB(prev)->offset < offset) {
		next = NULL;
		goto found;
	}
else {
	next = NULL;
	goto found;
}

Just write :

next = NULL;
if (prev && FRAG_CB(prev)->offset < offset)
	goto found;




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] fragment: add fast path
  2010-06-14  7:04     ` Eric Dumazet
@ 2010-06-14 10:15       ` Changli Gao
  0 siblings, 0 replies; 13+ messages in thread
From: Changli Gao @ 2010-06-14 10:15 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev

On Mon, Jun 14, 2010 at 3:04 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
> Concept of 'fast path' has changed over years. It used to be cpu
> instructions and cycles, its now number of memory transactions.
>
> The only thing we need to address are the cache lines we must bring into
> cpu caches, and keep code short.
>
> These days, one cache line miss -> more than one hundred instructions
> that could be done during cpu stall. cpu cycles are cheap if code
> already in instruction cache.
>
> Adding a test to avoid entering a NULL loop (no fragment is stored yet)
> just bloats the code, making it larger than necessary.
>
> You dont need the else branch :
>
> if (prev) {
>        if (FRAG_CB(prev)->offset < offset) {
>                next = NULL;
>                goto found;
>        }
> else {
>        next = NULL;
>        goto found;
> }
>
> Just write :
>
> next = NULL;
> if (prev && FRAG_CB(prev)->offset < offset)
>        goto found;
>

Thanks:

I think the following code is better:

if (!prev || FRAG_CB(prev)->offset < offset) {
      next = NULL;
      goto found;
}


-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2010-06-14 10:15 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-06-13 23:16 [PATCH] fragment: add fast path Changli Gao
2010-06-14  1:18 ` David Miller
2010-06-14  2:03   ` Changli Gao
2010-06-14  2:24     ` YOSHIFUJI Hideaki
2010-06-14  2:52       ` Changli Gao
2010-06-14  2:55     ` Changli Gao
2010-06-14  5:15   ` YOSHIFUJI Hideaki
2010-06-14  6:19     ` Mitchell Erblich
2010-06-14  5:35 ` Eric Dumazet
2010-06-14  5:59   ` Changli Gao
2010-06-14  7:04     ` Eric Dumazet
2010-06-14 10:15       ` Changli Gao
2010-06-14  6:40   ` Eric Dumazet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.