* [PATCH] fragment: add fast path
@ 2010-06-13 23:16 Changli Gao
2010-06-14 1:18 ` David Miller
2010-06-14 5:35 ` Eric Dumazet
0 siblings, 2 replies; 13+ messages in thread
From: Changli Gao @ 2010-06-13 23:16 UTC (permalink / raw)
To: David S. Miller
Cc: Alexey Kuznetsov, Pekka Savola (ipv6),
James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev,
Changli Gao
add fast path
As the fragments are usually in order, it is likely the new fragments are
at the end of the inet_frag_queue. In the fast path, we check if the skb at the
end of the inet_frag_queue is the prev we expect.
Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
include/net/inet_frag.h | 1 +
net/ipv4/ip_fragment.c | 17 +++++++++++++++++
net/ipv6/reassembly.c | 16 ++++++++++++++++
3 files changed, 34 insertions(+)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 39f2dc9..16ff29a 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -20,6 +20,7 @@ struct inet_frag_queue {
atomic_t refcnt;
struct timer_list timer; /* when will this queue expire? */
struct sk_buff *fragments; /* list of received fragments */
+ struct sk_buff *fragments_tail;
ktime_t stamp;
int len; /* total length of orig datagram */
int meat;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 75347ea..d8c36d4 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -317,6 +317,7 @@ static int ip_frag_reinit(struct ipq *qp)
qp->q.len = 0;
qp->q.meat = 0;
qp->q.fragments = NULL;
+ qp->q.fragments_tail = NULL;
qp->iif = 0;
return 0;
@@ -389,6 +390,16 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* in the chain of fragments so far. We must know where to put
* this fragment, right?
*/
+ prev = qp->q.fragments_tail;
+ if (prev) {
+ if (FRAG_CB(prev)->offset < offset) {
+ next = NULL;
+ goto found;
+ }
+ } else {
+ next = NULL;
+ goto found;
+ }
prev = NULL;
for (next = qp->q.fragments; next != NULL; next = next->next) {
if (FRAG_CB(next)->offset >= offset)
@@ -396,6 +407,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
prev = next;
}
+found:
/* We found where to put this one. Check for overlap with
* preceding fragment, and, if needed, align things so that
* any overlaps are eliminated.
@@ -454,6 +466,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
/* Insert this fragment in the chain of fragments. */
skb->next = next;
+ if (!next)
+ qp->q.fragments_tail = skb;
if (prev)
prev->next = skb;
else
@@ -507,6 +521,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
goto out_nomem;
fp->next = head->next;
+ if (!fp->next)
+ qp->q.fragments_tail = fp;
prev->next = fp;
skb_morph(head, qp->q.fragments);
@@ -578,6 +594,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
iph->tot_len = htons(len);
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
qp->q.fragments = NULL;
+ qp->q.fragments_tail = NULL;
return 0;
out_nomem:
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 6d4292f..dc15624 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -336,6 +336,16 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
* in the chain of fragments so far. We must know where to put
* this fragment, right?
*/
+ prev = fq->q.fragments_tail;
+ if (prev) {
+ if (FRAG6_CB(prev)->offset < offset) {
+ next = NULL;
+ goto found;
+ }
+ } else {
+ next = NULL;
+ goto found;
+ }
prev = NULL;
for(next = fq->q.fragments; next != NULL; next = next->next) {
if (FRAG6_CB(next)->offset >= offset)
@@ -343,6 +353,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
prev = next;
}
+found:
/* We found where to put this one. Check for overlap with
* preceding fragment, and, if needed, align things so that
* any overlaps are eliminated.
@@ -400,6 +411,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
/* Insert this fragment in the chain of fragments. */
skb->next = next;
+ if (!next)
+ fq->q.fragments_tail = skb;
if (prev)
prev->next = skb;
else
@@ -466,6 +479,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
goto out_oom;
fp->next = head->next;
+ if (!fp->next)
+ fq->q.fragments_tail = fp;
prev->next = fp;
skb_morph(head, fq->q.fragments);
@@ -553,6 +568,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
rcu_read_unlock();
fq->q.fragments = NULL;
+ fq->q.fragments_tail = NULL;
return 1;
out_oversize:
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] fragment: add fast path
2010-06-13 23:16 [PATCH] fragment: add fast path Changli Gao
@ 2010-06-14 1:18 ` David Miller
2010-06-14 2:03 ` Changli Gao
2010-06-14 5:15 ` YOSHIFUJI Hideaki
2010-06-14 5:35 ` Eric Dumazet
1 sibling, 2 replies; 13+ messages in thread
From: David Miller @ 2010-06-14 1:18 UTC (permalink / raw)
To: xiaosuo; +Cc: kuznet, pekkas, jmorris, yoshfuji, kaber, netdev
From: Changli Gao <xiaosuo@gmail.com>
Date: Mon, 14 Jun 2010 07:16:35 +0800
> As the fragments are usually in order,
In what universe does this happen "usually"?
Linux has been outputting fragments in reverse order for more than 10
years.
I'm not applying this patch.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] fragment: add fast path
2010-06-14 1:18 ` David Miller
@ 2010-06-14 2:03 ` Changli Gao
2010-06-14 2:24 ` YOSHIFUJI Hideaki
2010-06-14 2:55 ` Changli Gao
2010-06-14 5:15 ` YOSHIFUJI Hideaki
1 sibling, 2 replies; 13+ messages in thread
From: Changli Gao @ 2010-06-14 2:03 UTC (permalink / raw)
To: David Miller; +Cc: kuznet, pekkas, jmorris, yoshfuji, kaber, netdev
On Mon, Jun 14, 2010 at 9:18 AM, David Miller <davem@davemloft.net> wrote:
> From: Changli Gao <xiaosuo@gmail.com>
> Date: Mon, 14 Jun 2010 07:16:35 +0800
>
>> As the fragments are usually in order,
>
> In what universe does this happen "usually"?
>
> Linux has been outputting fragments in reverse order for more than 10
> years.
>
I have tested next-next-2.6 and darwin, and found they are both send
fragments in order:
Darwin:
Darwin localhost 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15
16:55:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_I386 i386 i386
09:53:26.891820 IP (tos 0x0, ttl 64, id 19628, offset 0, flags [+],
proto UDP (17), length 1500) 10.13.3.10.52189 > 10.13.150.1.8888: UDP,
length 8192
09:53:26.892048 IP (tos 0x0, ttl 64, id 19628, offset 1480, flags [+],
proto UDP (17), length 1500) 10.13.3.10 > 10.13.150.1: udp
09:53:26.892229 IP (tos 0x0, ttl 64, id 19628, offset 2960, flags [+],
proto UDP (17), length 1500) 10.13.3.10 > 10.13.150.1: udp
09:53:26.892397 IP (tos 0x0, ttl 64, id 19628, offset 4440, flags [+],
proto UDP (17), length 1500) 10.13.3.10 > 10.13.150.1: udp
09:53:26.892529 IP (tos 0x0, ttl 64, id 19628, offset 5920, flags [+],
proto UDP (17), length 1500) 10.13.3.10 > 10.13.150.1: udp
09:53:26.892670 IP (tos 0x0, ttl 64, id 19628, offset 7400, flags
[none], proto UDP (17), length 820) 10.13.3.10 > 10.13.150.1: udp
Linux:
Linux localhost 2.6.35-rc1 #88 SMP Sun Jun 13 14:25:07 CST 2010 x86_64
Intel(R) Core(TM)2 Duo CPU E6550 @ 2.33GHz GenuineIntel GNU/Linux
08:01:53.730902 IP (tos 0x0, ttl 64, id 1263, offset 0, flags [+],
proto UDP (17), length 1500) 10.13.150.50.45295 > 10.13.150.1.8888:
UDP, length 8192
08:01:53.730955 IP (tos 0x0, ttl 64, id 1263, offset 1480, flags [+],
proto UDP (17), length 1500) 10.13.150.50 > 10.13.150.1: udp
08:01:53.731113 IP (tos 0x0, ttl 64, id 1263, offset 2960, flags [+],
proto UDP (17), length 1500) 10.13.150.50 > 10.13.150.1: udp
08:01:53.731139 IP (tos 0x0, ttl 64, id 1263, offset 4440, flags [+],
proto UDP (17), length 1500) 10.13.150.50 > 10.13.150.1: udp
08:01:53.731280 IP (tos 0x0, ttl 64, id 1263, offset 5920, flags [+],
proto UDP (17), length 1500) 10.13.150.50 > 10.13.150.1: udp
08:01:53.731306 IP (tos 0x0, ttl 64, id 1263, offset 7400, flags
[none], proto UDP (17), length 820) 10.13.150.50 > 10.13.150.1: udp
Later I'll test Windows.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] fragment: add fast path
2010-06-14 2:03 ` Changli Gao
@ 2010-06-14 2:24 ` YOSHIFUJI Hideaki
2010-06-14 2:52 ` Changli Gao
2010-06-14 2:55 ` Changli Gao
1 sibling, 1 reply; 13+ messages in thread
From: YOSHIFUJI Hideaki @ 2010-06-14 2:24 UTC (permalink / raw)
To: Changli Gao
Cc: David Miller, kuznet, pekkas, jmorris, kaber, netdev, YOSHIFUJI Hideaki
NIC?
--yoshfuji
(2010/06/14 11:03), Changli Gao wrote:
> On Mon, Jun 14, 2010 at 9:18 AM, David Miller<davem@davemloft.net> wrote:
>> From: Changli Gao<xiaosuo@gmail.com>
>> Date: Mon, 14 Jun 2010 07:16:35 +0800
>>
>>> As the fragments are usually in order,
>>
>> In what universe does this happen "usually"?
>>
>> Linux has been outputting fragments in reverse order for more than 10
>> years.
>>
>
> I have tested next-next-2.6 and darwin, and found they are both send
> fragments in order:
>
> Darwin:
>
> Darwin localhost 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15
> 16:55:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_I386 i386 i386
>
> 09:53:26.891820 IP (tos 0x0, ttl 64, id 19628, offset 0, flags [+],
> proto UDP (17), length 1500) 10.13.3.10.52189> 10.13.150.1.8888: UDP,
> length 8192
> 09:53:26.892048 IP (tos 0x0, ttl 64, id 19628, offset 1480, flags [+],
> proto UDP (17), length 1500) 10.13.3.10> 10.13.150.1: udp
> 09:53:26.892229 IP (tos 0x0, ttl 64, id 19628, offset 2960, flags [+],
> proto UDP (17), length 1500) 10.13.3.10> 10.13.150.1: udp
> 09:53:26.892397 IP (tos 0x0, ttl 64, id 19628, offset 4440, flags [+],
> proto UDP (17), length 1500) 10.13.3.10> 10.13.150.1: udp
> 09:53:26.892529 IP (tos 0x0, ttl 64, id 19628, offset 5920, flags [+],
> proto UDP (17), length 1500) 10.13.3.10> 10.13.150.1: udp
> 09:53:26.892670 IP (tos 0x0, ttl 64, id 19628, offset 7400, flags
> [none], proto UDP (17), length 820) 10.13.3.10> 10.13.150.1: udp
>
> Linux:
>
> Linux localhost 2.6.35-rc1 #88 SMP Sun Jun 13 14:25:07 CST 2010 x86_64
> Intel(R) Core(TM)2 Duo CPU E6550 @ 2.33GHz GenuineIntel GNU/Linux
>
> 08:01:53.730902 IP (tos 0x0, ttl 64, id 1263, offset 0, flags [+],
> proto UDP (17), length 1500) 10.13.150.50.45295> 10.13.150.1.8888:
> UDP, length 8192
> 08:01:53.730955 IP (tos 0x0, ttl 64, id 1263, offset 1480, flags [+],
> proto UDP (17), length 1500) 10.13.150.50> 10.13.150.1: udp
> 08:01:53.731113 IP (tos 0x0, ttl 64, id 1263, offset 2960, flags [+],
> proto UDP (17), length 1500) 10.13.150.50> 10.13.150.1: udp
> 08:01:53.731139 IP (tos 0x0, ttl 64, id 1263, offset 4440, flags [+],
> proto UDP (17), length 1500) 10.13.150.50> 10.13.150.1: udp
> 08:01:53.731280 IP (tos 0x0, ttl 64, id 1263, offset 5920, flags [+],
> proto UDP (17), length 1500) 10.13.150.50> 10.13.150.1: udp
> 08:01:53.731306 IP (tos 0x0, ttl 64, id 1263, offset 7400, flags
> [none], proto UDP (17), length 820) 10.13.150.50> 10.13.150.1: udp
>
> Later I'll test Windows.
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] fragment: add fast path
2010-06-14 2:24 ` YOSHIFUJI Hideaki
@ 2010-06-14 2:52 ` Changli Gao
0 siblings, 0 replies; 13+ messages in thread
From: Changli Gao @ 2010-06-14 2:52 UTC (permalink / raw)
To: YOSHIFUJI Hideaki; +Cc: David Miller, kuznet, pekkas, jmorris, kaber, netdev
On Mon, Jun 14, 2010 at 10:24 AM, YOSHIFUJI Hideaki
<yoshfuji@linux-ipv6.org> wrote:
> NIC?
>
Linux: e1000
Darwin: tun
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] fragment: add fast path
2010-06-14 2:03 ` Changli Gao
2010-06-14 2:24 ` YOSHIFUJI Hideaki
@ 2010-06-14 2:55 ` Changli Gao
1 sibling, 0 replies; 13+ messages in thread
From: Changli Gao @ 2010-06-14 2:55 UTC (permalink / raw)
To: David Miller; +Cc: kuznet, pekkas, jmorris, yoshfuji, kaber, netdev
On Mon, Jun 14, 2010 at 10:03 AM, Changli Gao <xiaosuo@gmail.com> wrote:
>
> Later I'll test Windows.
>
Windows also sends fragments in order.
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 0, flags [+],
proto UDP (17), length 1452) 221.238.33.71.4194 > 202.113.29.4.8888:
UDP, length 32768
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 1432, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 2864, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 4296, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 5728, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 7160, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 8592, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 10024, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 11456, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 12888, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 14320, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.064453 IP (tos 0x0, ttl 128, id 35511, offset 15752, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 17184, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 18616, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 20048, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 21480, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 22912, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 24344, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 25776, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 27208, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 28640, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 30072, flags
[+], proto UDP (17), length 1452) 221.238.33.71 > 202.113.29.4: udp
10:40:29.066406 IP (tos 0x0, ttl 128, id 35511, offset 31504, flags
[none], proto UDP (17), length 1292) 221.238.33.71 > 202.113.29.4: udp
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] fragment: add fast path
2010-06-14 1:18 ` David Miller
2010-06-14 2:03 ` Changli Gao
@ 2010-06-14 5:15 ` YOSHIFUJI Hideaki
2010-06-14 6:19 ` Mitchell Erblich
1 sibling, 1 reply; 13+ messages in thread
From: YOSHIFUJI Hideaki @ 2010-06-14 5:15 UTC (permalink / raw)
To: David Miller
Cc: xiaosuo, kuznet, pekkas, jmorris, kaber, netdev, YOSHIFUJI Hideaki
David Miller wrote:
>> As the fragments are usually in order,
>
> In what universe does this happen "usually"?
>
> Linux has been outputting fragments in reverse order for more than 10
> years.
>
> I'm not applying this patch.
Dave, I know we've been sending in reverse order, of course.
And, as far as I know, it seems Linux is the only implementation
which sends fragments in reverse order.
This is receiving side. I think we should accept the fact
that Linux is not the only implementation, no?
--yoshfuji
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] fragment: add fast path
2010-06-13 23:16 [PATCH] fragment: add fast path Changli Gao
2010-06-14 1:18 ` David Miller
@ 2010-06-14 5:35 ` Eric Dumazet
2010-06-14 5:59 ` Changli Gao
2010-06-14 6:40 ` Eric Dumazet
1 sibling, 2 replies; 13+ messages in thread
From: Eric Dumazet @ 2010-06-14 5:35 UTC (permalink / raw)
To: Changli Gao
Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev
Le lundi 14 juin 2010 à 07:16 +0800, Changli Gao a écrit :
> add fast path
>
> As the fragments are usually in order, it is likely the new fragments are
> at the end of the inet_frag_queue. In the fast path, we check if the skb at the
> end of the inet_frag_queue is the prev we expect.
>
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ----
> include/net/inet_frag.h | 1 +
> net/ipv4/ip_fragment.c | 17 +++++++++++++++++
> net/ipv6/reassembly.c | 16 ++++++++++++++++
> 3 files changed, 34 insertions(+)
> diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
> index 39f2dc9..16ff29a 100644
> --- a/include/net/inet_frag.h
> +++ b/include/net/inet_frag.h
> @@ -20,6 +20,7 @@ struct inet_frag_queue {
> atomic_t refcnt;
> struct timer_list timer; /* when will this queue expire? */
> struct sk_buff *fragments; /* list of received fragments */
> + struct sk_buff *fragments_tail;
> ktime_t stamp;
> int len; /* total length of orig datagram */
> int meat;
> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
> index 75347ea..d8c36d4 100644
> --- a/net/ipv4/ip_fragment.c
> +++ b/net/ipv4/ip_fragment.c
> @@ -317,6 +317,7 @@ static int ip_frag_reinit(struct ipq *qp)
> qp->q.len = 0;
> qp->q.meat = 0;
> qp->q.fragments = NULL;
> + qp->q.fragments_tail = NULL;
> qp->iif = 0;
>
> return 0;
> @@ -389,6 +390,16 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
> * in the chain of fragments so far. We must know where to put
> * this fragment, right?
> */
> + prev = qp->q.fragments_tail;
> + if (prev) {
> + if (FRAG_CB(prev)->offset < offset) {
> + next = NULL;
> + goto found;
> + }
> + } else {
> + next = NULL;
How can this chunk be a win ? queue is empty anyway.
You add tests and slow the 'other path'
> + goto found;
> + }
Quite frankly, one easy way to speedup things would be to move 'offset'
from ipfrag_skb_cb close to skb->next field so that only one cache line
per frag is used during lookup.
I am not sure why we need "struct inet_skb_parm h;" field in struct
ipfrag_skb_cb...
I probably need to wakeup this monday morning ?
Untested patch follows, only compiled.
[PATCH] frags: Remove unecessary bits
While trying to move 'offset' to the beginning of frag CB, I found
inet_skb_parm field was unused.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 75347ea..0f51ae0 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -55,7 +55,6 @@ static int sysctl_ipfrag_max_dist __read_mostly = 64;
struct ipfrag_skb_cb
{
- struct inet_skb_parm h;
int offset;
};
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 6d4292f..122e0be 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -57,7 +57,6 @@
struct ip6frag_skb_cb
{
- struct inet6_skb_parm h;
int offset;
};
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] fragment: add fast path
2010-06-14 5:35 ` Eric Dumazet
@ 2010-06-14 5:59 ` Changli Gao
2010-06-14 7:04 ` Eric Dumazet
2010-06-14 6:40 ` Eric Dumazet
1 sibling, 1 reply; 13+ messages in thread
From: Changli Gao @ 2010-06-14 5:59 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev
On Mon, Jun 14, 2010 at 1:35 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le lundi 14 juin 2010 à 07:16 +0800, Changli Gao a écrit :
>> add fast path
>>
>> As the fragments are usually in order, it is likely the new fragments are
>> at the end of the inet_frag_queue. In the fast path, we check if the skb at the
>> end of the inet_frag_queue is the prev we expect.
>>
>> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
>> ----
>> include/net/inet_frag.h | 1 +
>> net/ipv4/ip_fragment.c | 17 +++++++++++++++++
>> net/ipv6/reassembly.c | 16 ++++++++++++++++
>> 3 files changed, 34 insertions(+)
>> diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
>> index 39f2dc9..16ff29a 100644
>> --- a/include/net/inet_frag.h
>> +++ b/include/net/inet_frag.h
>> @@ -20,6 +20,7 @@ struct inet_frag_queue {
>> atomic_t refcnt;
>> struct timer_list timer; /* when will this queue expire? */
>> struct sk_buff *fragments; /* list of received fragments */
>> + struct sk_buff *fragments_tail;
>> ktime_t stamp;
>> int len; /* total length of orig datagram */
>> int meat;
>> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
>> index 75347ea..d8c36d4 100644
>> --- a/net/ipv4/ip_fragment.c
>> +++ b/net/ipv4/ip_fragment.c
>> @@ -317,6 +317,7 @@ static int ip_frag_reinit(struct ipq *qp)
>> qp->q.len = 0;
>> qp->q.meat = 0;
>> qp->q.fragments = NULL;
>> + qp->q.fragments_tail = NULL;
>> qp->iif = 0;
>>
>> return 0;
>> @@ -389,6 +390,16 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
>> * in the chain of fragments so far. We must know where to put
>> * this fragment, right?
>> */
>> + prev = qp->q.fragments_tail;
>> + if (prev) {
>> + if (FRAG_CB(prev)->offset < offset) {
>> + next = NULL;
>> + goto found;
>> + }
>> + } else {
>> + next = NULL;
>
> How can this chunk be a win ? queue is empty anyway.
> You add tests and slow the 'other path'
>
>> + goto found;
>> + }
Without this branch. prev needs to be initialized to zero again(of
course, we can avoid this by moving prev = NULL in the previous
branch). next needs an assignment, and a duplicate check if the the
queue is empty, which is already known in the above branch. Sorry, but
I can't see which path I slow.
prev = NULL;
for (next = qp->q.fragments; next != NULL; next = next->next) {
if (FRAG_CB(next)->offset >= offset)
break; /* bingo! */
prev = next;
}
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] fragment: add fast path
2010-06-14 5:15 ` YOSHIFUJI Hideaki
@ 2010-06-14 6:19 ` Mitchell Erblich
0 siblings, 0 replies; 13+ messages in thread
From: Mitchell Erblich @ 2010-06-14 6:19 UTC (permalink / raw)
To: YOSHIFUJI Hideaki
Cc: David Miller, xiaosuo, kuznet, pekkas, jmorris, kaber, netdev
On Jun 13, 2010, at 10:15 PM, YOSHIFUJI Hideaki wrote:
> David Miller wrote:
>>> As the fragments are usually in order,
>> In what universe does this happen "usually"?
>> Linux has been outputting fragments in reverse order for more than 10
>> years.
>> I'm not applying this patch.
>
> Dave, I know we've been sending in reverse order, of course.
> And, as far as I know, it seems Linux is the only implementation
> which sends fragments in reverse order.
>
> This is receiving side. I think we should accept the fact
> that Linux is not the only implementation, no?
>
> --yoshfuji
Group,
With respect to IPv4 and PATH MTU, aren't we
unlikely to generate packet frags?
With respect to IPv6, aren't frags are less likely
(intermediate nodes are not allowed to frag
(rfc 2460, sect 4.5 Frag header)) as I would
expect the source to use the PATH MTU?
Thus, a faster path is to support the largest Jumbo
MTUs (MTU >=9000: 16,110 : 14,336 : 10,240)
where possible for bulk data transfers with
proper page orders, IMO.
Mitchell Erblich
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] fragment: add fast path
2010-06-14 5:35 ` Eric Dumazet
2010-06-14 5:59 ` Changli Gao
@ 2010-06-14 6:40 ` Eric Dumazet
1 sibling, 0 replies; 13+ messages in thread
From: Eric Dumazet @ 2010-06-14 6:40 UTC (permalink / raw)
To: Changli Gao
Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev
> I am not sure why we need "struct inet_skb_parm h;" field in struct
> ipfrag_skb_cb...
>
> I probably need to wakeup this monday morning ?
>
> Untested patch follows, only compiled.
>
Oh well, I see light now I woke up ;)
> [PATCH] frags: Remove unecessary bits
>
> While trying to move 'offset' to the beginning of frag CB, I found
> inet_skb_parm field was unused.
>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
Wrong patch of course.
We need some comment or document better why its there..
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] fragment: add fast path
2010-06-14 5:59 ` Changli Gao
@ 2010-06-14 7:04 ` Eric Dumazet
2010-06-14 10:15 ` Changli Gao
0 siblings, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2010-06-14 7:04 UTC (permalink / raw)
To: Changli Gao
Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev
>
> Without this branch. prev needs to be initialized to zero again(of
> course, we can avoid this by moving prev = NULL in the previous
> branch). next needs an assignment, and a duplicate check if the the
> queue is empty, which is already known in the above branch. Sorry, but
> I can't see which path I slow.
>
> prev = NULL;
> for (next = qp->q.fragments; next != NULL; next = next->next) {
> if (FRAG_CB(next)->offset >= offset)
> break; /* bingo! */
> prev = next;
> }
>
Concept of 'fast path' has changed over years. It used to be cpu
instructions and cycles, its now number of memory transactions.
The only thing we need to address are the cache lines we must bring into
cpu caches, and keep code short.
These days, one cache line miss -> more than one hundred instructions
that could be done during cpu stall. cpu cycles are cheap if code
already in instruction cache.
Adding a test to avoid entering a NULL loop (no fragment is stored yet)
just bloats the code, making it larger than necessary.
You dont need the else branch :
if (prev) {
if (FRAG_CB(prev)->offset < offset) {
next = NULL;
goto found;
}
else {
next = NULL;
goto found;
}
Just write :
next = NULL;
if (prev && FRAG_CB(prev)->offset < offset)
goto found;
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] fragment: add fast path
2010-06-14 7:04 ` Eric Dumazet
@ 2010-06-14 10:15 ` Changli Gao
0 siblings, 0 replies; 13+ messages in thread
From: Changli Gao @ 2010-06-14 10:15 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
James Morris, Hideaki YOSHIFUJI, Patrick McHardy, netdev
On Mon, Jun 14, 2010 at 3:04 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
> Concept of 'fast path' has changed over years. It used to be cpu
> instructions and cycles, its now number of memory transactions.
>
> The only thing we need to address are the cache lines we must bring into
> cpu caches, and keep code short.
>
> These days, one cache line miss -> more than one hundred instructions
> that could be done during cpu stall. cpu cycles are cheap if code
> already in instruction cache.
>
> Adding a test to avoid entering a NULL loop (no fragment is stored yet)
> just bloats the code, making it larger than necessary.
>
> You dont need the else branch :
>
> if (prev) {
> if (FRAG_CB(prev)->offset < offset) {
> next = NULL;
> goto found;
> }
> else {
> next = NULL;
> goto found;
> }
>
> Just write :
>
> next = NULL;
> if (prev && FRAG_CB(prev)->offset < offset)
> goto found;
>
Thanks:
I think the following code is better:
if (!prev || FRAG_CB(prev)->offset < offset) {
next = NULL;
goto found;
}
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2010-06-14 10:15 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-06-13 23:16 [PATCH] fragment: add fast path Changli Gao
2010-06-14 1:18 ` David Miller
2010-06-14 2:03 ` Changli Gao
2010-06-14 2:24 ` YOSHIFUJI Hideaki
2010-06-14 2:52 ` Changli Gao
2010-06-14 2:55 ` Changli Gao
2010-06-14 5:15 ` YOSHIFUJI Hideaki
2010-06-14 6:19 ` Mitchell Erblich
2010-06-14 5:35 ` Eric Dumazet
2010-06-14 5:59 ` Changli Gao
2010-06-14 7:04 ` Eric Dumazet
2010-06-14 10:15 ` Changli Gao
2010-06-14 6:40 ` Eric Dumazet
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.