All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] net: convert gro_count to bitmask
@ 2018-07-11  9:15 Li RongQing
  2018-07-11 10:51 ` Stefano Brivio
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Li RongQing @ 2018-07-11  9:15 UTC (permalink / raw)
  To: netdev

gro_hash size is 192 bytes, and uses 3 cache lines, if there is few
flows, gro_hash may be not fully used, so it is unnecessary to iterate
all gro_hash in napi_gro_flush(), to occupy unnecessary cacheline.

convert gro_count to a bitmask, and rename it as gro_bitmask, each bit
represents a element of gro_hash, only flush a gro_hash element if the
related bit is set, to speed up napi_gro_flush().

and update gro_bitmask only if it will be changed, to reduce cache
update

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
---
 include/linux/netdevice.h |  2 +-
 net/core/dev.c            | 35 +++++++++++++++++++++++------------
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b683971e500d..df49b36ef378 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -322,7 +322,7 @@ struct napi_struct {
 
 	unsigned long		state;
 	int			weight;
-	unsigned int		gro_count;
+	unsigned long		gro_bitmask;
 	int			(*poll)(struct napi_struct *, int);
 #ifdef CONFIG_NETPOLL
 	int			poll_owner;
diff --git a/net/core/dev.c b/net/core/dev.c
index d13cddcac41f..a08dbdd217a6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5171,9 +5171,11 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
 			return;
 		list_del_init(&skb->list);
 		napi_gro_complete(skb);
-		napi->gro_count--;
 		napi->gro_hash[index].count--;
 	}
+
+	if (!napi->gro_hash[index].count)
+		clear_bit(index, &napi->gro_bitmask);
 }
 
 /* napi->gro_hash[].list contains packets ordered by age.
@@ -5184,8 +5186,10 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 {
 	u32 i;
 
-	for (i = 0; i < GRO_HASH_BUCKETS; i++)
-		__napi_gro_flush_chain(napi, i, flush_old);
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		if (test_bit(i, &napi->gro_bitmask))
+			__napi_gro_flush_chain(napi, i, flush_old);
+	}
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
@@ -5277,8 +5281,8 @@ static void gro_flush_oldest(struct list_head *head)
 	if (WARN_ON_ONCE(!oldest))
 		return;
 
-	/* Do not adjust napi->gro_count, caller is adding a new SKB to
-	 * the chain.
+	/* Do not adjust napi->gro_hash[].count, caller is adding a new
+	 * SKB to the chain.
 	 */
 	list_del(&oldest->list);
 	napi_gro_complete(oldest);
@@ -5352,7 +5356,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (pp) {
 		list_del_init(&pp->list);
 		napi_gro_complete(pp);
-		napi->gro_count--;
 		napi->gro_hash[hash].count--;
 	}
 
@@ -5365,7 +5368,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
 		gro_flush_oldest(gro_head);
 	} else {
-		napi->gro_count++;
 		napi->gro_hash[hash].count++;
 	}
 	NAPI_GRO_CB(skb)->count = 1;
@@ -5380,6 +5382,12 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (grow > 0)
 		gro_pull_from_frag0(skb, grow);
 ok:
+	if (napi->gro_hash[hash].count)
+		if (!test_bit(hash, &napi->gro_bitmask))
+			set_bit(hash, &napi->gro_bitmask);
+	else if (test_bit(hash, &napi->gro_bitmask))
+		clear_bit(hash, &napi->gro_bitmask);
+
 	return ret;
 
 normal:
@@ -5778,7 +5786,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 				 NAPIF_STATE_IN_BUSY_POLL)))
 		return false;
 
-	if (n->gro_count) {
+	if (n->gro_bitmask) {
 		unsigned long timeout = 0;
 
 		if (work_done)
@@ -5987,7 +5995,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 	/* Note : we use a relaxed variant of napi_schedule_prep() not setting
 	 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 	 */
-	if (napi->gro_count && !napi_disable_pending(napi) &&
+	if (napi->gro_bitmask && !napi_disable_pending(napi) &&
 	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
 		__napi_schedule_irqoff(napi);
 
@@ -6002,7 +6010,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 	INIT_LIST_HEAD(&napi->poll_list);
 	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 	napi->timer.function = napi_watchdog;
-	napi->gro_count = 0;
+	napi->gro_bitmask = 0;
 	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
 		INIT_LIST_HEAD(&napi->gro_hash[i].list);
 		napi->gro_hash[i].count = 0;
@@ -6062,7 +6070,7 @@ void netif_napi_del(struct napi_struct *napi)
 	napi_free_frags(napi);
 
 	flush_gro_hash(napi);
-	napi->gro_count = 0;
+	napi->gro_bitmask = 0;
 }
 EXPORT_SYMBOL(netif_napi_del);
 
@@ -6104,7 +6112,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 		goto out_unlock;
 	}
 
-	if (n->gro_count) {
+	if (n->gro_bitmask) {
 		/* flush too old packets
 		 * If HZ < 1000, flush all packets.
 		 */
@@ -9151,6 +9159,9 @@ static struct hlist_head * __net_init netdev_create_hash(void)
 /* Initialize per network namespace state */
 static int __net_init netdev_init(struct net *net)
 {
+	BUILD_BUG_ON(GRO_HASH_BUCKETS >
+			FIELD_SIZEOF(struct napi_struct, gro_bitmask));
+
 	if (net != &init_net)
 		INIT_LIST_HEAD(&net->dev_base_head);
 
-- 
2.16.2

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] net: convert gro_count to bitmask
  2018-07-11  9:15 [PATCH] net: convert gro_count to bitmask Li RongQing
@ 2018-07-11 10:51 ` Stefano Brivio
  2018-07-12  2:31   ` 答复: " Li,Rongqing
  2018-07-11 11:32 ` Eric Dumazet
  2018-07-12  2:49 ` David Miller
  2 siblings, 1 reply; 9+ messages in thread
From: Stefano Brivio @ 2018-07-11 10:51 UTC (permalink / raw)
  To: Li RongQing; +Cc: netdev, Eric Dumazet

On Wed, 11 Jul 2018 17:15:53 +0800
Li RongQing <lirongqing@baidu.com> wrote:

> @@ -5380,6 +5382,12 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
>  	if (grow > 0)
>  		gro_pull_from_frag0(skb, grow);
>  ok:
> +	if (napi->gro_hash[hash].count)
> +		if (!test_bit(hash, &napi->gro_bitmask))
> +			set_bit(hash, &napi->gro_bitmask);
> +	else if (test_bit(hash, &napi->gro_bitmask))
> +		clear_bit(hash, &napi->gro_bitmask);

This might not do what you want.

-- 
Stefano

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] net: convert gro_count to bitmask
  2018-07-11  9:15 [PATCH] net: convert gro_count to bitmask Li RongQing
  2018-07-11 10:51 ` Stefano Brivio
@ 2018-07-11 11:32 ` Eric Dumazet
  2018-07-12  2:31   ` 答复: " Li,Rongqing
  2018-07-12  2:49 ` David Miller
  2 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2018-07-11 11:32 UTC (permalink / raw)
  To: Li RongQing, netdev



On 07/11/2018 02:15 AM, Li RongQing wrote:
> gro_hash size is 192 bytes, and uses 3 cache lines, if there is few
> flows, gro_hash may be not fully used, so it is unnecessary to iterate
> all gro_hash in napi_gro_flush(), to occupy unnecessary cacheline.
> 
> convert gro_count to a bitmask, and rename it as gro_bitmask, each bit
> represents a element of gro_hash, only flush a gro_hash element if the
> related bit is set, to speed up napi_gro_flush().
> 
> and update gro_bitmask only if it will be changed, to reduce cache
> update
> 
> Suggested-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Li RongQing <lirongqing@baidu.com>
> ---
>  include/linux/netdevice.h |  2 +-
>  net/core/dev.c            | 35 +++++++++++++++++++++++------------
>  2 files changed, 24 insertions(+), 13 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index b683971e500d..df49b36ef378 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -322,7 +322,7 @@ struct napi_struct {
>  
>  	unsigned long		state;
>  	int			weight;
> -	unsigned int		gro_count;
> +	unsigned long		gro_bitmask;
>  	int			(*poll)(struct napi_struct *, int);
>  #ifdef CONFIG_NETPOLL
>  	int			poll_owner;
> diff --git a/net/core/dev.c b/net/core/dev.c
> index d13cddcac41f..a08dbdd217a6 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -5171,9 +5171,11 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
>  			return;
>  		list_del_init(&skb->list);
>  		napi_gro_complete(skb);
> -		napi->gro_count--;
>  		napi->gro_hash[index].count--;
>  	}
> +
> +	if (!napi->gro_hash[index].count)
> +		clear_bit(index, &napi->gro_bitmask);

I suggest you not add an atomic operation here.

Current cpu owns this NAPI after all.

Same remark for the whole patch.

->  __clear_bit(), __set_bit() and similar operators

Ideally you should provide TCP_RR number with busy polling enabled, to eventually catch regressions.

Thanks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* 答复: [PATCH] net: convert gro_count to bitmask
  2018-07-11 10:51 ` Stefano Brivio
@ 2018-07-12  2:31   ` Li,Rongqing
  2018-07-12  9:48     ` Stefano Brivio
  0 siblings, 1 reply; 9+ messages in thread
From: Li,Rongqing @ 2018-07-12  2:31 UTC (permalink / raw)
  To: Stefano Brivio; +Cc: netdev, Eric Dumazet



> -----邮件原件-----
> 发件人: Stefano Brivio [mailto:sbrivio@redhat.com]
> 发送时间: 2018年7月11日 18:52
> 收件人: Li,Rongqing <lirongqing@baidu.com>
> 抄送: netdev@vger.kernel.org; Eric Dumazet <edumazet@google.com>
> 主题: Re: [PATCH] net: convert gro_count to bitmask
> 
> On Wed, 11 Jul 2018 17:15:53 +0800
> Li RongQing <lirongqing@baidu.com> wrote:
> 
> > @@ -5380,6 +5382,12 @@ static enum gro_result dev_gro_receive(struct
> napi_struct *napi, struct sk_buff
> >  	if (grow > 0)
> >  		gro_pull_from_frag0(skb, grow);
> >  ok:
> > +	if (napi->gro_hash[hash].count)
> > +		if (!test_bit(hash, &napi->gro_bitmask))
> > +			set_bit(hash, &napi->gro_bitmask);
> > +	else if (test_bit(hash, &napi->gro_bitmask))
> > +		clear_bit(hash, &napi->gro_bitmask);
> 
> This might not do what you want.
> 
> --

could you show detail ?

-RongQing

> Stefano

^ permalink raw reply	[flat|nested] 9+ messages in thread

* 答复: [PATCH] net: convert gro_count to bitmask
  2018-07-11 11:32 ` Eric Dumazet
@ 2018-07-12  2:31   ` Li,Rongqing
  0 siblings, 0 replies; 9+ messages in thread
From: Li,Rongqing @ 2018-07-12  2:31 UTC (permalink / raw)
  To: Eric Dumazet, netdev



> -----邮件原件-----
> 发件人: Eric Dumazet [mailto:eric.dumazet@gmail.com]
> 发送时间: 2018年7月11日 19:32
> 收件人: Li,Rongqing <lirongqing@baidu.com>; netdev@vger.kernel.org
> 主题: Re: [PATCH] net: convert gro_count to bitmask
> 
> 
> 
> On 07/11/2018 02:15 AM, Li RongQing wrote:
> > gro_hash size is 192 bytes, and uses 3 cache lines, if there is few
> > flows, gro_hash may be not fully used, so it is unnecessary to iterate
> > all gro_hash in napi_gro_flush(), to occupy unnecessary cacheline.
> >
> > convert gro_count to a bitmask, and rename it as gro_bitmask, each bit
> > represents a element of gro_hash, only flush a gro_hash element if the
> > related bit is set, to speed up napi_gro_flush().
> >
> > and update gro_bitmask only if it will be changed, to reduce cache
> > update
> >
> > Suggested-by: Eric Dumazet <edumazet@google.com>
> > Signed-off-by: Li RongQing <lirongqing@baidu.com>
> > ---
> >  include/linux/netdevice.h |  2 +-
> >  net/core/dev.c            | 35 +++++++++++++++++++++++------------
> >  2 files changed, 24 insertions(+), 13 deletions(-)
> >
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index b683971e500d..df49b36ef378 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -322,7 +322,7 @@ struct napi_struct {
> >
> >  	unsigned long		state;
> >  	int			weight;
> > -	unsigned int		gro_count;
> > +	unsigned long		gro_bitmask;
> >  	int			(*poll)(struct napi_struct *, int);
> >  #ifdef CONFIG_NETPOLL
> >  	int			poll_owner;
> > diff --git a/net/core/dev.c b/net/core/dev.c index
> > d13cddcac41f..a08dbdd217a6 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -5171,9 +5171,11 @@ static void __napi_gro_flush_chain(struct
> napi_struct *napi, u32 index,
> >  			return;
> >  		list_del_init(&skb->list);
> >  		napi_gro_complete(skb);
> > -		napi->gro_count--;
> >  		napi->gro_hash[index].count--;
> >  	}
> > +
> > +	if (!napi->gro_hash[index].count)
> > +		clear_bit(index, &napi->gro_bitmask);
> 
> I suggest you not add an atomic operation here.
> 
> Current cpu owns this NAPI after all.
> 
> Same remark for the whole patch.
> 
> ->  __clear_bit(), __set_bit() and similar operators
> 
> Ideally you should provide TCP_RR number with busy polling enabled, to
> eventually catch regressions.
> 

I will change and do the test
Thank you.

-RongQing

> Thanks.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] net: convert gro_count to bitmask
  2018-07-11  9:15 [PATCH] net: convert gro_count to bitmask Li RongQing
  2018-07-11 10:51 ` Stefano Brivio
  2018-07-11 11:32 ` Eric Dumazet
@ 2018-07-12  2:49 ` David Miller
  2018-07-12  3:03   ` 答复: " Li,Rongqing
  2 siblings, 1 reply; 9+ messages in thread
From: David Miller @ 2018-07-12  2:49 UTC (permalink / raw)
  To: lirongqing; +Cc: netdev

From: Li RongQing <lirongqing@baidu.com>
Date: Wed, 11 Jul 2018 17:15:53 +0800

> +		clear_bit(index, &napi->gro_bitmask);

Please don't use atomics here, at least use __clear_bit().

This is why I did the operations by hand in my version of the patch.
Also, if you are going to preempt my patch, at least retain the
comment I added around the GRO_HASH_BUCKETS definitions which warns
the reader about the limit.

Thanks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* 答复: [PATCH] net: convert gro_count to bitmask
  2018-07-12  2:49 ` David Miller
@ 2018-07-12  3:03   ` Li,Rongqing
  2018-07-12  3:51     ` David Miller
  0 siblings, 1 reply; 9+ messages in thread
From: Li,Rongqing @ 2018-07-12  3:03 UTC (permalink / raw)
  To: David Miller; +Cc: netdev



> -----邮件原件-----
> 发件人: David Miller [mailto:davem@davemloft.net]
> 发送时间: 2018年7月12日 10:49
> 收件人: Li,Rongqing <lirongqing@baidu.com>
> 抄送: netdev@vger.kernel.org
> 主题: Re: [PATCH] net: convert gro_count to bitmask
> 
> From: Li RongQing <lirongqing@baidu.com>
> Date: Wed, 11 Jul 2018 17:15:53 +0800
> 
> > +		clear_bit(index, &napi->gro_bitmask);
> 
> Please don't use atomics here, at least use __clear_bit().
> 

Thanks, this is same as Eric's suggestion.


> This is why I did the operations by hand in my version of the patch.
> Also, if you are going to preempt my patch, at least retain the comment I
> added around the GRO_HASH_BUCKETS definitions which warns the reader
> about the limit.
> 

I add BUILD_BUG_ON in netdev_init, so I think we need not to add comment

@@ -9151,6 +9159,9 @@ static struct hlist_head * __net_init netdev_create_hash(void)
 /* Initialize per network namespace state */  static int __net_init netdev_init(struct net *net)  {
+	BUILD_BUG_ON(GRO_HASH_BUCKETS >
+			FIELD_SIZEOF(struct napi_struct, gro_bitmask));
+


-RongQing

> Thanks.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: 答复: [PATCH] net: convert gro_count to bitmask
  2018-07-12  3:03   ` 答复: " Li,Rongqing
@ 2018-07-12  3:51     ` David Miller
  0 siblings, 0 replies; 9+ messages in thread
From: David Miller @ 2018-07-12  3:51 UTC (permalink / raw)
  To: lirongqing; +Cc: netdev

From: "Li,Rongqing" <lirongqing@baidu.com>
Date: Thu, 12 Jul 2018 03:03:51 +0000

> 
> 
>> -----邮件原件-----
>> 发件人: David Miller [mailto:davem@davemloft.net]
>> 发送时间: 2018年7月12日 10:49
>> 收件人: Li,Rongqing <lirongqing@baidu.com>
>> 抄送: netdev@vger.kernel.org
>> 主题: Re: [PATCH] net: convert gro_count to bitmask
>> 
>> From: Li RongQing <lirongqing@baidu.com>
>> Date: Wed, 11 Jul 2018 17:15:53 +0800
>> 
>> > +		clear_bit(index, &napi->gro_bitmask);
>> 
>> Please don't use atomics here, at least use __clear_bit().
>> 
> 
> Thanks, this is same as Eric's suggestion.
> 
> 
>> This is why I did the operations by hand in my version of the patch.
>> Also, if you are going to preempt my patch, at least retain the comment I
>> added around the GRO_HASH_BUCKETS definitions which warns the reader
>> about the limit.
>> 
> 
> I add BUILD_BUG_ON in netdev_init, so I think we need not to add comment

That's a good compile time check, but the person thinking about editing
the definition doesn't see the limit in the header file nor know why
the limit exists in the first place.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] net: convert gro_count to bitmask
  2018-07-12  2:31   ` 答复: " Li,Rongqing
@ 2018-07-12  9:48     ` Stefano Brivio
  0 siblings, 0 replies; 9+ messages in thread
From: Stefano Brivio @ 2018-07-12  9:48 UTC (permalink / raw)
  To: Li,Rongqing; +Cc: netdev, Eric Dumazet

On Thu, 12 Jul 2018 02:31:10 +0000
"Li,Rongqing" <lirongqing@baidu.com> wrote:

> > -----邮件原件-----
> > 发件人: Stefano Brivio [mailto:sbrivio@redhat.com]
> > 发送时间: 2018年7月11日 18:52
> > 收件人: Li,Rongqing <lirongqing@baidu.com>
> > 抄送: netdev@vger.kernel.org; Eric Dumazet <edumazet@google.com>
> > 主题: Re: [PATCH] net: convert gro_count to bitmask
> > 
> > On Wed, 11 Jul 2018 17:15:53 +0800
> > Li RongQing <lirongqing@baidu.com> wrote:
> >   
> > > @@ -5380,6 +5382,12 @@ static enum gro_result dev_gro_receive(struct  
> > napi_struct *napi, struct sk_buff  
> > >  	if (grow > 0)
> > >  		gro_pull_from_frag0(skb, grow);
> > >  ok:
> > > +	if (napi->gro_hash[hash].count)
> > > +		if (!test_bit(hash, &napi->gro_bitmask))
> > > +			set_bit(hash, &napi->gro_bitmask);
> > > +	else if (test_bit(hash, &napi->gro_bitmask))
> > > +		clear_bit(hash, &napi->gro_bitmask);  
> > 
> > This might not do what you want.
> > 
> > --  
> 
> could you show detail ?

$ cat if1.c; gcc -o if1 if1.c
#include <stdio.h>

int main()
{
	if (1)
		if (0)
			;
	else if (2)
		printf("whoops\n");

	return 0;
}
$ ./if1
whoops

$ cat if2.c; gcc -o if2 if2.c
#include <stdio.h>

int main()
{
	if (1) {
		if (0)
			;
	} else if (2) {
		printf("whoops\n");
	}

	return 0;
}
$ ./if2

-- 
Stefano

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-07-12  9:57 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-11  9:15 [PATCH] net: convert gro_count to bitmask Li RongQing
2018-07-11 10:51 ` Stefano Brivio
2018-07-12  2:31   ` 答复: " Li,Rongqing
2018-07-12  9:48     ` Stefano Brivio
2018-07-11 11:32 ` Eric Dumazet
2018-07-12  2:31   ` 答复: " Li,Rongqing
2018-07-12  2:49 ` David Miller
2018-07-12  3:03   ` 答复: " Li,Rongqing
2018-07-12  3:51     ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.