linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] net: deinline netif_tx_stop_queue() and netif_tx_stop_all_queues()
@ 2015-05-07 11:41 Denys Vlasenko
  2015-05-07 17:14 ` Alexander Duyck
  2015-05-10  2:27 ` David Miller
  0 siblings, 2 replies; 7+ messages in thread
From: Denys Vlasenko @ 2015-05-07 11:41 UTC (permalink / raw)
  To: David S. Miller
  Cc: Denys Vlasenko, Jiri Pirko, linux-kernel, netdev, netfilter-devel

These functions compile to ~60 bytes of machine code each.

With this .config: http://busybox.net/~vda/kernel_config
there are 617 calls to netif_tx_stop_queue()
and 49 calls to netif_tx_stop_all_queues() in vmlinux.

Code size is reduced by 27 kbytes:

    text     data      bss       dec     hex filename
82426986 22255416 20627456 125309858 77813a2 vmlinux.before
82399481 22255416 20627456 125282353 777a831 vmlinux

It may seem strange that a seemingly simple code like one in
netif_tx_stop_queue() compiles to ~60 bytes of code.
Well, it's true. Here's its disassembly:

    netif_tx_stop_queue:
       e8 b0 15 4d 00          callq  <__fentry__>
       48 85 ff                test   %rdi,%rdi
       75 25                   jne    <netif_tx_stop_queue+0x2f>
       55                      push   %rbp
       be 7a 18 00 00          mov    $0x187a,%esi
       48 c7 c7 50 59 d8 85    mov    $.rodata+0x1d85950,%rdi
       48 89 e5                mov    %rsp,%rbp
       e8 54 5a 7d fd          callq  <warn_slowpath_null>
       48 c7 c7 5f 59 d8 85    mov    $.rodata+0x1d8595f,%rdi
       31 c0                   xor    %eax,%eax
       e8 b0 47 48 00          callq  <printk>
       eb 09                   jmp    <netif_tx_stop_queue+0x38>
       f0 80 8f e0 01 00 00 01 lock orb $0x1,0x1e0(%rdi)
       c3                      retq
       5d                      pop    %rbp
       c3                      retq

This causes gcc to auto-deinline it even before this patch, but with a separate
copy in each module which uses this function - 203 copies in total:

$ nm --size-sort vmlinux.before | grep -e ' netif_tx_stop_queue$' | wc -l
203

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: David S. Miller <davem@davemloft.net>
CC: Jiri Pirko <jpirko@redhat.com>
CC: linux-kernel@vger.kernel.org
CC: netdev@vger.kernel.org
CC: netfilter-devel@vger.kernel.org
---
 include/linux/netdevice.h | 19 ++-----------------
 net/core/dev.c            | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dcf6ec2..f650d16 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2546,14 +2546,7 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev)
 	}
 }
 
-static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
-{
-	if (WARN_ON(!dev_queue)) {
-		pr_info("netif_stop_queue() cannot be called before register_netdev()\n");
-		return;
-	}
-	set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
-}
+void netif_tx_stop_queue(struct netdev_queue *dev_queue);
 
 /**
  *	netif_stop_queue - stop transmitted packets
@@ -2567,15 +2560,7 @@ static inline void netif_stop_queue(struct net_device *dev)
 	netif_tx_stop_queue(netdev_get_tx_queue(dev, 0));
 }
 
-static inline void netif_tx_stop_all_queues(struct net_device *dev)
-{
-	unsigned int i;
-
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		netif_tx_stop_queue(txq);
-	}
-}
+void netif_tx_stop_all_queues(struct net_device *dev);
 
 static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index 962ee9d..569031f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6261,6 +6261,27 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 	return 0;
 }
 
+void netif_tx_stop_queue(struct netdev_queue *dev_queue)
+{
+	if (WARN_ON(!dev_queue)) {
+		pr_info("netif_stop_queue() cannot be called before register_netdev()\n");
+		return;
+	}
+	set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
+}
+EXPORT_SYMBOL(netif_tx_stop_queue);
+
+void netif_tx_stop_all_queues(struct net_device *dev)
+{
+	unsigned int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		netif_tx_stop_queue(txq);
+	}
+}
+EXPORT_SYMBOL(netif_tx_stop_all_queues);
+
 /**
  *	register_netdevice	- register a network device
  *	@dev: device to register
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] net: deinline netif_tx_stop_queue() and netif_tx_stop_all_queues()
  2015-05-07 11:41 [PATCH] net: deinline netif_tx_stop_queue() and netif_tx_stop_all_queues() Denys Vlasenko
@ 2015-05-07 17:14 ` Alexander Duyck
  2015-05-07 18:44   ` Joe Perches
  2015-05-08  9:45   ` Denys Vlasenko
  2015-05-10  2:27 ` David Miller
  1 sibling, 2 replies; 7+ messages in thread
From: Alexander Duyck @ 2015-05-07 17:14 UTC (permalink / raw)
  To: Denys Vlasenko, David S. Miller
  Cc: Jiri Pirko, linux-kernel, netdev, netfilter-devel

On 05/07/2015 04:41 AM, Denys Vlasenko wrote:
> These functions compile to ~60 bytes of machine code each.
>
> With this .config: http://busybox.net/~vda/kernel_config
> there are 617 calls to netif_tx_stop_queue()
> and 49 calls to netif_tx_stop_all_queues() in vmlinux.
>
> Code size is reduced by 27 kbytes:
>
>      text     data      bss       dec     hex filename
> 82426986 22255416 20627456 125309858 77813a2 vmlinux.before
> 82399481 22255416 20627456 125282353 777a831 vmlinux
>
> It may seem strange that a seemingly simple code like one in
> netif_tx_stop_queue() compiles to ~60 bytes of code.
> Well, it's true. Here's its disassembly:
>
>      netif_tx_stop_queue:
>         e8 b0 15 4d 00          callq  <__fentry__>

This bit was added because you converted this to a function.

>         48 85 ff                test   %rdi,%rdi
>         75 25                   jne    <netif_tx_stop_queue+0x2f>

This bit is your WARN_ON test

>         55                      push   %rbp
>         be 7a 18 00 00          mov    $0x187a,%esi
>         48 c7 c7 50 59 d8 85    mov    $.rodata+0x1d85950,%rdi
>         48 89 e5                mov    %rsp,%rbp
>         e8 54 5a 7d fd          callq  <warn_slowpath_null>
>         48 c7 c7 5f 59 d8 85    mov    $.rodata+0x1d8595f,%rdi
>         31 c0                   xor    %eax,%eax
>         e8 b0 47 48 00          callq  <printk>
>         eb 09                   jmp    <netif_tx_stop_queue+0x38>

This is the WARN_ON action.  One thing you might try doing is moving 
this to a function of its own instead of moving the entire thing out of 
being an inline.  You may find you still get most of the space savings 
as I wonder if the string for the printk isn't being duplicated for each 
caller.

>         f0 80 8f e0 01 00 00 01 lock orb $0x1,0x1e0(%rdi)

This is your set bit operation.  If you were to drop the whole WARN_ON 
then this is the only thing you would be inlining.  That is only 8 bytes 
in size which would probably be comparable to the callq and register 
sorting needed for a function call.

>         c3                      retq
>         5d                      pop    %rbp
>         c3                      retq

The rest of this is just more function overhead, one return for your 
standard path, and  a pop and a return for the WARN_ON path.

>
> This causes gcc to auto-deinline it before this patch, but with 203 separate
> copies in each module which uses this function:
>
> $ nm --size-sort vmlinux.before | grep -e ' netif_tx_stop_queue$' | wc -l
> 203
>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: David S. Miller <davem@davemloft.net>
> CC: Jiri Pirko <jpirko@redhat.com>
> CC: linux-kernel@vger.kernel.org
> CC: netdev@vger.kernel.org
> CC: netfilter-devel@vger.kernel.org
> ---

Have you done any performance testing on this change?  I suspect there 
will likely be a noticeable impact on some tests.

>   include/linux/netdevice.h | 19 ++-----------------
>   net/core/dev.c            | 21 +++++++++++++++++++++
>   2 files changed, 23 insertions(+), 17 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index dcf6ec2..f650d16 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -2546,14 +2546,7 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev)
>   	}
>   }
>   
> -static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
> -{
> -	if (WARN_ON(!dev_queue)) {
> -		pr_info("netif_stop_queue() cannot be called before register_netdev()\n");
> -		return;
> -	}
> -	set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
> -}
> +void netif_tx_stop_queue(struct netdev_queue *dev_queue);

It looks to me like most of the overhead for this function is the 
WARN_ON.  Without that, the function would just be the "lock orb".

The question I would have is why do we need the WARN_ON?  Why not let 
any drivers that call netif_stop_queue before the netdev is registered 
take the NULL pointer dereference?  They would likely learn real quick 
not to do that and a NULL pointer dereference is fairly easy to debug.  
You could probably even just replace the WARN_ON with a comment that if 
you get a NULL pointer dereference here you probably called it before 
register_netdev.

>   
>   /**
>    *	netif_stop_queue - stop transmitted packets
> @@ -2567,15 +2560,7 @@ static inline void netif_stop_queue(struct net_device *dev)
>   	netif_tx_stop_queue(netdev_get_tx_queue(dev, 0));
>   }
>   
> -static inline void netif_tx_stop_all_queues(struct net_device *dev)
> -{
> -	unsigned int i;
> -
> -	for (i = 0; i < dev->num_tx_queues; i++) {
> -		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
> -		netif_tx_stop_queue(txq);
> -	}
> -}
> +void netif_tx_stop_all_queues(struct net_device *dev);
>   
>   static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
>   {

This is usually a slow path for most device drivers, so it should be fine to 
uninline.

> diff --git a/net/core/dev.c b/net/core/dev.c
> index 962ee9d..569031f 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6261,6 +6261,27 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
>   	return 0;
>   }
>   
> +void netif_tx_stop_queue(struct netdev_queue *dev_queue)
> +{
> +	if (WARN_ON(!dev_queue)) {
> +		pr_info("netif_stop_queue() cannot be called before register_netdev()\n");
> +		return;
> +	}
> +	set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
> +}
> +EXPORT_SYMBOL(netif_tx_stop_queue);
> +

One thing I noticed on reviewing the assembly above was that you should 
probably wrap the !dev_queue check in an unlikely.  It would save you 
some unnecessary jumps instructions.

> +void netif_tx_stop_all_queues(struct net_device *dev)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < dev->num_tx_queues; i++) {
> +		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
> +		netif_tx_stop_queue(txq);
> +	}
> +}
> +EXPORT_SYMBOL(netif_tx_stop_all_queues);
> +
>   /**
>    *	register_netdevice	- register a network device
>    *	@dev: device to register


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] net: deinline netif_tx_stop_queue() and netif_tx_stop_all_queues()
  2015-05-07 17:14 ` Alexander Duyck
@ 2015-05-07 18:44   ` Joe Perches
  2015-05-08  9:45   ` Denys Vlasenko
  1 sibling, 0 replies; 7+ messages in thread
From: Joe Perches @ 2015-05-07 18:44 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: Denys Vlasenko, David S. Miller, Jiri Pirko, linux-kernel,
	netdev, netfilter-devel

On Thu, 2015-05-07 at 10:14 -0700, Alexander Duyck wrote:
> On 05/07/2015 04:41 AM, Denys Vlasenko wrote:
> > These functions compile to ~60 bytes of machine code each.
> >
> > With this .config: http://busybox.net/~vda/kernel_config
> > there are 617 calls to netif_tx_stop_queue()
> > and 49 calls to netif_tx_stop_all_queues() in vmlinux.
> >
> > Code size is reduced by 27 kbytes:

[]

> This is the WARN_ON action.  One thing you might try doing is moving 
> this to a function of its own instead of moving the entire thing out of 
> being an inline.  You may find you still get most of the space savings 
> as I wonder if the string for the printk isn't being duplicated for each 
> caller.

It is effectively duplicated (with different prefixes) if there is a
#define pr_fmt(fmt) "some_prefix: " fmt
before this code is reached.  That's most callers now.

The code that doesn't have a pr_fmt should get symbol deduplicated
at link time.




^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] net: deinline netif_tx_stop_queue() and netif_tx_stop_all_queues()
  2015-05-07 17:14 ` Alexander Duyck
  2015-05-07 18:44   ` Joe Perches
@ 2015-05-08  9:45   ` Denys Vlasenko
  2015-05-08 15:50     ` Alexander Duyck
  1 sibling, 1 reply; 7+ messages in thread
From: Denys Vlasenko @ 2015-05-08  9:45 UTC (permalink / raw)
  To: Alexander Duyck, David S. Miller
  Cc: Jiri Pirko, linux-kernel, netdev, netfilter-devel

On 05/07/2015 07:14 PM, Alexander Duyck wrote:
> On 05/07/2015 04:41 AM, Denys Vlasenko wrote:
>> These functions compile to ~60 bytes of machine code each.
>>
>> With this .config: http://busybox.net/~vda/kernel_config
>> there are 617 calls to netif_tx_stop_queue()
>> and 49 calls to netif_tx_stop_all_queues() in vmlinux.
>>
>> Code size is reduced by 27 kbytes:
>>
>>      text     data      bss       dec     hex filename
>> 82426986 22255416 20627456 125309858 77813a2 vmlinux.before
>> 82399481 22255416 20627456 125282353 777a831 vmlinux
>>
>> It may seem strange that a seemingly simple code like one in
>> netif_tx_stop_queue() compiles to ~60 bytes of code.
>> Well, it's true. Here's its disassembly:
>>
>>      netif_tx_stop_queue:
...
>>         55                      push   %rbp
>>         be 7a 18 00 00          mov    $0x187a,%esi
>>         48 c7 c7 50 59 d8 85    mov    $.rodata+0x1d85950,%rdi
>>         48 89 e5                mov    %rsp,%rbp
>>         e8 54 5a 7d fd          callq  <warn_slowpath_null>
>>         48 c7 c7 5f 59 d8 85    mov    $.rodata+0x1d8595f,%rdi
>>         31 c0                   xor    %eax,%eax
>>         e8 b0 47 48 00          callq  <printk>
>>         eb 09                   jmp    <netif_tx_stop_queue+0x38>
> 
> This is the WARN_ON action.  One thing you might try doing is moving
> this to a function of its own instead of moving the entire thing
> out of being an inline.

If the WARN_ON check were moved into a function, the call overhead
would still be there, while each callsite would be larger than with
this patch.

> You may find you still get most
> of the space savings as I wonder if the string for the printk
> isn't being duplicated for each caller.

Yes, strings are duplicated:

$ strings vmlinux0  | grep 'cannot be called before register_netdev'
6netif_stop_queue() cannot be called before register_netdev()
6tun: netif_stop_queue() cannot be called before register_netdev()
6cc770: netif_stop_queue() cannot be called before register_netdev()
63c589_cs: netif_stop_queue() cannot be called before register_netdev()
63c574_cs: netif_stop_queue() cannot be called before register_netdev()
6typhoon netif_stop_queue() cannot be called before register_netdev()
6axnet_cs: netif_stop_queue() cannot be called before register_netdev()
6pcnet_cs: netif_stop_queue() cannot be called before register_netdev()
...

However, they amount only to ~5.7k out of 27k:

$ strings vmlinux0  | grep 'cannot be called before register_netdev' | wc -c
5731


>>         f0 80 8f e0 01 00 00 01 lock orb $0x1,0x1e0(%rdi)
> 
> This is your set bit operation.  If you were to drop the whole WARN_ON
> then this is the only thing you would be inlining.

It's up to networking people to decide. I would happily send a patch which drops
WARN_ON if they say that's ok with them. Davem?


> That is only 8 bytes in size which would probably be comparable to the callq
> and register sorting needed for a function call.

"lock or" in my tests takes 21 cycles even on exclusively cached
L1 data cache line. Added "call+ret" is 4-5 cycles.

> Have you done any performance testing on this change?

No.

> I suspect there will likely be a noticeable impact some some tests.

(1) It's *transmit off* operation. Usually it means that we have to turn
transmit off because hw TX queue is full. So the bottleneck is likely
the network, not the CPU.

(2) It was auto-deinlined by gcc anyway. We already were unknowingly
using the uninlined version for some time. Apparently, it wasn't noticed.


-- 
vda


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] net: deinline netif_tx_stop_queue() and netif_tx_stop_all_queues()
  2015-05-08  9:45   ` Denys Vlasenko
@ 2015-05-08 15:50     ` Alexander Duyck
  2015-05-08 17:30       ` Alexei Starovoitov
  0 siblings, 1 reply; 7+ messages in thread
From: Alexander Duyck @ 2015-05-08 15:50 UTC (permalink / raw)
  To: Denys Vlasenko, David S. Miller
  Cc: Jiri Pirko, linux-kernel, netdev, netfilter-devel

On 05/08/2015 02:45 AM, Denys Vlasenko wrote:
> On 05/07/2015 07:14 PM, Alexander Duyck wrote:
>> On 05/07/2015 04:41 AM, Denys Vlasenko wrote:
>>> These functions compile to ~60 bytes of machine code each.
>>>
>>> With this .config: http://busybox.net/~vda/kernel_config
>>> there are 617 calls to netif_tx_stop_queue()
>>> and 49 calls to netif_tx_stop_all_queues() in vmlinux.
>>>
>>> Code size is reduced by 27 kbytes:
>>>
>>>       text     data      bss       dec     hex filename
>>> 82426986 22255416 20627456 125309858 77813a2 vmlinux.before
>>> 82399481 22255416 20627456 125282353 777a831 vmlinux
>>>
>>> It may seem strange that a seemingly simple code like one in
>>> netif_tx_stop_queue() compiles to ~60 bytes of code.
>>> Well, it's true. Here's its disassembly:
>>>
>>>       netif_tx_stop_queue:
> ...
>>>          55                      push   %rbp
>>>          be 7a 18 00 00          mov    $0x187a,%esi
>>>          48 c7 c7 50 59 d8 85    mov    $.rodata+0x1d85950,%rdi
>>>          48 89 e5                mov    %rsp,%rbp
>>>          e8 54 5a 7d fd          callq  <warn_slowpath_null>
>>>          48 c7 c7 5f 59 d8 85    mov    $.rodata+0x1d8595f,%rdi
>>>          31 c0                   xor    %eax,%eax
>>>          e8 b0 47 48 00          callq  <printk>
>>>          eb 09                   jmp    <netif_tx_stop_queue+0x38>
>> This is the WARN_ON action.  One thing you might try doing is moving
>> this to a function of its own instead of moving the entire thing
>> out of being an inline.
> If WARN_ON check would be moved into a function, the call overhead
> would still be there, while each callsite will be larder than with
> this patch.
>
>> You may find you still get most
>> of the space savings as I wonder if the string for the printk
>> isn't being duplicated for each caller.
> Yes, strings are duplicated:
>
> $ strings vmlinux0  | grep 'cannot be called before register_netdev'
> 6netif_stop_queue() cannot be called before register_netdev()
> 6tun: netif_stop_queue() cannot be called before register_netdev()
> 6cc770: netif_stop_queue() cannot be called before register_netdev()
> 63c589_cs: netif_stop_queue() cannot be called before register_netdev()
> 63c574_cs: netif_stop_queue() cannot be called before register_netdev()
> 6typhoon netif_stop_queue() cannot be called before register_netdev()
> 6axnet_cs: netif_stop_queue() cannot be called before register_netdev()
> 6pcnet_cs: netif_stop_queue() cannot be called before register_netdev()
> ...
>
> However, they amount only to ~5.7k out of 27k:
>
> $ strings vmlinux0  | grep 'cannot be called before register_netdev' | wc -c
> 5731
>

Yeah, they are probably coalesced per .c file since the compiler cannot 
span files.  The average driver probably calls it 2 or more times which 
is why it is only about 1/5 instead of 1/2 of the total bytecount.  Also 
the count above excludes carriage returns and NULL characters.

>>>          f0 80 8f e0 01 00 00 01 lock orb $0x1,0x1e0(%rdi)
>> This is your set bit operation.  If you were to drop the whole WARN_ON
>> then this is the only thing you would be inlining.
> It's up to networking people to decide. I would happily send a patch which drops
> WARN_ON if they say that's ok with them. Davem?

This was added under commit 18543a643fae6 ("net: Detect and ignore 
netif_stop_queue() calls before register_netdev()").  I think the time 
for allowing drivers to ignore the WARN_ON has passed and at this point 
they should be strongly encouraged to fix the issue via a NULL pointer 
dereference if they still haven't gotten the issue resolved so we can 
track them down and fix them.  I'd say add a comment here in case 
someone triggers this and does some debugging, but the WARN_ON at this 
point has proven it is too expensive.

>> That is only 8 bytes in size which would probably be comparable to the callq
>> and register sorting needed for a function call.
> "lock or" in my tests takes 21 cycles even on exclusively cached
> L1 data cache line. Added "call+ret" is 4-5 cycles.

It is an expensive instruction, but pushing it out into a separate 
function just adds that much more overhead.

>> Have you done any performance testing on this change?
> No.

The most likely thing to exercise this would probably be something like 
a standard pktgen test.  It should be able to put enough stress on a 
single queue for the function to be called frequently.

>> I suspect there will likely be a noticeable impact some some tests.
> (1) It's *transmit off* operation. Usually it means that we have to turn
> transmit off because hw TX queue is full. So the bottleneck is likely
> the network, not the CPU.

That is true.  However there are still scenarios such as pktgen where we 
would be triggering this often and I would prefer to keep it as fast as 
possible since it is still kind of hard to maintain line rate 10Gb/s for 
some of my traffic generator setups.

>
> (2) It was auto-deinlined by gcc anyway. We already were unknownigly
> using the uninlined version for some time. Apparently, it wasn't noticed.

Depends on which cases were uninlined.  I believe the compiler is making 
the decision per .c file so each driver is handling it differently.  For 
example it looks like the ixgbe driver was still inlining this in my -O2 
build, so that is one case that was not.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] net: deinline netif_tx_stop_queue() and netif_tx_stop_all_queues()
  2015-05-08 15:50     ` Alexander Duyck
@ 2015-05-08 17:30       ` Alexei Starovoitov
  0 siblings, 0 replies; 7+ messages in thread
From: Alexei Starovoitov @ 2015-05-08 17:30 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: Denys Vlasenko, David S. Miller, Jiri Pirko, linux-kernel,
	netdev, netfilter-devel

On Fri, May 08, 2015 at 08:50:37AM -0700, Alexander Duyck wrote:
> 
> >>>         f0 80 8f e0 01 00 00 01 lock orb $0x1,0x1e0(%rdi)
> >>This is your set bit operation.  If you were to drop the whole WARN_ON
> >>then this is the only thing you would be inlining.
> >It's up to networking people to decide. I would happily send a patch which drops
> >WARN_ON if they say that's ok with them. Davem?
> 
> This was added under commit 18543a643fae6 ("net: Detect and ignore
> netif_stop_queue() calls before register_netdev()").  I think the time for
> allowing drivers to ignore the WARN_ON has passed and at this point they
> should be strongly encouraged to fix the issue via a NULL pointer
> dereference if they still haven't gotten the issue resolved so we can track
> them down and fix them.  I'd say add a comment here in case someone triggers
> this and does some debugging, but the WARN_ON at this point has proven it is
> too expensive.

+1
5 years for this WARN_ON was enough. Time to remove it.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] net: deinline netif_tx_stop_queue() and netif_tx_stop_all_queues()
  2015-05-07 11:41 [PATCH] net: deinline netif_tx_stop_queue() and netif_tx_stop_all_queues() Denys Vlasenko
  2015-05-07 17:14 ` Alexander Duyck
@ 2015-05-10  2:27 ` David Miller
  1 sibling, 0 replies; 7+ messages in thread
From: David Miller @ 2015-05-10  2:27 UTC (permalink / raw)
  To: dvlasenk; +Cc: jpirko, linux-kernel, netdev, netfilter-devel

From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Thu,  7 May 2015 13:41:10 +0200

> These functions compile to ~60 bytes of machine code each.

As others have suggested, just kill the WARN_ON().

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-05-10  2:27 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-07 11:41 [PATCH] net: deinline netif_tx_stop_queue() and netif_tx_stop_all_queues() Denys Vlasenko
2015-05-07 17:14 ` Alexander Duyck
2015-05-07 18:44   ` Joe Perches
2015-05-08  9:45   ` Denys Vlasenko
2015-05-08 15:50     ` Alexander Duyck
2015-05-08 17:30       ` Alexei Starovoitov
2015-05-10  2:27 ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).