All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-wired-lan] [RFC PATCH] ixgbe: delay tail write to every 'n' packets
@ 2017-03-13 16:16 John Fastabend
  2017-03-13 17:18 ` Alexander Duyck
  0 siblings, 1 reply; 6+ messages in thread
From: John Fastabend @ 2017-03-13 16:16 UTC (permalink / raw)
  To: intel-wired-lan

Current XDP implementation hits the tail on every XDP_TX return
code. This patch changes driver behavior to only hit the tail after
packet processing is complete.

RFC for now as I test this, it looks promising on my dev box but
want to do some more tests before official submission.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index bef4e24..2c244b6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2282,6 +2282,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 	unsigned int mss = 0;
 #endif /* IXGBE_FCOE */
 	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
+	bool xdp_xmit = false;
 
 	while (likely(total_rx_packets < budget)) {
 		union ixgbe_adv_rx_desc *rx_desc;
@@ -2321,10 +2322,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		}
 
 		if (IS_ERR(skb)) {
-			if (PTR_ERR(skb) == -IXGBE_XDP_TX)
+			if (PTR_ERR(skb) == -IXGBE_XDP_TX) {
+				xdp_xmit = true;
 				ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
-			else
+			} else {
 				rx_buffer->pagecnt_bias++;
+			}
 			total_rx_packets++;
 			total_rx_bytes += size;
 		} else if (skb) {
@@ -2392,6 +2395,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		total_rx_packets++;
 	}
 
+	if (xdp_xmit) {
+		struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
+
+		writel(ring->next_to_use, ring->tail);
+	}
+
 	u64_stats_update_begin(&rx_ring->syncp);
 	rx_ring->stats.packets += total_rx_packets;
 	rx_ring->stats.bytes += total_rx_bytes;
@@ -8251,7 +8260,6 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
 	tx_buffer->next_to_watch = tx_desc;
 	ring->next_to_use = i;
 
-	writel(i, ring->tail);
 	return IXGBE_XDP_TX;
 }
 


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Intel-wired-lan] [RFC PATCH] ixgbe: delay tail write to every 'n' packets
  2017-03-13 16:16 [Intel-wired-lan] [RFC PATCH] ixgbe: delay tail write to every 'n' packets John Fastabend
@ 2017-03-13 17:18 ` Alexander Duyck
  2017-03-14 23:50   ` John Fastabend
  0 siblings, 1 reply; 6+ messages in thread
From: Alexander Duyck @ 2017-03-13 17:18 UTC (permalink / raw)
  To: intel-wired-lan

On Mon, Mar 13, 2017 at 9:16 AM, John Fastabend
<john.fastabend@gmail.com> wrote:
> Current XDP implementation hits the tail on every XDP_TX return
> code. This patch changes driver behavior to only hit the tail after
> packet processing is complete.
>
> RFC for now as I test this, it looks promising on my dev box but
> want to do some more tests before official submission.
>
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   14 +++++++++++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index bef4e24..2c244b6 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -2282,6 +2282,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>         unsigned int mss = 0;
>  #endif /* IXGBE_FCOE */
>         u16 cleaned_count = ixgbe_desc_unused(rx_ring);
> +       bool xdp_xmit = false;
>
>         while (likely(total_rx_packets < budget)) {
>                 union ixgbe_adv_rx_desc *rx_desc;
> @@ -2321,10 +2322,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>                 }
>
>                 if (IS_ERR(skb)) {
> -                       if (PTR_ERR(skb) == -IXGBE_XDP_TX)
> +                       if (PTR_ERR(skb) == -IXGBE_XDP_TX) {
> +                               xdp_xmit = true;
>                                 ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
> -                       else
> +                       } else {
>                                 rx_buffer->pagecnt_bias++;
> +                       }
>                         total_rx_packets++;
>                         total_rx_bytes += size;
>                 } else if (skb) {
> @@ -2392,6 +2395,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>                 total_rx_packets++;
>         }
>
> +       if (xdp_xmit) {
> +               struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
> +
> +               writel(ring->next_to_use, ring->tail);
> +       }
> +

We will need a wmb here.

>         u64_stats_update_begin(&rx_ring->syncp);
>         rx_ring->stats.packets += total_rx_packets;
>         rx_ring->stats.bytes += total_rx_bytes;
> @@ -8251,7 +8260,6 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
>         tx_buffer->next_to_watch = tx_desc;
>         ring->next_to_use = i;
>
> -       writel(i, ring->tail);

So you might want to change the barrier setup for all this to use
smp_wmb instead.  We need the wmb to be paired with the writel.  That
should give you a slight performance boost since smp_wmb breaks down
to just a barrier on x86 systems.

>         return IXGBE_XDP_TX;
>  }
>
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Intel-wired-lan] [RFC PATCH] ixgbe: delay tail write to every 'n' packets
  2017-03-13 17:18 ` Alexander Duyck
@ 2017-03-14 23:50   ` John Fastabend
  2017-03-15  1:14     ` Alexander Duyck
  0 siblings, 1 reply; 6+ messages in thread
From: John Fastabend @ 2017-03-14 23:50 UTC (permalink / raw)
  To: intel-wired-lan

On 17-03-13 10:18 AM, Alexander Duyck wrote:
> On Mon, Mar 13, 2017 at 9:16 AM, John Fastabend
> <john.fastabend@gmail.com> wrote:
>> Current XDP implementation hits the tail on every XDP_TX return
>> code. This patch changes driver behavior to only hit the tail after
>> packet processing is complete.
>>
>> RFC for now as I test this, it looks promising on my dev box but
>> want to do some more tests before official submission.
>>
>> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
>> ---
>>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   14 +++++++++++---
>>  1 file changed, 11 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>> index bef4e24..2c244b6 100644
>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>> @@ -2282,6 +2282,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>>         unsigned int mss = 0;
>>  #endif /* IXGBE_FCOE */
>>         u16 cleaned_count = ixgbe_desc_unused(rx_ring);
>> +       bool xdp_xmit = false;
>>
>>         while (likely(total_rx_packets < budget)) {
>>                 union ixgbe_adv_rx_desc *rx_desc;
>> @@ -2321,10 +2322,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>>                 }
>>
>>                 if (IS_ERR(skb)) {
>> -                       if (PTR_ERR(skb) == -IXGBE_XDP_TX)
>> +                       if (PTR_ERR(skb) == -IXGBE_XDP_TX) {
>> +                               xdp_xmit = true;
>>                                 ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
>> -                       else
>> +                       } else {
>>                                 rx_buffer->pagecnt_bias++;
>> +                       }
>>                         total_rx_packets++;
>>                         total_rx_bytes += size;
>>                 } else if (skb) {
>> @@ -2392,6 +2395,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>>                 total_rx_packets++;
>>         }
>>
>> +       if (xdp_xmit) {
>> +               struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
>> +
>> +               writel(ring->next_to_use, ring->tail);
>> +       }
>> +
> 
> We will need a wmb here.
> 
>>         u64_stats_update_begin(&rx_ring->syncp);
>>         rx_ring->stats.packets += total_rx_packets;
>>         rx_ring->stats.bytes += total_rx_bytes;
>> @@ -8251,7 +8260,6 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
>>         tx_buffer->next_to_watch = tx_desc;
>>         ring->next_to_use = i;
>>
>> -       writel(i, ring->tail);
> 
> So you might want to change the barrier setup for all this to use
> smp_wmb instead.  We need the wmb to be paired with the writel.  That
> should give you a slight performance boost since smp_wmb breaks down
> to just a barrier on x86 systems.
> 

Not sure I grok this description entirely, but I think you are just saying
replace,

	if (xdp_xmit) {
		...
		writel(...)
	}

with

	if (xdp_xmit) {
		...
		smp_rmb()
		writel()
	}

Correct? Did you have some other change in mind ... "for all this"?

Thanks,
John

>>         return IXGBE_XDP_TX;
>>  }
>>
>>


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Intel-wired-lan] [RFC PATCH] ixgbe: delay tail write to every 'n' packets
  2017-03-14 23:50   ` John Fastabend
@ 2017-03-15  1:14     ` Alexander Duyck
  2017-03-27 19:28       ` Fastabend, John R
  0 siblings, 1 reply; 6+ messages in thread
From: Alexander Duyck @ 2017-03-15  1:14 UTC (permalink / raw)
  To: intel-wired-lan

On Tue, Mar 14, 2017 at 4:50 PM, John Fastabend
<john.fastabend@gmail.com> wrote:
> On 17-03-13 10:18 AM, Alexander Duyck wrote:
>> On Mon, Mar 13, 2017 at 9:16 AM, John Fastabend
>> <john.fastabend@gmail.com> wrote:
>>> Current XDP implementation hits the tail on every XDP_TX return
>>> code. This patch changes driver behavior to only hit the tail after
>>> packet processing is complete.
>>>
>>> RFC for now as I test this, it looks promising on my dev box but
>>> want to do some more tests before official submission.
>>>
>>> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
>>> ---
>>>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   14 +++++++++++---
>>>  1 file changed, 11 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>>> index bef4e24..2c244b6 100644
>>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>>> @@ -2282,6 +2282,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>>>         unsigned int mss = 0;
>>>  #endif /* IXGBE_FCOE */
>>>         u16 cleaned_count = ixgbe_desc_unused(rx_ring);
>>> +       bool xdp_xmit = false;
>>>
>>>         while (likely(total_rx_packets < budget)) {
>>>                 union ixgbe_adv_rx_desc *rx_desc;
>>> @@ -2321,10 +2322,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>>>                 }
>>>
>>>                 if (IS_ERR(skb)) {
>>> -                       if (PTR_ERR(skb) == -IXGBE_XDP_TX)
>>> +                       if (PTR_ERR(skb) == -IXGBE_XDP_TX) {
>>> +                               xdp_xmit = true;
>>>                                 ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
>>> -                       else
>>> +                       } else {
>>>                                 rx_buffer->pagecnt_bias++;
>>> +                       }
>>>                         total_rx_packets++;
>>>                         total_rx_bytes += size;
>>>                 } else if (skb) {
>>> @@ -2392,6 +2395,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>>>                 total_rx_packets++;
>>>         }
>>>
>>> +       if (xdp_xmit) {
>>> +               struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
>>> +
>>> +               writel(ring->next_to_use, ring->tail);
>>> +       }
>>> +
>>
>> We will need a wmb here.
>>
>>>         u64_stats_update_begin(&rx_ring->syncp);
>>>         rx_ring->stats.packets += total_rx_packets;
>>>         rx_ring->stats.bytes += total_rx_bytes;
>>> @@ -8251,7 +8260,6 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
>>>         tx_buffer->next_to_watch = tx_desc;
>>>         ring->next_to_use = i;
>>>
>>> -       writel(i, ring->tail);
>>
>> So you might want to change the barrier setup for all this to use
>> smp_wmb instead.  We need the wmb to be paired with the writel.  That
>> should give you a slight performance boost since smp_wmb breaks down
>> to just a barrier on x86 systems.
>>
>
> Not sure I grok this description entirely, but I think you are just saying
> replace,
>
>         if (xdp_xmit) {
>                 ...
>                 writel(...)
>         }
>
> with
>
>         if (xdp_xmit) {
>                 ...
>                 smp_rmb()
>                 writel()
>         }
>
> Correct? Did you have some other change in mind ... "for all this"?
>
> Thanks,
> John


No.  Basically what it should be is find and replace wmb with
smp_wmb() in ixgbe_xmit_xdp_ring.  That way you won't have to worry
about a race between Tx and clean-up.

Then the if statement should be:
if (xdp_xmit) {
    ...
    /* wmb required to flush writes to coherent memory before writing
to non-coherent memory*/
    wmb();
    /* writel is a write to non-coherent memory mapped I/O */
    writel();
}

>>>         return IXGBE_XDP_TX;
>>>  }
>>>
>>>
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Intel-wired-lan] [RFC PATCH] ixgbe: delay tail write to every 'n' packets
  2017-03-15  1:14     ` Alexander Duyck
@ 2017-03-27 19:28       ` Fastabend, John R
  2017-03-27 22:42         ` Duyck, Alexander H
  0 siblings, 1 reply; 6+ messages in thread
From: Fastabend, John R @ 2017-03-27 19:28 UTC (permalink / raw)
  To: intel-wired-lan

FWIW with a revised version of this patch I see 14.6Mpps on RX drop tests and 13.5Mpps on TX test case.

Alex, maybe we can sync up and squeeze the last mpps out of the design.

Thanks,
John
________________________________________
From: Intel-wired-lan [intel-wired-lan-bounces at lists.osuosl.org] on behalf of Alexander Duyck [alexander.duyck at gmail.com]
Sent: Tuesday, March 14, 2017 6:14 PM
To: John Fastabend
Cc: intel-wired-lan; Alexei Starovoitov; Daniel Borkmann; William Tu
Subject: Re: [Intel-wired-lan] [RFC PATCH] ixgbe: delay tail write to every     'n' packets

On Tue, Mar 14, 2017 at 4:50 PM, John Fastabend
<john.fastabend@gmail.com> wrote:
> On 17-03-13 10:18 AM, Alexander Duyck wrote:
>> On Mon, Mar 13, 2017 at 9:16 AM, John Fastabend
>> <john.fastabend@gmail.com> wrote:
>>> Current XDP implementation hits the tail on every XDP_TX return
>>> code. This patch changes driver behavior to only hit the tail after
>>> packet processing is complete.
>>>
>>> RFC for now as I test this, it looks promising on my dev box but
>>> want to do some more tests before official submission.
>>>
>>> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
>>> ---
>>>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   14 +++++++++++---
>>>  1 file changed, 11 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>>> index bef4e24..2c244b6 100644
>>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>>> @@ -2282,6 +2282,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>>>         unsigned int mss = 0;
>>>  #endif /* IXGBE_FCOE */
>>>         u16 cleaned_count = ixgbe_desc_unused(rx_ring);
>>> +       bool xdp_xmit = false;
>>>
>>>         while (likely(total_rx_packets < budget)) {
>>>                 union ixgbe_adv_rx_desc *rx_desc;
>>> @@ -2321,10 +2322,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>>>                 }
>>>
>>>                 if (IS_ERR(skb)) {
>>> -                       if (PTR_ERR(skb) == -IXGBE_XDP_TX)
>>> +                       if (PTR_ERR(skb) == -IXGBE_XDP_TX) {
>>> +                               xdp_xmit = true;
>>>                                 ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
>>> -                       else
>>> +                       } else {
>>>                                 rx_buffer->pagecnt_bias++;
>>> +                       }
>>>                         total_rx_packets++;
>>>                         total_rx_bytes += size;
>>>                 } else if (skb) {
>>> @@ -2392,6 +2395,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>>>                 total_rx_packets++;
>>>         }
>>>
>>> +       if (xdp_xmit) {
>>> +               struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
>>> +
>>> +               writel(ring->next_to_use, ring->tail);
>>> +       }
>>> +
>>
>> We will need a wmb here.
>>
>>>         u64_stats_update_begin(&rx_ring->syncp);
>>>         rx_ring->stats.packets += total_rx_packets;
>>>         rx_ring->stats.bytes += total_rx_bytes;
>>> @@ -8251,7 +8260,6 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
>>>         tx_buffer->next_to_watch = tx_desc;
>>>         ring->next_to_use = i;
>>>
>>> -       writel(i, ring->tail);
>>
>> So you might want to change the barrier setup for all this to use
>> smp_wmb instead.  We need the wmb to be paired with the writel.  That
>> should give you a slight performance boost since smp_wmb breaks down
>> to just a barrier on x86 systems.
>>
>
> Not sure I grok this description entirely, but I think you are just saying
> replace,
>
>         if (xdp_xmit) {
>                 ...
>                 writel(...)
>         }
>
> with
>
>         if (xdp_xmit) {
>                 ...
>                 smp_rmb()
>                 writel()
>         }
>
> Correct? Did you have some other change in mind ... "for all this"?
>
> Thanks,
> John


No.  Basically what it should be is find and replace wmb with
smp_wmb() in ixgbe_xmit_xdp_ring.  That way you won't have to worry
about a race between Tx and clean-up.

Then the if statement should be:
if (xdp_xmit) {
    ...
    /* wmb required to flush writes to coherent memory before writing
to non-coherent memory*/
    wmb();
    /* writel is a write to non-coherent memory mapped I/O */
    writel();
}

>>>         return IXGBE_XDP_TX;
>>>  }
>>>
>>>
>
_______________________________________________
Intel-wired-lan mailing list
Intel-wired-lan at lists.osuosl.org
http://lists.osuosl.org/mailman/listinfo/intel-wired-lan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Intel-wired-lan] [RFC PATCH] ixgbe: delay tail write to every 'n' packets
  2017-03-27 19:28       ` Fastabend, John R
@ 2017-03-27 22:42         ` Duyck, Alexander H
  0 siblings, 0 replies; 6+ messages in thread
From: Duyck, Alexander H @ 2017-03-27 22:42 UTC (permalink / raw)
  To: intel-wired-lan

Sounds good. ?We can probably discuss it tomorrow at our 1pm meeting.

Thanks.

- Alex

On Mon, 2017-03-27 at 19:28 +0000, Fastabend, John R wrote:
> FWIW with a revised version of this patch I see 14.6Mpps on RX drop tests and 13.5Mpps on TX test case.
> 
> Alex, maybe we can sync up and squeeze the last mpps out of the design.
> 
> Thanks,
> John
> ________________________________________
> From: Intel-wired-lan [intel-wired-lan-bounces at lists.osuosl.org] on behalf of Alexander Duyck [alexander.duyck at gmail.com]
> Sent: Tuesday, March 14, 2017 6:14 PM
> To: John Fastabend
> Cc: intel-wired-lan; Alexei Starovoitov; Daniel Borkmann; William Tu
> Subject: Re: [Intel-wired-lan] [RFC PATCH] ixgbe: delay tail write to every     'n' packets
> 
> On Tue, Mar 14, 2017 at 4:50 PM, John Fastabend
> <john.fastabend@gmail.com> wrote:
> > 
> > On 17-03-13 10:18 AM, Alexander Duyck wrote:
> > > 
> > > On Mon, Mar 13, 2017 at 9:16 AM, John Fastabend
> > > <john.fastabend@gmail.com> wrote:
> > > > 
> > > > Current XDP implementation hits the tail on every XDP_TX return
> > > > code. This patch changes driver behavior to only hit the tail after
> > > > packet processing is complete.
> > > > 
> > > > RFC for now as I test this, it looks promising on my dev box but
> > > > want to do some more tests before official submission.
> > > > 
> > > > Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> > > > ---
> > > >  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   14 +++++++++++---
> > > >  1 file changed, 11 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> > > > index bef4e24..2c244b6 100644
> > > > --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> > > > +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> > > > @@ -2282,6 +2282,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
> > > >         unsigned int mss = 0;
> > > >  #endif /* IXGBE_FCOE */
> > > >         u16 cleaned_count = ixgbe_desc_unused(rx_ring);
> > > > +       bool xdp_xmit = false;
> > > > 
> > > >         while (likely(total_rx_packets < budget)) {
> > > >                 union ixgbe_adv_rx_desc *rx_desc;
> > > > @@ -2321,10 +2322,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
> > > >                 }
> > > > 
> > > >                 if (IS_ERR(skb)) {
> > > > -                       if (PTR_ERR(skb) == -IXGBE_XDP_TX)
> > > > +                       if (PTR_ERR(skb) == -IXGBE_XDP_TX) {
> > > > +                               xdp_xmit = true;
> > > >                                 ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
> > > > -                       else
> > > > +                       } else {
> > > >                                 rx_buffer->pagecnt_bias++;
> > > > +                       }
> > > >                         total_rx_packets++;
> > > >                         total_rx_bytes += size;
> > > >                 } else if (skb) {
> > > > @@ -2392,6 +2395,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
> > > >                 total_rx_packets++;
> > > >         }
> > > > 
> > > > +       if (xdp_xmit) {
> > > > +               struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
> > > > +
> > > > +               writel(ring->next_to_use, ring->tail);
> > > > +       }
> > > > +
> > > 
> > > We will need a wmb here.
> > > 
> > > > 
> > > >         u64_stats_update_begin(&rx_ring->syncp);
> > > >         rx_ring->stats.packets += total_rx_packets;
> > > >         rx_ring->stats.bytes += total_rx_bytes;
> > > > @@ -8251,7 +8260,6 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
> > > >         tx_buffer->next_to_watch = tx_desc;
> > > >         ring->next_to_use = i;
> > > > 
> > > > -       writel(i, ring->tail);
> > > 
> > > So you might want to change the barrier setup for all this to use
> > > smp_wmb instead.  We need the wmb to be paired with the writel.  That
> > > should give you a slight performance boost since smp_wmb breaks down
> > > to just a barrier on x86 systems.
> > > 
> > 
> > Not sure I grok this description entirely, but I think you are just saying
> > replace,
> > 
> >         if (xdp_xmit) {
> >                 ...
> >                 writel(...)
> >         }
> > 
> > with
> > 
> >         if (xdp_xmit) {
> >                 ...
> >                 smp_rmb()
> >                 writel()
> >         }
> > 
> > Correct? Did you have some other change in mind ... "for all this"?
> > 
> > Thanks,
> > John
> 
> 
> No.  Basically what it should be is find and replace wmb with
> smp_wmb() in ixgbe_xmit_xdp_ring.  That way you won't have to worry
> about a race between Tx and clean-up.
> 
> Then the if statement should be:
> if (xdp_xmit) {
>     ...
>     /* wmb required to flush writes to coherent memory before writing
> to non-coherent memory*/
>     wmb();
>     /* writel is a write to non-coherent memory mapped I/O */
>     writel();
> }
> 
> > 
> > > 
> > > > 
> > > >         return IXGBE_XDP_TX;
> > > >  }
> > > > 
> > > > 
> > 
> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan at lists.osuosl.org
> http://lists.osuosl.org/mailman/listinfo/intel-wired-lan
> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan at lists.osuosl.org
> http://lists.osuosl.org/mailman/listinfo/intel-wired-lan

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-03-27 22:42 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-13 16:16 [Intel-wired-lan] [RFC PATCH] ixgbe: delay tail write to every 'n' packets John Fastabend
2017-03-13 17:18 ` Alexander Duyck
2017-03-14 23:50   ` John Fastabend
2017-03-15  1:14     ` Alexander Duyck
2017-03-27 19:28       ` Fastabend, John R
2017-03-27 22:42         ` Duyck, Alexander H

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.