All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-wired-lan] i40e and TSO with MPLS ?
@ 2022-02-15 21:02 Joe Damato
  0 siblings, 0 replies; 9+ messages in thread
From: Joe Damato @ 2022-02-15 21:02 UTC (permalink / raw)
  To: intel-wired-lan

Greetings:

Does i40e (XL710) support TSO with MPLS?

We are using firmware version: 7.10 0x80006469 1.2527.0

We've attempted to add support for TSO+MPLS to i40e, but were unable to
get it working. The patch is included below for reference, but it is almost
certainly incorrect - and I am not clear if the hardware itself would
support this even if the patch was correct.

Applying the patch below and using tcpdump shows that:

	- packet data, as seen by the pcap filter in the kernel, is large.
	  This suggests that the kernel is attempting to offload
	  segmentation to the device,

	but

	- those large packets are not ACK'd by the client

This suggests that either:

	- the device does not support TSO + MPLS, and/or
	- the patch below is incorrect

Does anyone working on i40e have any insight on this?

Thanks,
Joe

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index a21833c..b7455cc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -12920,8 +12920,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
 	np->vsi = vsi;
 
 	hw_enc_features = NETIF_F_SG			|
-			  NETIF_F_IP_CSUM		|
-			  NETIF_F_IPV6_CSUM		|
+			  NETIF_F_HW_CSUM		|
 			  NETIF_F_HIGHDMA		|
 			  NETIF_F_SOFT_FEATURES		|
 			  NETIF_F_TSO			|
@@ -12952,6 +12951,23 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
 	/* record features VLANs can make use of */
 	netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
 
+#define I40E_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
+				   NETIF_F_GSO_GRE_CSUM | \
+				   NETIF_F_GSO_IPXIP4 | \
+				   NETIF_F_GSO_IPXIP6 | \
+				   NETIF_F_GSO_UDP_TUNNEL | \
+				   NETIF_F_GSO_UDP_TUNNEL_CSUM)
+
+	netdev->gso_partial_features = I40E_GSO_PARTIAL_FEATURES;
+	netdev->features |= NETIF_F_GSO_PARTIAL |
+			    I40E_GSO_PARTIAL_FEATURES;
+
+	netdev->mpls_features    |= NETIF_F_SG;
+	netdev->mpls_features    |= NETIF_F_HW_CSUM;
+	netdev->mpls_features    |= NETIF_F_TSO;
+	netdev->mpls_features    |= NETIF_F_TSO6;
+	netdev->mpls_features |= I40E_GSO_PARTIAL_FEATURES;
+
 	/* enable macvlan offloads */
 	netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 5ad2812..9e641a9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -9,6 +9,7 @@
 #include "i40e_prototype.h"
 #include "i40e_txrx_common.h"
 #include "i40e_xsk.h"
+#include <net/mpls.h>
 
 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
 /**
@@ -2908,6 +2909,7 @@ static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len,
 {
 	struct sk_buff *skb = first->skb;
 	u64 cd_cmd, cd_tso_len, cd_mss;
+	__be16 protocol;
 	union {
 		struct iphdr *v4;
 		struct ipv6hdr *v6;
@@ -2932,15 +2934,24 @@ static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len,
 	if (err < 0)
 		return err;
 
-	ip.hdr = skb_network_header(skb);
-	l4.hdr = skb_transport_header(skb);
+	protocol = vlan_get_protocol(skb);
+
+	if (eth_p_mpls(protocol))
+		ip.hdr = skb_inner_network_header(skb);
+	else
+		ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_checksum_start(skb);
 
 	/* initialize outer IP header fields */
 	if (ip.v4->version == 4) {
 		ip.v4->tot_len = 0;
 		ip.v4->check = 0;
+
+		first->tx_flags |= I40E_TX_FLAGS_TSO |
+				   I40E_TX_FLAGS_IPV4;
 	} else {
 		ip.v6->payload_len = 0;
+		first->tx_flags |= I40E_TX_FLAGS_TSO;
 	}
 
 	if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
@@ -2962,10 +2973,6 @@ static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len,
 					     (__force __wsum)htonl(paylen));
 		}
 
-		/* reset pointers to inner headers */
-		ip.hdr = skb_inner_network_header(skb);
-		l4.hdr = skb_inner_transport_header(skb);
-
 		/* initialize inner IP header fields */
 		if (ip.v4->version == 4) {
 			ip.v4->tot_len = 0;

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] i40e and TSO with MPLS ?
  2022-03-02 20:24           ` Joe Damato
@ 2022-03-02 21:10             ` Kubalewski, Arkadiusz
  0 siblings, 0 replies; 9+ messages in thread
From: Kubalewski, Arkadiusz @ 2022-03-02 21:10 UTC (permalink / raw)
  To: intel-wired-lan

> On Tue, Mar 1, 2022 at 5:58 AM Kubalewski, Arkadiusz
> <arkadiusz.kubalewski@intel.com> wrote:
> >
> > > On Thu, Feb 24, 2022 at 6:28 AM Kubalewski, Arkadiusz
> > > <arkadiusz.kubalewski@intel.com> wrote:
> > > >
> > > > > On Wed, Feb 23, 2022 at 9:56 AM Kubalewski, Arkadiusz
> > > > > <arkadiusz.kubalewski@intel.com> wrote:
> > > > > >
> > > > > > +Joe
> > > > > >
> > > > > > > Greetings:
> > > > > > >
> > > > > > > Does i40e (XL710) support TSO with MPLS?
> > > > > > >
> > > > > > > We are using firmware version: 7.10 0x80006469 1.2527.0
> > > > > > >
> > > > > > > We've attempted to add support for TSO+MPLS to i40e, but were unable to
> > > > > > > get it working. The patch is included below for reference, but it is almost
> > > > > > > certainly incorrect - and I am not clear if the hardware itself would
> > > > > > > support this even if the patch was correct.
> > > > > > >
> > > > > > > Applying the patch below and using tcpdump shows that:
> > > > > > >
> > > > > > >         - packet data, as seen by the pcap filter in the kernel, is large.
> > > > > > >           This suggests that the kernel is attempting to offload
> > > > > > >           segmentation to the device,
> > > > > > >
> > > > > > >         but
> > > > > > >
> > > > > > >         - those large packets are not ACK'd by the client
> > > > > > >
> > > > > > > This suggests that either:
> > > > > > >
> > > > > > >         - the device does not support TSO + MPLS, and/or
> > > > > > >         - the patch below is incorrect
> > > > > > >
> > > > > > > Does anyone working on i40e have any insight on this?
> > > > > >
> > > > > > Hi Joe,
> > > > > >
> > > > > > I have done some research for your case, good news is that we believe that 710
> > > > > > hardware supports it. Currently we do not have plans to implement such feature
> > > > > > ourselves, but we will do our best in reviewing if you decide to implement it.
> > > > >
> > > > > OK, thanks. I appreciate the information and your willingness to
> > > > > review. I am pleased to hear that the hardware supports it.
> > > > >
> > > > > > Such offloads should be supported on packets with up to 2 MPLS tags before the
> > > > > > IP header. For start, you might take a look for the features pre check function:
> > > > > > static netdev_features_t i40e_features_check(struct sk_buff *skb, ...
> > > > > > It probably requires an update after the changes you have posted.
> > > > >
> > > > > I took a look at i40e_features_check, as you suggested, but I am
> > > > > probably missing something.
> > > > >
> > > > > My understanding is that the call graph on the xmit path is roughly:
> > > > >
> > > > > __dev_queue_xmit which calls (in order):
> > > > > 1. validate_xmit_skb -- this eventually calls i40e_features_check and
> > > > > harmonize_features which will use the mpls_features bitfield set in
> > > > > the patch to turn on the TSO bit
> > > > >
> > > > Just saying, worth to check if the required flags are already set when
> > > > i40e_features_check was called.
> > > >
> > > > > 2. dev_hard_start_xmit -- this delivers packets to taps, then to the driver
> > > > >
> > > > > dev_hard_start_xmit internally hands packets to any taps installed
> > > > > (for example pcap), before handing them to the driver
> > > > > (i40e_lan_xmit_frame).
> > > > >
> > > > > In our tests of the patch below, we see that tcpdump reports large
> > > > > packet sizes. Since we see them in tcpdump, I think this suggests that
> > > > > everything leading up to pcap delivery (including i40e_features_check)
> > > > > is correct... otherwise we'd see smaller packet sizes being delivered
> > > > > to pcap.
> > > > >
> > > > > This leads me to believe the issue is somewhere in i40e_lan_xmit_frame
> > > > > or below -- most likely in i40e_tso, which my patch attempts to tweak.
> > > > >
> > > > > Let me know if I'm off track and misunderstanding your analysis, but
> > > > > based on the above, I suspect the changes to i40e_tso are buggy.
> > > > >
> > > >
> > > > Seems like your understanding is correct.
> > > > Are those packets actually sent to the wire?
> > > > Any stats are incremented?
> > > > Anything at all shows up on the client?
> > >
> > > The bytes are never ACK'd by the client and are eventually re-transmit.
> > >
> > > Based on the tcpdump output I think the packet data makes it to the
> > > driver unsegmented (which is what we want), but due to some issue in
> > > i40e_tso (or a hardware limitation) the NIC fails to TSO the bytes and
> > > they are eventually re-transmit.
> > >
> >
> > I think good place to start would be the 710 datasheet:
> > https://cdrdv2.intel.com/v1/dl/getContent/332464?explicitVersion=true
> > i.e. 8.4.4.3.2 Transmit Segmentation Flow
> > Please verify if your change is following the expected flow,
> 
> I think I found the issue.
> 
> The original patch only modified i40e_tso, but I needed two more tweaks:
> 
> - a similar change is needed in i40e_tx_enable_csum.
> - tx_flags need to be tweaked slightly so that the l4 protocol
> detection works in i40e_tx_enable_csum.
> 
> Tests in my test environment show large MPLS packet TX which are now
> ACK'd by the client :)
> 
> I will submit this patch to the mailing list now and CC you on it for
> a formal review.
> 
> I noticed ice probably needs the same change (assuming it supports
> mpls+tso), but I don't have any ice hardware to test on. I am happy to
> port this change to the ice driver and submit that as well, if you
> like, but I'll need to ask the ice folks to test on my behalf.
> 
> Let me know if you'd like me to submit to ice, as well.
> 
> Thanks,
> Joe
> 

Hi Joe,

Great to hear that! :)

If you think that it is required to enable MPLS+TSO on ice, you can also submit
the patch there. When the patch is submitted to intel-wired-lan they are
passed to validation team,  and once they are reviewed and validated, our
maintainer will propose them to be merged into Dave's next-tree.

Thanks,
Arek

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] i40e and TSO with MPLS ?
  2022-03-01 13:57         ` Kubalewski, Arkadiusz
@ 2022-03-02 20:24           ` Joe Damato
  2022-03-02 21:10             ` Kubalewski, Arkadiusz
  0 siblings, 1 reply; 9+ messages in thread
From: Joe Damato @ 2022-03-02 20:24 UTC (permalink / raw)
  To: intel-wired-lan

On Tue, Mar 1, 2022 at 5:58 AM Kubalewski, Arkadiusz
<arkadiusz.kubalewski@intel.com> wrote:
>
> > On Thu, Feb 24, 2022 at 6:28 AM Kubalewski, Arkadiusz
> > <arkadiusz.kubalewski@intel.com> wrote:
> > >
> > > > On Wed, Feb 23, 2022 at 9:56 AM Kubalewski, Arkadiusz
> > > > <arkadiusz.kubalewski@intel.com> wrote:
> > > > >
> > > > > +Joe
> > > > >
> > > > > > Greetings:
> > > > > >
> > > > > > Does i40e (XL710) support TSO with MPLS?
> > > > > >
> > > > > > We are using firmware version: 7.10 0x80006469 1.2527.0
> > > > > >
> > > > > > We've attempted to add support for TSO+MPLS to i40e, but were unable to
> > > > > > get it working. The patch is included below for reference, but it is almost
> > > > > > certainly incorrect - and I am not clear if the hardware itself would
> > > > > > support this even if the patch was correct.
> > > > > >
> > > > > > Applying the patch below and using tcpdump shows that:
> > > > > >
> > > > > >         - packet data, as seen by the pcap filter in the kernel, is large.
> > > > > >           This suggests that the kernel is attempting to offload
> > > > > >           segmentation to the device,
> > > > > >
> > > > > >         but
> > > > > >
> > > > > >         - those large packets are not ACK'd by the client
> > > > > >
> > > > > > This suggests that either:
> > > > > >
> > > > > >         - the device does not support TSO + MPLS, and/or
> > > > > >         - the patch below is incorrect
> > > > > >
> > > > > > Does anyone working on i40e have any insight on this?
> > > > >
> > > > > Hi Joe,
> > > > >
> > > > > I have done some research for your case, good news is that we believe that 710
> > > > > hardware supports it. Currently we do not have plans to implement such feature
> > > > > ourselves, but we will do our best in reviewing if you decide to implement it.
> > > >
> > > > OK, thanks. I appreciate the information and your willingness to
> > > > review. I am pleased to hear that the hardware supports it.
> > > >
> > > > > Such offloads should be supported on packets with up to 2 MPLS tags before the
> > > > > IP header. For start, you might take a look for the features pre check function:
> > > > > static netdev_features_t i40e_features_check(struct sk_buff *skb, ...
> > > > > It probably requires an update after the changes you have posted.
> > > >
> > > > I took a look at i40e_features_check, as you suggested, but I am
> > > > probably missing something.
> > > >
> > > > My understanding is that the call graph on the xmit path is roughly:
> > > >
> > > > __dev_queue_xmit which calls (in order):
> > > > 1. validate_xmit_skb -- this eventually calls i40e_features_check and
> > > > harmonize_features which will use the mpls_features bitfield set in
> > > > the patch to turn on the TSO bit
> > > >
> > > Just saying, worth to check if the required flags are already set when
> > > i40e_features_check was called.
> > >
> > > > 2. dev_hard_start_xmit -- this delivers packets to taps, then to the driver
> > > >
> > > > dev_hard_start_xmit internally hands packets to any taps installed
> > > > (for example pcap), before handing them to the driver
> > > > (i40e_lan_xmit_frame).
> > > >
> > > > In our tests of the patch below, we see that tcpdump reports large
> > > > packet sizes. Since we see them in tcpdump, I think this suggests that
> > > > everything leading up to pcap delivery (including i40e_features_check)
> > > > is correct... otherwise we'd see smaller packet sizes being delivered
> > > > to pcap.
> > > >
> > > > This leads me to believe the issue is somewhere in i40e_lan_xmit_frame
> > > > or below -- most likely in i40e_tso, which my patch attempts to tweak.
> > > >
> > > > Let me know if I'm off track and misunderstanding your analysis, but
> > > > based on the above, I suspect the changes to i40e_tso are buggy.
> > > >
> > >
> > > Seems like your understanding is correct.
> > > Are those packets actually sent to the wire?
> > > Any stats are incremented?
> > > Anything at all shows up on the client?
> >
> > The bytes are never ACK'd by the client and are eventually re-transmit.
> >
> > Based on the tcpdump output I think the packet data makes it to the
> > driver unsegmented (which is what we want), but due to some issue in
> > i40e_tso (or a hardware limitation) the NIC fails to TSO the bytes and
> > they are eventually re-transmit.
> >
>
> I think good place to start would be the 710 datasheet:
> https://cdrdv2.intel.com/v1/dl/getContent/332464?explicitVersion=true
> i.e. 8.4.4.3.2 Transmit Segmentation Flow
> Please verify if your change is following the expected flow,

I think I found the issue.

The original patch only modified i40e_tso, but I needed two more tweaks:

- a similar change is needed in i40e_tx_enable_csum.
- tx_flags need to be tweaked slightly so that the l4 protocol
detection works in i40e_tx_enable_csum.

Tests in my test environment show large MPLS packet TX which are now
ACK'd by the client :)

I will submit this patch to the mailing list now and CC you on it for
a formal review.

I noticed ice probably needs the same change (assuming it supports
mpls+tso), but I don't have any ice hardware to test on. I am happy to
port this change to the ice driver and submit that as well, if you
like, but I'll need to ask the ice folks to test on my behalf.

Let me know if you'd like me to submit to ice, as well.

Thanks,
Joe

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] i40e and TSO with MPLS ?
  2022-02-24 16:27       ` Joe Damato
@ 2022-03-01 13:57         ` Kubalewski, Arkadiusz
  2022-03-02 20:24           ` Joe Damato
  0 siblings, 1 reply; 9+ messages in thread
From: Kubalewski, Arkadiusz @ 2022-03-01 13:57 UTC (permalink / raw)
  To: intel-wired-lan

> On Thu, Feb 24, 2022 at 6:28 AM Kubalewski, Arkadiusz
> <arkadiusz.kubalewski@intel.com> wrote:
> >
> > > On Wed, Feb 23, 2022 at 9:56 AM Kubalewski, Arkadiusz
> > > <arkadiusz.kubalewski@intel.com> wrote:
> > > >
> > > > +Joe
> > > >
> > > > > Greetings:
> > > > >
> > > > > Does i40e (XL710) support TSO with MPLS?
> > > > >
> > > > > We are using firmware version: 7.10 0x80006469 1.2527.0
> > > > >
> > > > > We've attempted to add support for TSO+MPLS to i40e, but were unable to
> > > > > get it working. The patch is included below for reference, but it is almost
> > > > > certainly incorrect - and I am not clear if the hardware itself would
> > > > > support this even if the patch was correct.
> > > > >
> > > > > Applying the patch below and using tcpdump shows that:
> > > > >
> > > > >         - packet data, as seen by the pcap filter in the kernel, is large.
> > > > >           This suggests that the kernel is attempting to offload
> > > > >           segmentation to the device,
> > > > >
> > > > >         but
> > > > >
> > > > >         - those large packets are not ACK'd by the client
> > > > >
> > > > > This suggests that either:
> > > > >
> > > > >         - the device does not support TSO + MPLS, and/or
> > > > >         - the patch below is incorrect
> > > > >
> > > > > Does anyone working on i40e have any insight on this?
> > > >
> > > > Hi Joe,
> > > >
> > > > I have done some research for your case, good news is that we believe that 710
> > > > hardware supports it. Currently we do not have plans to implement such feature
> > > > ourselves, but we will do our best in reviewing if you decide to implement it.
> > >
> > > OK, thanks. I appreciate the information and your willingness to
> > > review. I am pleased to hear that the hardware supports it.
> > >
> > > > Such offloads should be supported on packets with up to 2 MPLS tags before the
> > > > IP header. For start, you might take a look for the features pre check function:
> > > > static netdev_features_t i40e_features_check(struct sk_buff *skb, ...
> > > > It probably requires an update after the changes you have posted.
> > >
> > > I took a look at i40e_features_check, as you suggested, but I am
> > > probably missing something.
> > >
> > > My understanding is that the call graph on the xmit path is roughly:
> > >
> > > __dev_queue_xmit which calls (in order):
> > > 1. validate_xmit_skb -- this eventually calls i40e_features_check and
> > > harmonize_features which will use the mpls_features bitfield set in
> > > the patch to turn on the TSO bit
> > >
> > Just saying, worth to check if the required flags are already set when
> > i40e_features_check was called.
> >
> > > 2. dev_hard_start_xmit -- this delivers packets to taps, then to the driver
> > >
> > > dev_hard_start_xmit internally hands packets to any taps installed
> > > (for example pcap), before handing them to the driver
> > > (i40e_lan_xmit_frame).
> > >
> > > In our tests of the patch below, we see that tcpdump reports large
> > > packet sizes. Since we see them in tcpdump, I think this suggests that
> > > everything leading up to pcap delivery (including i40e_features_check)
> > > is correct... otherwise we'd see smaller packet sizes being delivered
> > > to pcap.
> > >
> > > This leads me to believe the issue is somewhere in i40e_lan_xmit_frame
> > > or below -- most likely in i40e_tso, which my patch attempts to tweak.
> > >
> > > Let me know if I'm off track and misunderstanding your analysis, but
> > > based on the above, I suspect the changes to i40e_tso are buggy.
> > >
> >
> > Seems like your understanding is correct.
> > Are those packets actually sent to the wire?
> > Any stats are incremented?
> > Anything at all shows up on the client?
> 
> The bytes are never ACK'd by the client and are eventually re-transmit.
> 
> Based on the tcpdump output I think the packet data makes it to the
> driver unsegmented (which is what we want), but due to some issue in
> i40e_tso (or a hardware limitation) the NIC fails to TSO the bytes and
> they are eventually re-transmit.
> 

I think good place to start would be the 710 datasheet:
https://cdrdv2.intel.com/v1/dl/getContent/332464?explicitVersion=true
i.e. 8.4.4.3.2 Transmit Segmentation Flow
Please verify if your change is following the expected flow,

Thank you!

> The retransmit shows smaller packets being handed to the pcap tap,
> which are then acked by the client.
> 
> Thanks,
> Joe
> 



^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] i40e and TSO with MPLS ?
  2022-02-24 14:28     ` Kubalewski, Arkadiusz
@ 2022-02-24 16:27       ` Joe Damato
  2022-03-01 13:57         ` Kubalewski, Arkadiusz
  0 siblings, 1 reply; 9+ messages in thread
From: Joe Damato @ 2022-02-24 16:27 UTC (permalink / raw)
  To: intel-wired-lan

On Thu, Feb 24, 2022 at 6:28 AM Kubalewski, Arkadiusz
<arkadiusz.kubalewski@intel.com> wrote:
>
> > On Wed, Feb 23, 2022 at 9:56 AM Kubalewski, Arkadiusz
> > <arkadiusz.kubalewski@intel.com> wrote:
> > >
> > > +Joe
> > >
> > > > Greetings:
> > > >
> > > > Does i40e (XL710) support TSO with MPLS?
> > > >
> > > > We are using firmware version: 7.10 0x80006469 1.2527.0
> > > >
> > > > We've attempted to add support for TSO+MPLS to i40e, but were unable to
> > > > get it working. The patch is included below for reference, but it is almost
> > > > certainly incorrect - and I am not clear if the hardware itself would
> > > > support this even if the patch was correct.
> > > >
> > > > Applying the patch below and using tcpdump shows that:
> > > >
> > > >         - packet data, as seen by the pcap filter in the kernel, is large.
> > > >           This suggests that the kernel is attempting to offload
> > > >           segmentation to the device,
> > > >
> > > >         but
> > > >
> > > >         - those large packets are not ACK'd by the client
> > > >
> > > > This suggests that either:
> > > >
> > > >         - the device does not support TSO + MPLS, and/or
> > > >         - the patch below is incorrect
> > > >
> > > > Does anyone working on i40e have any insight on this?
> > >
> > > Hi Joe,
> > >
> > > I have done some research for your case, good news is that we believe that 710
> > > hardware supports it. Currently we do not have plans to implement such feature
> > > ourselves, but we will do our best in reviewing if you decide to implement it.
> >
> > OK, thanks. I appreciate the information and your willingness to
> > review. I am pleased to hear that the hardware supports it.
> >
> > > Such offloads should be supported on packets with up to 2 MPLS tags before the
> > > IP header. For start, you might take a look for the features pre check function:
> > > static netdev_features_t i40e_features_check(struct sk_buff *skb, ...
> > > It probably requires an update after the changes you have posted.
> >
> > I took a look at i40e_features_check, as you suggested, but I am
> > probably missing something.
> >
> > My understanding is that the call graph on the xmit path is roughly:
> >
> > __dev_queue_xmit which calls (in order):
> > 1. validate_xmit_skb -- this eventually calls i40e_features_check and
> > harmonize_features which will use the mpls_features bitfield set in
> > the patch to turn on the TSO bit
> >
> Just saying, worth to check if the required flags are already set when
> i40e_features_check was called.
>
> > 2. dev_hard_start_xmit -- this delivers packets to taps, then to the driver
> >
> > dev_hard_start_xmit internally hands packets to any taps installed
> > (for example pcap), before handing them to the driver
> > (i40e_lan_xmit_frame).
> >
> > In our tests of the patch below, we see that tcpdump reports large
> > packet sizes. Since we see them in tcpdump, I think this suggests that
> > everything leading up to pcap delivery (including i40e_features_check)
> > is correct... otherwise we'd see smaller packet sizes being delivered
> > to pcap.
> >
> > This leads me to believe the issue is somewhere in i40e_lan_xmit_frame
> > or below -- most likely in i40e_tso, which my patch attempts to tweak.
> >
> > Let me know if I'm off track and misunderstanding your analysis, but
> > based on the above, I suspect the changes to i40e_tso are buggy.
> >
>
> Seems like your understanding is correct.
> Are those packets actually sent to the wire?
> Any stats are incremented?
> Anything at all shows up on the client?

The bytes are never ACK'd by the client and are eventually re-transmit.

Based on the tcpdump output I think the packet data makes it to the
driver unsegmented (which is what we want), but due to some issue in
i40e_tso (or a hardware limitation) the NIC fails to TSO the bytes and
they are eventually re-transmit.

The retransmit shows smaller packets being handed to the pcap tap,
which are then acked by the client.

Thanks,
Joe

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] i40e and TSO with MPLS ?
  2022-02-24  2:16   ` Joe Damato
@ 2022-02-24 14:28     ` Kubalewski, Arkadiusz
  2022-02-24 16:27       ` Joe Damato
  0 siblings, 1 reply; 9+ messages in thread
From: Kubalewski, Arkadiusz @ 2022-02-24 14:28 UTC (permalink / raw)
  To: intel-wired-lan

> On Wed, Feb 23, 2022 at 9:56 AM Kubalewski, Arkadiusz
> <arkadiusz.kubalewski@intel.com> wrote:
> >
> > +Joe
> >
> > > Greetings:
> > >
> > > Does i40e (XL710) support TSO with MPLS?
> > >
> > > We are using firmware version: 7.10 0x80006469 1.2527.0
> > >
> > > We've attempted to add support for TSO+MPLS to i40e, but were unable to
> > > get it working. The patch is included below for reference, but it is almost
> > > certainly incorrect - and I am not clear if the hardware itself would
> > > support this even if the patch was correct.
> > >
> > > Applying the patch below and using tcpdump shows that:
> > >
> > >         - packet data, as seen by the pcap filter in the kernel, is large.
> > >           This suggests that the kernel is attempting to offload
> > >           segmentation to the device,
> > >
> > >         but
> > >
> > >         - those large packets are not ACK'd by the client
> > >
> > > This suggests that either:
> > >
> > >         - the device does not support TSO + MPLS, and/or
> > >         - the patch below is incorrect
> > >
> > > Does anyone working on i40e have any insight on this?
> >
> > Hi Joe,
> >
> > I have done some research for your case, good news is that we believe that 710
> > hardware supports it. Currently we do not have plans to implement such feature
> > ourselves, but we will do our best in reviewing if you decide to implement it.
> 
> OK, thanks. I appreciate the information and your willingness to
> review. I am pleased to hear that the hardware supports it.
> 
> > Such offloads should be supported on packets with up to 2 MPLS tags before the
> > IP header. For start, you might take a look for the features pre check function:
> > static netdev_features_t i40e_features_check(struct sk_buff *skb, ...
> > It probably requires an update after the changes you have posted.
> 
> I took a look at i40e_features_check, as you suggested, but I am
> probably missing something.
> 
> My understanding is that the call graph on the xmit path is roughly:
> 
> __dev_queue_xmit which calls (in order):
> 1. validate_xmit_skb -- this eventually calls i40e_features_check and
> harmonize_features which will use the mpls_features bitfield set in
> the patch to turn on the TSO bit
> 
Just saying, worth to check if the required flags are already set when
i40e_features_check was called.

> 2. dev_hard_start_xmit -- this delivers packets to taps, then to the driver
> 
> dev_hard_start_xmit internally hands packets to any taps installed
> (for example pcap), before handing them to the driver
> (i40e_lan_xmit_frame).
> 
> In our tests of the patch below, we see that tcpdump reports large
> packet sizes. Since we see them in tcpdump, I think this suggests that
> everything leading up to pcap delivery (including i40e_features_check)
> is correct... otherwise we'd see smaller packet sizes being delivered
> to pcap.
> 
> This leads me to believe the issue is somewhere in i40e_lan_xmit_frame
> or below -- most likely in i40e_tso, which my patch attempts to tweak.
> 
> Let me know if I'm off track and misunderstanding your analysis, but
> based on the above, I suspect the changes to i40e_tso are buggy.
> 

Seems like your understanding is correct.
Are those packets actually sent to the wire?
Any stats are incremented?
Anything at all shows up on the client?

> Thanks,
> Joe
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] i40e and TSO with MPLS ?
  2022-02-23 17:55 ` Kubalewski, Arkadiusz
@ 2022-02-24  2:16   ` Joe Damato
  2022-02-24 14:28     ` Kubalewski, Arkadiusz
  0 siblings, 1 reply; 9+ messages in thread
From: Joe Damato @ 2022-02-24  2:16 UTC (permalink / raw)
  To: intel-wired-lan

On Wed, Feb 23, 2022 at 9:56 AM Kubalewski, Arkadiusz
<arkadiusz.kubalewski@intel.com> wrote:
>
> +Joe
>
> > Greetings:
> >
> > Does i40e (XL710) support TSO with MPLS?
> >
> > We are using firmware version: 7.10 0x80006469 1.2527.0
> >
> > We've attempted to add support for TSO+MPLS to i40e, but were unable to
> > get it working. The patch is included below for reference, but it is almost
> > certainly incorrect - and I am not clear if the hardware itself would
> > support this even if the patch was correct.
> >
> > Applying the patch below and using tcpdump shows that:
> >
> >         - packet data, as seen by the pcap filter in the kernel, is large.
> >           This suggests that the kernel is attempting to offload
> >           segmentation to the device,
> >
> >         but
> >
> >         - those large packets are not ACK'd by the client
> >
> > This suggests that either:
> >
> >         - the device does not support TSO + MPLS, and/or
> >         - the patch below is incorrect
> >
> > Does anyone working on i40e have any insight on this?
>
> Hi Joe,
>
> I have done some research for your case, good news is that we believe that 710
> hardware supports it. Currently we do not have plans to implement such feature
> ourselves, but we will do our best in reviewing if you decide to implement it.

OK, thanks. I appreciate the information and your willingness to
review. I am pleased to hear that the hardware supports it.

> Such offloads should be supported on packets with up to 2 MPLS tags before the
> IP header. For start, you might take a look for the features pre check function:
> static netdev_features_t i40e_features_check(struct sk_buff *skb, ...
> It probably requires an update after the changes you have posted.

I took a look at i40e_features_check, as you suggested, but I am
probably missing something.

My understanding is that the call graph on the xmit path is roughly:

__dev_queue_xmit which calls (in order):
1. validate_xmit_skb -- this eventually calls i40e_features_check and
harmonize_features which will use the mpls_features bitfield set in
the patch to turn on the TSO bit

2. dev_hard_start_xmit -- this delivers packets to taps, then to the driver

dev_hard_start_xmit internally hands packets to any taps installed
(for example pcap), before handing them to the driver
(i40e_lan_xmit_frame).

In our tests of the patch below, we see that tcpdump reports large
packet sizes. Since we see them in tcpdump, I think this suggests that
everything leading up to pcap delivery (including i40e_features_check)
is correct... otherwise we'd see smaller packet sizes being delivered
to pcap.

This leads me to believe the issue is somewhere in i40e_lan_xmit_frame
or below -- most likely in i40e_tso, which my patch attempts to tweak.

Let me know if I'm off track and misunderstanding your analysis, but
based on the above, I suspect the changes to i40e_tso are buggy.

Thanks,
Joe

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] i40e and TSO with MPLS ?
  2022-02-23 17:19 Kubalewski, Arkadiusz
@ 2022-02-23 17:55 ` Kubalewski, Arkadiusz
  2022-02-24  2:16   ` Joe Damato
  0 siblings, 1 reply; 9+ messages in thread
From: Kubalewski, Arkadiusz @ 2022-02-23 17:55 UTC (permalink / raw)
  To: intel-wired-lan

+Joe

> Greetings:
>
> Does i40e (XL710) support TSO with MPLS?
>
> We are using firmware version: 7.10 0x80006469 1.2527.0
>
> We've attempted to add support for TSO+MPLS to i40e, but were unable to
> get it working. The patch is included below for reference, but it is almost
> certainly incorrect - and I am not clear if the hardware itself would
> support this even if the patch was correct.
>
> Applying the patch below and using tcpdump shows that:
>
>         - packet data, as seen by the pcap filter in the kernel, is large.
>           This suggests that the kernel is attempting to offload
>           segmentation to the device,
>
>         but
>
>         - those large packets are not ACK'd by the client
>
> This suggests that either:
>
>         - the device does not support TSO + MPLS, and/or
>         - the patch below is incorrect
>
> Does anyone working on i40e have any insight on this?

Hi Joe,

I have done some research for your case, good news is that we believe that 710
hardware supports it. Currently we do not have plans to implement such feature
ourselves, but we will do our best in reviewing if you decide to implement it.

Such offloads should be supported on packets with up to 2 MPLS tags before the
IP header. For start, you might take a look for the features pre check function:
static netdev_features_t i40e_features_check(struct sk_buff *skb, ...
It probably requires an update after the changes you have posted.

Hope that helps!
Arek

>
> Thanks,
> Joe
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c
> b/drivers/net/ethernet/intel/i40e/i40e_main.c
> index a21833c..b7455cc 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -12920,8 +12920,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
>         np->vsi = vsi;
>
>         hw_enc_features = NETIF_F_SG                    |
> -                         NETIF_F_IP_CSUM               |
> -                         NETIF_F_IPV6_CSUM             |
> +                         NETIF_F_HW_CSUM               |
>                           NETIF_F_HIGHDMA               |
>                           NETIF_F_SOFT_FEATURES         |
>                           NETIF_F_TSO                   |
> @@ -12952,6 +12951,23 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
>         /* record features VLANs can make use of */
>         netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
>
> +#define I40E_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
> +                                  NETIF_F_GSO_GRE_CSUM | \
> +                                  NETIF_F_GSO_IPXIP4 | \
> +                                  NETIF_F_GSO_IPXIP6 | \
> +                                  NETIF_F_GSO_UDP_TUNNEL | \
> +                                  NETIF_F_GSO_UDP_TUNNEL_CSUM)
> +
> +       netdev->gso_partial_features = I40E_GSO_PARTIAL_FEATURES;
> +       netdev->features |= NETIF_F_GSO_PARTIAL |
> +                           I40E_GSO_PARTIAL_FEATURES;
> +
> +       netdev->mpls_features    |= NETIF_F_SG;
> +       netdev->mpls_features    |= NETIF_F_HW_CSUM;
> +       netdev->mpls_features    |= NETIF_F_TSO;
> +       netdev->mpls_features    |= NETIF_F_TSO6;
> +       netdev->mpls_features |= I40E_GSO_PARTIAL_FEATURES;
> +
>         /* enable macvlan offloads */
>         netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> index 5ad2812..9e641a9 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> @@ -9,6 +9,7 @@
>  #include "i40e_prototype.h"
>  #include "i40e_txrx_common.h"
>  #include "i40e_xsk.h"
> +#include <net/mpls.h>
>
>  #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
>  /**
> @@ -2908,6 +2909,7 @@ static int i40e_tso(struct i40e_tx_buffer
> *first, u8 *hdr_len,
>  {
>         struct sk_buff *skb = first->skb;
>         u64 cd_cmd, cd_tso_len, cd_mss;
> +       __be16 protocol;
>         union {
>                 struct iphdr *v4;
>                 struct ipv6hdr *v6;
> @@ -2932,15 +2934,24 @@ static int i40e_tso(struct i40e_tx_buffer
> *first, u8 *hdr_len,
>         if (err < 0)
>                 return err;
>
> -       ip.hdr = skb_network_header(skb);
> -       l4.hdr = skb_transport_header(skb);
> +       protocol = vlan_get_protocol(skb);
> +
> +       if (eth_p_mpls(protocol))
> +               ip.hdr = skb_inner_network_header(skb);
> +       else
> +               ip.hdr = skb_network_header(skb);
> +       l4.hdr = skb_checksum_start(skb);
>
>         /* initialize outer IP header fields */
>         if (ip.v4->version == 4) {
>                 ip.v4->tot_len = 0;
>                 ip.v4->check = 0;
> +
> +               first->tx_flags |= I40E_TX_FLAGS_TSO |
> +                                  I40E_TX_FLAGS_IPV4;
>         } else {
>                 ip.v6->payload_len = 0;
> +               first->tx_flags |= I40E_TX_FLAGS_TSO;
>         }
>
>         if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
> @@ -2962,10 +2973,6 @@ static int i40e_tso(struct i40e_tx_buffer
> *first, u8 *hdr_len,
>                                              (__force __wsum)htonl(paylen));
>                 }
>
> -               /* reset pointers to inner headers */
> -               ip.hdr = skb_inner_network_header(skb);
> -               l4.hdr = skb_inner_transport_header(skb);
> -
>                 /* initialize inner IP header fields */
>                 if (ip.v4->version == 4) {
>                         ip.v4->tot_len = 0;
> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan at osuosl.org
> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan
_______________________________________________
Intel-wired-lan mailing list
Intel-wired-lan at osuosl.org
https://lists.osuosl.org/mailman/listinfo/intel-wired-lan

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] i40e and TSO with MPLS ?
@ 2022-02-23 17:19 Kubalewski, Arkadiusz
  2022-02-23 17:55 ` Kubalewski, Arkadiusz
  0 siblings, 1 reply; 9+ messages in thread
From: Kubalewski, Arkadiusz @ 2022-02-23 17:19 UTC (permalink / raw)
  To: intel-wired-lan

> Greetings:
>
> Does i40e (XL710) support TSO with MPLS?
>
> We are using firmware version: 7.10 0x80006469 1.2527.0
>
> We've attempted to add support for TSO+MPLS to i40e, but were unable to
> get it working. The patch is included below for reference, but it is almost
> certainly incorrect - and I am not clear if the hardware itself would
> support this even if the patch was correct.
>
> Applying the patch below and using tcpdump shows that:
>
>         - packet data, as seen by the pcap filter in the kernel, is large.
>           This suggests that the kernel is attempting to offload
>           segmentation to the device,
>
>         but
>
>         - those large packets are not ACK'd by the client
>
> This suggests that either:
>
>         - the device does not support TSO + MPLS, and/or
>         - the patch below is incorrect
>
> Does anyone working on i40e have any insight on this?

Hi Joe,

I have done some research for your case, good news is that we believe that 710
hardware supports it. Currently we do not have plans to implement such feature
ourselves, but we will do our best in reviewing if you decide to implement it.

Such offloads should be supported on packets with up to 2 MPLS tags before the
IP header. For start, you might take a look for the features pre check function:
static netdev_features_t i40e_features_check(struct sk_buff *skb, ...
It probably requires an update after the changes you have posted.

Hope that helps!
Arek

>
> Thanks,
> Joe
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c
> b/drivers/net/ethernet/intel/i40e/i40e_main.c
> index a21833c..b7455cc 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -12920,8 +12920,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
>         np->vsi = vsi;
>
>         hw_enc_features = NETIF_F_SG                    |
> -                         NETIF_F_IP_CSUM               |
> -                         NETIF_F_IPV6_CSUM             |
> +                         NETIF_F_HW_CSUM               |
>                           NETIF_F_HIGHDMA               |
>                           NETIF_F_SOFT_FEATURES         |
>                           NETIF_F_TSO                   |
> @@ -12952,6 +12951,23 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
>         /* record features VLANs can make use of */
>         netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
>
> +#define I40E_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
> +                                  NETIF_F_GSO_GRE_CSUM | \
> +                                  NETIF_F_GSO_IPXIP4 | \
> +                                  NETIF_F_GSO_IPXIP6 | \
> +                                  NETIF_F_GSO_UDP_TUNNEL | \
> +                                  NETIF_F_GSO_UDP_TUNNEL_CSUM)
> +
> +       netdev->gso_partial_features = I40E_GSO_PARTIAL_FEATURES;
> +       netdev->features |= NETIF_F_GSO_PARTIAL |
> +                           I40E_GSO_PARTIAL_FEATURES;
> +
> +       netdev->mpls_features    |= NETIF_F_SG;
> +       netdev->mpls_features    |= NETIF_F_HW_CSUM;
> +       netdev->mpls_features    |= NETIF_F_TSO;
> +       netdev->mpls_features    |= NETIF_F_TSO6;
> +       netdev->mpls_features |= I40E_GSO_PARTIAL_FEATURES;
> +
>         /* enable macvlan offloads */
>         netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> index 5ad2812..9e641a9 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> @@ -9,6 +9,7 @@
>  #include "i40e_prototype.h"
>  #include "i40e_txrx_common.h"
>  #include "i40e_xsk.h"
> +#include <net/mpls.h>
>
>  #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
>  /**
> @@ -2908,6 +2909,7 @@ static int i40e_tso(struct i40e_tx_buffer
> *first, u8 *hdr_len,
>  {
>         struct sk_buff *skb = first->skb;
>         u64 cd_cmd, cd_tso_len, cd_mss;
> +       __be16 protocol;
>         union {
>                 struct iphdr *v4;
>                 struct ipv6hdr *v6;
> @@ -2932,15 +2934,24 @@ static int i40e_tso(struct i40e_tx_buffer
> *first, u8 *hdr_len,
>         if (err < 0)
>                 return err;
>
> -       ip.hdr = skb_network_header(skb);
> -       l4.hdr = skb_transport_header(skb);
> +       protocol = vlan_get_protocol(skb);
> +
> +       if (eth_p_mpls(protocol))
> +               ip.hdr = skb_inner_network_header(skb);
> +       else
> +               ip.hdr = skb_network_header(skb);
> +       l4.hdr = skb_checksum_start(skb);
>
>         /* initialize outer IP header fields */
>         if (ip.v4->version == 4) {
>                 ip.v4->tot_len = 0;
>                 ip.v4->check = 0;
> +
> +               first->tx_flags |= I40E_TX_FLAGS_TSO |
> +                                  I40E_TX_FLAGS_IPV4;
>         } else {
>                 ip.v6->payload_len = 0;
> +               first->tx_flags |= I40E_TX_FLAGS_TSO;
>         }
>
>         if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
> @@ -2962,10 +2973,6 @@ static int i40e_tso(struct i40e_tx_buffer
> *first, u8 *hdr_len,
>                                              (__force __wsum)htonl(paylen));
>                 }
>
> -               /* reset pointers to inner headers */
> -               ip.hdr = skb_inner_network_header(skb);
> -               l4.hdr = skb_inner_transport_header(skb);
> -
>                 /* initialize inner IP header fields */
>                 if (ip.v4->version == 4) {
>                         ip.v4->tot_len = 0;
> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan at osuosl.org
> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-03-02 21:10 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-15 21:02 [Intel-wired-lan] i40e and TSO with MPLS ? Joe Damato
2022-02-23 17:19 Kubalewski, Arkadiusz
2022-02-23 17:55 ` Kubalewski, Arkadiusz
2022-02-24  2:16   ` Joe Damato
2022-02-24 14:28     ` Kubalewski, Arkadiusz
2022-02-24 16:27       ` Joe Damato
2022-03-01 13:57         ` Kubalewski, Arkadiusz
2022-03-02 20:24           ` Joe Damato
2022-03-02 21:10             ` Kubalewski, Arkadiusz

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.