netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [v2 net PATCH] octeontx2-pf: Disable HW TSO for seg size < 16B
@ 2024-03-13  6:33 Geetha sowjanya
  2024-03-13  7:57 ` Eric Dumazet
  0 siblings, 1 reply; 3+ messages in thread
From: Geetha sowjanya @ 2024-03-13  6:33 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: kuba, davem, pabeni, edumazet, sgoutham, gakula, sbhatta, hkelam

Current NIX hardware do not support TSO for the
segment size less 16 bytes. This patch disable hw
TSO for such packets.

Fixes: 86d7476078b8 ("octeontx2-pf: TCP segmentation offload support").
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
---

v1-v2:
 - As suggested by Eric Dumazet used ndo_features_check().
 - Moved the max fargments support check to ndo_features_check.

 .../marvell/octeontx2/nic/otx2_common.c        | 18 ++++++++++++++++++
 .../marvell/octeontx2/nic/otx2_common.h        |  3 +++
 .../ethernet/marvell/octeontx2/nic/otx2_pf.c   |  1 +
 .../ethernet/marvell/octeontx2/nic/otx2_txrx.c | 11 -----------
 .../ethernet/marvell/octeontx2/nic/otx2_vf.c   |  1 +
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 02d0b707aea5..de61c69370be 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -221,6 +221,24 @@ int otx2_set_mac_address(struct net_device *netdev, void *p)
 }
 EXPORT_SYMBOL(otx2_set_mac_address);
 
+netdev_features_t
+otx2_features_check(struct sk_buff *skb, struct net_device *dev,
+		    netdev_features_t features)
+{
+	/* Due to hw issue segment size less than 16 bytes
+	 * are not supported. Hence do not offload such TSO
+	 * segments.
+	 */
+	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_size < 16)
+		features &= ~NETIF_F_GSO_MASK;
+
+	if (skb_shinfo(skb)->nr_frags + 1 > OTX2_MAX_FRAGS_IN_SQE)
+		features &= ~NETIF_F_SG;
+
+	return features;
+}
+EXPORT_SYMBOL(otx2_features_check);
+
 int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu)
 {
 	struct nix_frs_cfg *req;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 06910307085e..6a4bf43bc77e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -961,6 +961,9 @@ void otx2_get_mac_from_af(struct net_device *netdev);
 void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx);
 int otx2_config_pause_frm(struct otx2_nic *pfvf);
 void otx2_setup_segmentation(struct otx2_nic *pfvf);
+netdev_features_t otx2_features_check(struct sk_buff *skb,
+				      struct net_device *dev,
+				      netdev_features_t features);
 
 /* RVU block related APIs */
 int otx2_attach_npa_nix(struct otx2_nic *pfvf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index e5fe67e73865..2364eb8d6732 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -2737,6 +2737,7 @@ static const struct net_device_ops otx2_netdev_ops = {
 	.ndo_xdp_xmit           = otx2_xdp_xmit,
 	.ndo_setup_tc		= otx2_setup_tc,
 	.ndo_set_vf_trust	= otx2_ndo_set_vf_trust,
+	.ndo_features_check	= otx2_features_check,
 };
 
 static int otx2_wq_init(struct otx2_nic *pf)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index f828d32737af..9b89aff42eb0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -1158,17 +1158,6 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq,
 
 	num_segs = skb_shinfo(skb)->nr_frags + 1;
 
-	/* If SKB doesn't fit in a single SQE, linearize it.
-	 * TODO: Consider adding JUMP descriptor instead.
-	 */
-	if (unlikely(num_segs > OTX2_MAX_FRAGS_IN_SQE)) {
-		if (__skb_linearize(skb)) {
-			dev_kfree_skb_any(skb);
-			return true;
-		}
-		num_segs = skb_shinfo(skb)->nr_frags + 1;
-	}
-
 	if (skb_shinfo(skb)->gso_size && !is_hw_tso_supported(pfvf, skb)) {
 		/* Insert vlan tag before giving pkt to tso */
 		if (skb_vlan_tag_present(skb))
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
index 35e06048356f..04aab04e4ba2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
@@ -483,6 +483,7 @@ static const struct net_device_ops otx2vf_netdev_ops = {
 	.ndo_tx_timeout = otx2_tx_timeout,
 	.ndo_eth_ioctl	= otx2_ioctl,
 	.ndo_setup_tc = otx2_setup_tc,
+	.ndo_features_check	= otx2_features_check,
 };
 
 static int otx2_wq_init(struct otx2_nic *vf)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [v2 net PATCH] octeontx2-pf: Disable HW TSO for seg size < 16B
  2024-03-13  6:33 [v2 net PATCH] octeontx2-pf: Disable HW TSO for seg size < 16B Geetha sowjanya
@ 2024-03-13  7:57 ` Eric Dumazet
  2024-03-18  0:46   ` [EXTERNAL] " Geethasowjanya Akula
  0 siblings, 1 reply; 3+ messages in thread
From: Eric Dumazet @ 2024-03-13  7:57 UTC (permalink / raw)
  To: Geetha sowjanya
  Cc: netdev, linux-kernel, kuba, davem, pabeni, sgoutham, sbhatta, hkelam

On Wed, Mar 13, 2024 at 7:33 AM Geetha sowjanya <gakula@marvell.com> wrote:
>
> Current NIX hardware do not support TSO for the
> segment size less 16 bytes. This patch disable hw
> TSO for such packets.
>
> Fixes: 86d7476078b8 ("octeontx2-pf: TCP segmentation offload support").
> Signed-off-by: Geetha sowjanya <gakula@marvell.com>
> ---
>
> v1-v2:
>  - As suggested by Eric Dumazet used ndo_features_check().
>  - Moved the max fargments support check to ndo_features_check.
>
>  .../marvell/octeontx2/nic/otx2_common.c        | 18 ++++++++++++++++++
>  .../marvell/octeontx2/nic/otx2_common.h        |  3 +++
>  .../ethernet/marvell/octeontx2/nic/otx2_pf.c   |  1 +
>  .../ethernet/marvell/octeontx2/nic/otx2_txrx.c | 11 -----------
>  .../ethernet/marvell/octeontx2/nic/otx2_vf.c   |  1 +
>  5 files changed, 23 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> index 02d0b707aea5..de61c69370be 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> @@ -221,6 +221,24 @@ int otx2_set_mac_address(struct net_device *netdev, void *p)
>  }
>  EXPORT_SYMBOL(otx2_set_mac_address);
>
> +netdev_features_t
> +otx2_features_check(struct sk_buff *skb, struct net_device *dev,
> +                   netdev_features_t features)
> +{
> +       /* Due to hw issue segment size less than 16 bytes
> +        * are not supported. Hence do not offload such TSO
> +        * segments.
> +        */
> +       if (skb_is_gso(skb) && skb_shinfo(skb)->gso_size < 16)
> +               features &= ~NETIF_F_GSO_MASK;
> +
> +       if (skb_shinfo(skb)->nr_frags + 1 > OTX2_MAX_FRAGS_IN_SQE)
> +               features &= ~NETIF_F_SG;
> +

This is a bit extreme. I would attempt to remove NETIF_F_GSO_MASK instead.

if (skb_is_gso(skb)) {
     if (skb_shinfo(skb)->gso_size < 16 ||
         skb_shinfo(skb)->nr_frags + 1 > OTX2_MAX_FRAGS_IN_SQE))
           features &= ~NETIF_F_GSO_MASK;
}

This would very often end up with 1-MSS packets with fewer fragments.

-> No copy involved, and no high order page allocations.

> +       return features;
> +}
> +EXPORT_SYMBOL(otx2_features_check);
> +
>  int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu)
>  {
>         struct nix_frs_cfg *req;
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> index 06910307085e..6a4bf43bc77e 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> @@ -961,6 +961,9 @@ void otx2_get_mac_from_af(struct net_device *netdev);
>  void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx);
>  int otx2_config_pause_frm(struct otx2_nic *pfvf);
>  void otx2_setup_segmentation(struct otx2_nic *pfvf);
> +netdev_features_t otx2_features_check(struct sk_buff *skb,
> +                                     struct net_device *dev,
> +                                     netdev_features_t features);
>
>  /* RVU block related APIs */
>  int otx2_attach_npa_nix(struct otx2_nic *pfvf);
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> index e5fe67e73865..2364eb8d6732 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> @@ -2737,6 +2737,7 @@ static const struct net_device_ops otx2_netdev_ops = {
>         .ndo_xdp_xmit           = otx2_xdp_xmit,
>         .ndo_setup_tc           = otx2_setup_tc,
>         .ndo_set_vf_trust       = otx2_ndo_set_vf_trust,
> +       .ndo_features_check     = otx2_features_check,
>  };
>
>  static int otx2_wq_init(struct otx2_nic *pf)
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> index f828d32737af..9b89aff42eb0 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> @@ -1158,17 +1158,6 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq,
>
>         num_segs = skb_shinfo(skb)->nr_frags + 1;
>
> -       /* If SKB doesn't fit in a single SQE, linearize it.
> -        * TODO: Consider adding JUMP descriptor instead.
> -        */
> -       if (unlikely(num_segs > OTX2_MAX_FRAGS_IN_SQE)) {
> -               if (__skb_linearize(skb)) {
> -                       dev_kfree_skb_any(skb);
> -                       return true;
> -               }
> -               num_segs = skb_shinfo(skb)->nr_frags + 1;
> -       }

Then you need to keep this check for  non GSO packets.

One way to trigger this is to run netperf with tiny fragments.
TCP is unable to cook GSO packets, because we hit MAX_SKB_FRAGS before
even filling a single MSS.

netperf -H $remote -t TCP_SENDFILE -- -m 10



> -
>         if (skb_shinfo(skb)->gso_size && !is_hw_tso_supported(pfvf, skb)) {
>                 /* Insert vlan tag before giving pkt to tso */
>                 if (skb_vlan_tag_present(skb))
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> index 35e06048356f..04aab04e4ba2 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> @@ -483,6 +483,7 @@ static const struct net_device_ops otx2vf_netdev_ops = {
>         .ndo_tx_timeout = otx2_tx_timeout,
>         .ndo_eth_ioctl  = otx2_ioctl,
>         .ndo_setup_tc = otx2_setup_tc,
> +       .ndo_features_check     = otx2_features_check,
>  };
>
>  static int otx2_wq_init(struct otx2_nic *vf)
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [EXTERNAL] Re: [v2 net PATCH] octeontx2-pf: Disable HW TSO for seg size < 16B
  2024-03-13  7:57 ` Eric Dumazet
@ 2024-03-18  0:46   ` Geethasowjanya Akula
  0 siblings, 0 replies; 3+ messages in thread
From: Geethasowjanya Akula @ 2024-03-18  0:46 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, linux-kernel, kuba, davem, pabeni, Sunil Kovvuri Goutham,
	Subbaraya Sundeep Bhatta, Hariprasad Kelam



> -----Original Message-----
> From: Eric Dumazet <edumazet@google.com>
> Sent: Wednesday, March 13, 2024 1:28 PM
> To: Geethasowjanya Akula <gakula@marvell.com>
> Cc: netdev@vger.kernel.org; linux-kernel@vger.kernel.org; kuba@kernel.org;
> davem@davemloft.net; pabeni@redhat.com; Sunil Kovvuri Goutham
> <sgoutham@marvell.com>; Subbaraya Sundeep Bhatta
> <sbhatta@marvell.com>; Hariprasad Kelam <hkelam@marvell.com>
> Subject: [EXTERNAL] Re: [v2 net PATCH] octeontx2-pf: Disable HW TSO for seg
> size < 16B
> On Wed, Mar 13, 2024 at 7:33 AM Geetha sowjanya <gakula@marvell.com>
> wrote:
> >
> > Current NIX hardware do not support TSO for the segment size less 16
> > bytes. This patch disable hw TSO for such packets.
> >
> > Fixes: 86d7476078b8 ("octeontx2-pf: TCP segmentation offload support").
> > Signed-off-by: Geetha sowjanya <gakula@marvell.com>
> > ---
> >
> > v1-v2:
> >  - As suggested by Eric Dumazet used ndo_features_check().
> >  - Moved the max fargments support check to ndo_features_check.
> >
> >  .../marvell/octeontx2/nic/otx2_common.c        | 18 ++++++++++++++++++
> >  .../marvell/octeontx2/nic/otx2_common.h        |  3 +++
> >  .../ethernet/marvell/octeontx2/nic/otx2_pf.c   |  1 +
> >  .../ethernet/marvell/octeontx2/nic/otx2_txrx.c | 11 -----------
> >  .../ethernet/marvell/octeontx2/nic/otx2_vf.c   |  1 +
> >  5 files changed, 23 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> > b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> > index 02d0b707aea5..de61c69370be 100644
> > --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> > +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> > @@ -221,6 +221,24 @@ int otx2_set_mac_address(struct net_device
> > *netdev, void *p)  }  EXPORT_SYMBOL(otx2_set_mac_address);
> >
> > +netdev_features_t
> > +otx2_features_check(struct sk_buff *skb, struct net_device *dev,
> > +                   netdev_features_t features) {
> > +       /* Due to hw issue segment size less than 16 bytes
> > +        * are not supported. Hence do not offload such TSO
> > +        * segments.
> > +        */
> > +       if (skb_is_gso(skb) && skb_shinfo(skb)->gso_size < 16)
> > +               features &= ~NETIF_F_GSO_MASK;
> > +
> > +       if (skb_shinfo(skb)->nr_frags + 1 > OTX2_MAX_FRAGS_IN_SQE)
> > +               features &= ~NETIF_F_SG;
> > +
> 
> This is a bit extreme. I would attempt to remove NETIF_F_GSO_MASK instead.
> 
> if (skb_is_gso(skb)) {
>      if (skb_shinfo(skb)->gso_size < 16 ||
>          skb_shinfo(skb)->nr_frags + 1 > OTX2_MAX_FRAGS_IN_SQE))
>            features &= ~NETIF_F_GSO_MASK; }
> 
> This would very often end up with 1-MSS packets with fewer fragments.
> 
> -> No copy involved, and no high order page allocations.
> 
> > +       return features;
> > +}
> > +EXPORT_SYMBOL(otx2_features_check);
> > +
> >  int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu)  {
> >         struct nix_frs_cfg *req;
> > diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> > b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> > index 06910307085e..6a4bf43bc77e 100644
> > --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> > +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> > @@ -961,6 +961,9 @@ void otx2_get_mac_from_af(struct net_device
> > *netdev);  void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int
> > qidx);  int otx2_config_pause_frm(struct otx2_nic *pfvf);  void
> > otx2_setup_segmentation(struct otx2_nic *pfvf);
> > +netdev_features_t otx2_features_check(struct sk_buff *skb,
> > +                                     struct net_device *dev,
> > +                                     netdev_features_t features);
> >
> >  /* RVU block related APIs */
> >  int otx2_attach_npa_nix(struct otx2_nic *pfvf); diff --git
> > a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> > b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> > index e5fe67e73865..2364eb8d6732 100644
> > --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> > +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> > @@ -2737,6 +2737,7 @@ static const struct net_device_ops
> otx2_netdev_ops = {
> >         .ndo_xdp_xmit           = otx2_xdp_xmit,
> >         .ndo_setup_tc           = otx2_setup_tc,
> >         .ndo_set_vf_trust       = otx2_ndo_set_vf_trust,
> > +       .ndo_features_check     = otx2_features_check,
> >  };
> >
> >  static int otx2_wq_init(struct otx2_nic *pf) diff --git
> > a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> > b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> > index f828d32737af..9b89aff42eb0 100644
> > --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> > +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> > @@ -1158,17 +1158,6 @@ bool otx2_sq_append_skb(struct net_device
> > *netdev, struct otx2_snd_queue *sq,
> >
> >         num_segs = skb_shinfo(skb)->nr_frags + 1;
> >
> > -       /* If SKB doesn't fit in a single SQE, linearize it.
> > -        * TODO: Consider adding JUMP descriptor instead.
> > -        */
> > -       if (unlikely(num_segs > OTX2_MAX_FRAGS_IN_SQE)) {
> > -               if (__skb_linearize(skb)) {
> > -                       dev_kfree_skb_any(skb);
> > -                       return true;
> > -               }
> > -               num_segs = skb_shinfo(skb)->nr_frags + 1;
> > -       }
> 
> Then you need to keep this check for  non GSO packets.
> 
> One way to trigger this is to run netperf with tiny fragments.
> TCP is unable to cook GSO packets, because we hit MAX_SKB_FRAGS before
> even filling a single MSS.
> 
> netperf -H $remote -t TCP_SENDFILE -- -m 10
> 
Will repost the patch with suggested changes and testing.
> 
> 
> > -
> >         if (skb_shinfo(skb)->gso_size && !is_hw_tso_supported(pfvf, skb)) {
> >                 /* Insert vlan tag before giving pkt to tso */
> >                 if (skb_vlan_tag_present(skb)) diff --git
> > a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> > b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> > index 35e06048356f..04aab04e4ba2 100644
> > --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> > +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> > @@ -483,6 +483,7 @@ static const struct net_device_ops
> otx2vf_netdev_ops = {
> >         .ndo_tx_timeout = otx2_tx_timeout,
> >         .ndo_eth_ioctl  = otx2_ioctl,
> >         .ndo_setup_tc = otx2_setup_tc,
> > +       .ndo_features_check     = otx2_features_check,
> >  };
> >
> >  static int otx2_wq_init(struct otx2_nic *vf)
> > --
> > 2.25.1
> >

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-03-18  0:46 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-13  6:33 [v2 net PATCH] octeontx2-pf: Disable HW TSO for seg size < 16B Geetha sowjanya
2024-03-13  7:57 ` Eric Dumazet
2024-03-18  0:46   ` [EXTERNAL] " Geethasowjanya Akula

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).