On Wed, Aug 11, 2021 at 3:44 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Wed, 11 Aug 2021 15:36:34 -0700 Michael Chan wrote:
> > On Wed, Aug 11, 2021 at 2:38 PM Jakub Kicinski <kuba@kernel.org> wrote:
> > > @@ -367,6 +368,13 @@ static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb)
> > >         return md_dst->u.port_info.port_id;
> > >  }
> > >
> > > +static void bnxt_txr_db_kick(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
> > > +                            u16 prod)
> > > +{
> > > +       bnxt_db_write(bp, &txr->tx_db, prod);
> > > +       txr->kick_pending = 0;
> > > +}
> > > +
> > >  static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
> > >  {
> > >         struct bnxt *bp = netdev_priv(dev);
> > > @@ -396,6 +404,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
> > >         free_size = bnxt_tx_avail(bp, txr);
> > >         if (unlikely(free_size < skb_shinfo(skb)->nr_frags + 2)) {
> > >                 netif_tx_stop_queue(txq);
> > > +               if (net_ratelimit() && txr->kick_pending)
> > > +                       netif_warn(bp, tx_err, dev, "bnxt: ring busy!\n");
> >
> > You forgot to remove this.
>
> I changed my mind. I added the && txr->kick_pending to the condition:
> if there is a race and NAPI starts the queue unnecessarily, the kick
> can't be pending.

I don't understand.  The queue should be stopped if we have <=
MAX_SKB_FRAGS + 1 descriptors left.  If there is a race and the queue
is awake, the first TX packet may slip through if
skb_shinfo(skb)->nr_frags is small and we have enough descriptors for
it.  Let's say xmit_more is set for this packet, so a kick is
pending.  The next packet may no longer fit, and it will hit this
check here with the kick still pending.

>
> > >                 return NETDEV_TX_BUSY;
> > >         }
> > >
> > > @@ -516,21 +526,16 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
> > >  normal_tx:
> > >         if (length < BNXT_MIN_PKT_SIZE) {
> > >                 pad = BNXT_MIN_PKT_SIZE - length;
> > > -               if (skb_pad(skb, pad)) {
> > > +               if (skb_pad(skb, pad))
> > >                         /* SKB already freed. */
> > > -                       tx_buf->skb = NULL;
> > > -                       return NETDEV_TX_OK;
> > > -               }
> > > +                       goto tx_kick_pending;
> > >                 length = BNXT_MIN_PKT_SIZE;
> > >         }
> > >
> > >         mapping = dma_map_single(&pdev->dev, skb->data, len, DMA_TO_DEVICE);
> > >
> > > -       if (unlikely(dma_mapping_error(&pdev->dev, mapping))) {
> > > -               dev_kfree_skb_any(skb);
> > > -               tx_buf->skb = NULL;
> > > -               return NETDEV_TX_OK;
> > > -       }
> > > +       if (unlikely(dma_mapping_error(&pdev->dev, mapping)))
> > > +               goto tx_free;
> > >
> > >         dma_unmap_addr_set(tx_buf, mapping, mapping);
> > >         flags = (len << TX_BD_LEN_SHIFT) | TX_BD_TYPE_LONG_TX_BD |
> > > @@ -617,13 +622,15 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
> > >         txr->tx_prod = prod;
> > >
> > >         if (!netdev_xmit_more() || netif_xmit_stopped(txq))
> > > -               bnxt_db_write(bp, &txr->tx_db, prod);
> > > +               bnxt_txr_db_kick(bp, txr, prod);
> > > +       else
> > > +               txr->kick_pending = 1;
> > >
> > >  tx_done:
> > >
> > >         if (unlikely(bnxt_tx_avail(bp, txr) <= MAX_SKB_FRAGS + 1)) {
> > >                 if (netdev_xmit_more() && !tx_buf->is_push)
> > > -                       bnxt_db_write(bp, &txr->tx_db, prod);
> > > +                       bnxt_txr_db_kick(bp, txr, prod);
> > >
> > >                 netif_tx_stop_queue(txq);
> > >
> > > @@ -661,7 +668,12 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
> > >                                PCI_DMA_TODEVICE);
> > >         }
> > >
> > > +tx_free:
> > >         dev_kfree_skb_any(skb);
> > > +tx_kick_pending:
> > > +       tx_buf->skb = NULL;
> >
> > I think we should remove the setting of tx_buf->skb to NULL in the
> > tx_dma_error path since we are setting it here now.
>
> But tx_buf gets moved IIRC - if we hit tx_dma_error tx_buf will be one
> of the fragment bufs at this point. It should be legal to clear the skb
> pointer on those AFAICT.

Ah, you're right.

>
> Are you suggesting to do something along the lines of:
>
>         txr->tx_buf_ring[txr->tx_prod].skb = NULL;

Yeah, I like this the best.

>
> ?
>
> > > +       if (txr->kick_pending)
> > > +               bnxt_txr_db_kick(bp, txr, txr->tx_prod);
> > >         return NETDEV_TX_OK;
> > >  }
> > >
>