On Wed, Aug 11, 2021 at 3:44 PM Jakub Kicinski wrote: > > On Wed, 11 Aug 2021 15:36:34 -0700 Michael Chan wrote: > > On Wed, Aug 11, 2021 at 2:38 PM Jakub Kicinski wrote: > > > @@ -367,6 +368,13 @@ static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb) > > > return md_dst->u.port_info.port_id; > > > } > > > > > > +static void bnxt_txr_db_kick(struct bnxt *bp, struct bnxt_tx_ring_info *txr, > > > + u16 prod) > > > +{ > > > + bnxt_db_write(bp, &txr->tx_db, prod); > > > + txr->kick_pending = 0; > > > +} > > > + > > > static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) > > > { > > > struct bnxt *bp = netdev_priv(dev); > > > @@ -396,6 +404,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) > > > free_size = bnxt_tx_avail(bp, txr); > > > if (unlikely(free_size < skb_shinfo(skb)->nr_frags + 2)) { > > > netif_tx_stop_queue(txq); > > > + if (net_ratelimit() && txr->kick_pending) > > > + netif_warn(bp, tx_err, dev, "bnxt: ring busy!\n"); > > > > You forgot to remove this. > > I changed my mind. I added the && txr->kick_pending to the condition, > if there is a race and napi starts the queue unnecessarily the kick > can't be pending. I don't understand. The queue should be stopped if we have <= MAX_SKB_FRAGS + 1 descriptors left. If there is a race and the queue is awake, the first TX packet may slip through if skb_shinfo(skb)->nr_frags is small and we have enough descriptors for it. Let's say xmit_more is set for this packet, so a kick is pending. The next packet may no longer fit, and it will hit this check here with the kick still pending. > > > > return NETDEV_TX_BUSY; > > > } > > > > > > @@ -516,21 +526,16 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) > > > normal_tx: > > > if (length < BNXT_MIN_PKT_SIZE) { > > > pad = BNXT_MIN_PKT_SIZE - length; > > > - if (skb_pad(skb, pad)) { > > > + if (skb_pad(skb, pad)) > > > /* SKB already freed. 
*/ > > > - tx_buf->skb = NULL; > > > - return NETDEV_TX_OK; > > > - } > > > + goto tx_kick_pending; > > > length = BNXT_MIN_PKT_SIZE; > > > } > > > > > > mapping = dma_map_single(&pdev->dev, skb->data, len, DMA_TO_DEVICE); > > > > > > - if (unlikely(dma_mapping_error(&pdev->dev, mapping))) { > > > - dev_kfree_skb_any(skb); > > > - tx_buf->skb = NULL; > > > - return NETDEV_TX_OK; > > > - } > > > + if (unlikely(dma_mapping_error(&pdev->dev, mapping))) > > > + goto tx_free; > > > > > > dma_unmap_addr_set(tx_buf, mapping, mapping); > > > flags = (len << TX_BD_LEN_SHIFT) | TX_BD_TYPE_LONG_TX_BD | > > > @@ -617,13 +622,15 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) > > > txr->tx_prod = prod; > > > > > > if (!netdev_xmit_more() || netif_xmit_stopped(txq)) > > > - bnxt_db_write(bp, &txr->tx_db, prod); > > > + bnxt_txr_db_kick(bp, txr, prod); > > > + else > > > + txr->kick_pending = 1; > > > > > > tx_done: > > > > > > if (unlikely(bnxt_tx_avail(bp, txr) <= MAX_SKB_FRAGS + 1)) { > > > if (netdev_xmit_more() && !tx_buf->is_push) > > > - bnxt_db_write(bp, &txr->tx_db, prod); > > > + bnxt_txr_db_kick(bp, txr, prod); > > > > > > netif_tx_stop_queue(txq); > > > > > > @@ -661,7 +668,12 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) > > > PCI_DMA_TODEVICE); > > > } > > > > > > +tx_free: > > > dev_kfree_skb_any(skb); > > > +tx_kick_pending: > > > + tx_buf->skb = NULL; > > > > I think we should remove the setting of tx_buf->skb to NULL in the > > tx_dma_error path since we are setting it here now. > > But tx_buf gets moved IIRC - if we hit tx_dma_error tx_buf will be one > of the fragment bufs at this point. It should be legal to clear the skb > pointer on those AFAICT. Ah, you're right. > > Are you suggesting to do something along the lines of: > > txr->tx_buf_ring[txr->tx_prod].skb = NULL; Yeah, I like this the best. > > ? 
> > > > + if (txr->kick_pending) > > > + bnxt_txr_db_kick(bp, txr, txr->tx_prod); > > > return NETDEV_TX_OK; > > > } > > > >