All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/3] eth: bnxt: handle invalid Tx completions more gracefully
@ 2023-07-10 20:56 Jakub Kicinski
  2023-07-10 20:56 ` [PATCH net-next 1/3] eth: bnxt: move and rename reset helpers Jakub Kicinski
                   ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: Jakub Kicinski @ 2023-07-10 20:56 UTC (permalink / raw)
  To: davem; +Cc: netdev, edumazet, pabeni, michael.chan, Jakub Kicinski

bnxt trusts the events generated by the device which may lead to kernel
crashes. These are extremely rare but they do happen. For a while
I thought crashing may be intentional, because device reporting invalid
completions should never happen, and having a core dump could be useful
if it does. But in practice I haven't found any clues in the core dumps,
and panic_on_warn exists.

Series was tested by forcing the recovery path manually. Because of
how rare the real crashes are I can't confirm it works for the actual
device errors until it's been widely deployed.

Jakub Kicinski (3):
  eth: bnxt: move and rename reset helpers
  eth: bnxt: take the bit to set as argument of bnxt_queue_sp_work()
  eth: bnxt: handle invalid Tx completions more gracefully

 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 143 +++++++++++-----------
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |   1 +
 2 files changed, 74 insertions(+), 70 deletions(-)

-- 
2.41.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH net-next 1/3] eth: bnxt: move and rename reset helpers
  2023-07-10 20:56 [PATCH net-next 0/3] eth: bnxt: handle invalid Tx completions more gracefully Jakub Kicinski
@ 2023-07-10 20:56 ` Jakub Kicinski
  2023-07-10 20:56 ` [PATCH net-next 2/3] eth: bnxt: take the bit to set as argument of bnxt_queue_sp_work() Jakub Kicinski
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 19+ messages in thread
From: Jakub Kicinski @ 2023-07-10 20:56 UTC (permalink / raw)
  To: davem; +Cc: netdev, edumazet, pabeni, michael.chan, Jakub Kicinski

Move the reset helpers, subsequent patches will need some
of them on the Tx path.

While at it rename bnxt_sched_reset(), on more recent chips
it schedules a queue reset, instead of a fuller reset.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 72 +++++++++++------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index e5b54e6025be..568bcf1c9928 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -293,6 +293,38 @@ static void bnxt_db_cq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx)
 		BNXT_DB_CQ(db, idx);
 }
 
+static void bnxt_queue_fw_reset_work(struct bnxt *bp, unsigned long delay)
+{
+	if (!(test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)))
+		return;
+
+	if (BNXT_PF(bp))
+		queue_delayed_work(bnxt_pf_wq, &bp->fw_reset_task, delay);
+	else
+		schedule_delayed_work(&bp->fw_reset_task, delay);
+}
+
+static void bnxt_queue_sp_work(struct bnxt *bp)
+{
+	if (BNXT_PF(bp))
+		queue_work(bnxt_pf_wq, &bp->sp_task);
+	else
+		schedule_work(&bp->sp_task);
+}
+
+static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
+{
+	if (!rxr->bnapi->in_reset) {
+		rxr->bnapi->in_reset = true;
+		if (bp->flags & BNXT_FLAG_CHIP_P5)
+			set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event);
+		else
+			set_bit(BNXT_RST_RING_SP_EVENT, &bp->sp_event);
+		bnxt_queue_sp_work(bp);
+	}
+	rxr->rx_next_cons = 0xffff;
+}
+
 const u16 bnxt_lhint_arr[] = {
 	TX_BD_FLAGS_LHINT_512_AND_SMALLER,
 	TX_BD_FLAGS_LHINT_512_TO_1023,
@@ -1234,38 +1266,6 @@ static int bnxt_discard_rx(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 	return 0;
 }
 
-static void bnxt_queue_fw_reset_work(struct bnxt *bp, unsigned long delay)
-{
-	if (!(test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)))
-		return;
-
-	if (BNXT_PF(bp))
-		queue_delayed_work(bnxt_pf_wq, &bp->fw_reset_task, delay);
-	else
-		schedule_delayed_work(&bp->fw_reset_task, delay);
-}
-
-static void bnxt_queue_sp_work(struct bnxt *bp)
-{
-	if (BNXT_PF(bp))
-		queue_work(bnxt_pf_wq, &bp->sp_task);
-	else
-		schedule_work(&bp->sp_task);
-}
-
-static void bnxt_sched_reset(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
-{
-	if (!rxr->bnapi->in_reset) {
-		rxr->bnapi->in_reset = true;
-		if (bp->flags & BNXT_FLAG_CHIP_P5)
-			set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event);
-		else
-			set_bit(BNXT_RST_RING_SP_EVENT, &bp->sp_event);
-		bnxt_queue_sp_work(bp);
-	}
-	rxr->rx_next_cons = 0xffff;
-}
-
 static u16 bnxt_alloc_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id)
 {
 	struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
@@ -1320,7 +1320,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
 		netdev_warn(bp->dev, "TPA cons %x, expected cons %x, error code %x\n",
 			    cons, rxr->rx_next_cons,
 			    TPA_START_ERROR_CODE(tpa_start1));
-		bnxt_sched_reset(bp, rxr);
+		bnxt_sched_reset_rxr(bp, rxr);
 		return;
 	}
 	/* Store cfa_code in tpa_info to use in tpa_end
@@ -1844,7 +1844,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 		if (rxr->rx_next_cons != 0xffff)
 			netdev_warn(bp->dev, "RX cons %x != expected cons %x\n",
 				    cons, rxr->rx_next_cons);
-		bnxt_sched_reset(bp, rxr);
+		bnxt_sched_reset_rxr(bp, rxr);
 		if (rc1)
 			return rc1;
 		goto next_rx_no_prod_no_len;
@@ -1882,7 +1882,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 			    !(bp->fw_cap & BNXT_FW_CAP_RING_MONITOR)) {
 				netdev_warn_once(bp->dev, "RX buffer error %x\n",
 						 rx_err);
-				bnxt_sched_reset(bp, rxr);
+				bnxt_sched_reset_rxr(bp, rxr);
 			}
 		}
 		goto next_rx_no_len;
@@ -2329,7 +2329,7 @@ static int bnxt_async_event_process(struct bnxt *bp,
 			goto async_event_process_exit;
 		}
 		rxr = bp->bnapi[grp_idx]->rx_ring;
-		bnxt_sched_reset(bp, rxr);
+		bnxt_sched_reset_rxr(bp, rxr);
 		goto async_event_process_exit;
 	}
 	case ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST: {
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH net-next 2/3] eth: bnxt: take the bit to set as argument of bnxt_queue_sp_work()
  2023-07-10 20:56 [PATCH net-next 0/3] eth: bnxt: handle invalid Tx completions more gracefully Jakub Kicinski
  2023-07-10 20:56 ` [PATCH net-next 1/3] eth: bnxt: move and rename reset helpers Jakub Kicinski
@ 2023-07-10 20:56 ` Jakub Kicinski
  2023-07-10 20:56 ` [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully Jakub Kicinski
  2023-07-10 21:44 ` [PATCH net-next 0/3] " Michael Chan
  3 siblings, 0 replies; 19+ messages in thread
From: Jakub Kicinski @ 2023-07-10 20:56 UTC (permalink / raw)
  To: davem; +Cc: netdev, edumazet, pabeni, michael.chan, Jakub Kicinski

Most callers of bnxt_queue_sp_work() set a bit to indicate what work
to perform right before calling it. Pass it to the function instead.

Otherwise for more commonly set bits (like reset) it'd be tempting
to create a single-purpose wrapper.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 61 ++++++++++-------------
 1 file changed, 26 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 568bcf1c9928..b8ddad33b01a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -304,7 +304,7 @@ static void bnxt_queue_fw_reset_work(struct bnxt *bp, unsigned long delay)
 		schedule_delayed_work(&bp->fw_reset_task, delay);
 }
 
-static void bnxt_queue_sp_work(struct bnxt *bp)
+static void __bnxt_queue_sp_work(struct bnxt *bp)
 {
 	if (BNXT_PF(bp))
 		queue_work(bnxt_pf_wq, &bp->sp_task);
@@ -312,6 +312,12 @@ static void bnxt_queue_sp_work(struct bnxt *bp)
 		schedule_work(&bp->sp_task);
 }
 
+static void bnxt_queue_sp_work(struct bnxt *bp, unsigned int event)
+{
+	set_bit(event, &bp->sp_event);
+	__bnxt_queue_sp_work(bp);
+}
+
 static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
 {
 	if (!rxr->bnapi->in_reset) {
@@ -320,7 +326,7 @@ static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
 			set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event);
 		else
 			set_bit(BNXT_RST_RING_SP_EVENT, &bp->sp_event);
-		bnxt_queue_sp_work(bp);
+		__bnxt_queue_sp_work(bp);
 	}
 	rxr->rx_next_cons = 0xffff;
 }
@@ -2384,7 +2390,7 @@ static int bnxt_async_event_process(struct bnxt *bp,
 	default:
 		goto async_event_process_exit;
 	}
-	bnxt_queue_sp_work(bp);
+	__bnxt_queue_sp_work(bp);
 async_event_process_exit:
 	return 0;
 }
@@ -2413,8 +2419,7 @@ static int bnxt_hwrm_handler(struct bnxt *bp, struct tx_cmp *txcmp)
 		}
 
 		set_bit(vf_id - bp->pf.first_vf_id, bp->pf.vf_event_bmap);
-		set_bit(BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT, &bp->sp_event);
-		bnxt_queue_sp_work(bp);
+		bnxt_queue_sp_work(bp, BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT);
 		break;
 
 	case CMPL_BASE_TYPE_HWRM_ASYNC_EVENT:
@@ -11031,8 +11036,7 @@ static void bnxt_set_rx_mode(struct net_device *dev)
 	if (mask != vnic->rx_mask || uc_update || mc_update) {
 		vnic->rx_mask = mask;
 
-		set_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event);
-		bnxt_queue_sp_work(bp);
+		bnxt_queue_sp_work(bp, BNXT_RX_MASK_SP_EVENT);
 	}
 }
 
@@ -11597,8 +11601,7 @@ static void bnxt_tx_timeout(struct net_device *dev, unsigned int txqueue)
 	struct bnxt *bp = netdev_priv(dev);
 
 	netdev_err(bp->dev,  "TX timeout detected, starting reset task!\n");
-	set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event);
-	bnxt_queue_sp_work(bp);
+	bnxt_queue_sp_work(bp, BNXT_RESET_TASK_SP_EVENT);
 }
 
 static void bnxt_fw_health_check(struct bnxt *bp)
@@ -11635,8 +11638,7 @@ static void bnxt_fw_health_check(struct bnxt *bp)
 	return;
 
 fw_reset:
-	set_bit(BNXT_FW_EXCEPTION_SP_EVENT, &bp->sp_event);
-	bnxt_queue_sp_work(bp);
+	bnxt_queue_sp_work(bp, BNXT_FW_EXCEPTION_SP_EVENT);
 }
 
 static void bnxt_timer(struct timer_list *t)
@@ -11653,21 +11655,15 @@ static void bnxt_timer(struct timer_list *t)
 	if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)
 		bnxt_fw_health_check(bp);
 
-	if (BNXT_LINK_IS_UP(bp) && bp->stats_coal_ticks) {
-		set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event);
-		bnxt_queue_sp_work(bp);
-	}
+	if (BNXT_LINK_IS_UP(bp) && bp->stats_coal_ticks)
+		bnxt_queue_sp_work(bp, BNXT_PERIODIC_STATS_SP_EVENT);
 
-	if (bnxt_tc_flower_enabled(bp)) {
-		set_bit(BNXT_FLOW_STATS_SP_EVENT, &bp->sp_event);
-		bnxt_queue_sp_work(bp);
-	}
+	if (bnxt_tc_flower_enabled(bp))
+		bnxt_queue_sp_work(bp, BNXT_FLOW_STATS_SP_EVENT);
 
 #ifdef CONFIG_RFS_ACCEL
-	if ((bp->flags & BNXT_FLAG_RFS) && bp->ntp_fltr_count) {
-		set_bit(BNXT_RX_NTP_FLTR_SP_EVENT, &bp->sp_event);
-		bnxt_queue_sp_work(bp);
-	}
+	if ((bp->flags & BNXT_FLAG_RFS) && bp->ntp_fltr_count)
+		bnxt_queue_sp_work(bp, BNXT_RX_NTP_FLTR_SP_EVENT);
 #endif /*CONFIG_RFS_ACCEL*/
 
 	if (bp->link_info.phy_retry) {
@@ -11675,21 +11671,17 @@ static void bnxt_timer(struct timer_list *t)
 			bp->link_info.phy_retry = false;
 			netdev_warn(bp->dev, "failed to update phy settings after maximum retries.\n");
 		} else {
-			set_bit(BNXT_UPDATE_PHY_SP_EVENT, &bp->sp_event);
-			bnxt_queue_sp_work(bp);
+			bnxt_queue_sp_work(bp, BNXT_UPDATE_PHY_SP_EVENT);
 		}
 	}
 
-	if (test_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state)) {
-		set_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event);
-		bnxt_queue_sp_work(bp);
-	}
+	if (test_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state))
+		bnxt_queue_sp_work(bp, BNXT_RX_MASK_SP_EVENT);
 
 	if ((bp->flags & BNXT_FLAG_CHIP_P5) && !bp->chip_rev &&
-	    netif_carrier_ok(dev)) {
-		set_bit(BNXT_RING_COAL_NOW_SP_EVENT, &bp->sp_event);
-		bnxt_queue_sp_work(bp);
-	}
+	    netif_carrier_ok(dev))
+		bnxt_queue_sp_work(bp, BNXT_RING_COAL_NOW_SP_EVENT);
+
 bnxt_restart_timer:
 	mod_timer(&bp->timer, jiffies + bp->current_interval);
 }
@@ -12968,8 +12960,7 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 	bp->ntp_fltr_count++;
 	spin_unlock_bh(&bp->ntp_fltr_lock);
 
-	set_bit(BNXT_RX_NTP_FLTR_SP_EVENT, &bp->sp_event);
-	bnxt_queue_sp_work(bp);
+	bnxt_queue_sp_work(bp, BNXT_RX_NTP_FLTR_SP_EVENT);
 
 	return new_fltr->sw_id;
 
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-10 20:56 [PATCH net-next 0/3] eth: bnxt: handle invalid Tx completions more gracefully Jakub Kicinski
  2023-07-10 20:56 ` [PATCH net-next 1/3] eth: bnxt: move and rename reset helpers Jakub Kicinski
  2023-07-10 20:56 ` [PATCH net-next 2/3] eth: bnxt: take the bit to set as argument of bnxt_queue_sp_work() Jakub Kicinski
@ 2023-07-10 20:56 ` Jakub Kicinski
  2023-07-11  8:00   ` Michael Chan
  2023-07-11 10:10   ` Paolo Abeni
  2023-07-10 21:44 ` [PATCH net-next 0/3] " Michael Chan
  3 siblings, 2 replies; 19+ messages in thread
From: Jakub Kicinski @ 2023-07-10 20:56 UTC (permalink / raw)
  To: davem; +Cc: netdev, edumazet, pabeni, michael.chan, Jakub Kicinski

Invalid Tx completions should never happen (tm) but when they do
they crash the host, because driver blindly trusts that there is
a valid skb pointer on the ring.

The completions I've seen appear to be some form of FW / HW
miscalculation or staleness, they have typical (small) values
(<100), but they are most often higher than number of queued
descriptors. They usually happen after boot.

Instead of crashing print a warning and schedule a reset.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 14 +++++++++++++-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  1 +
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b8ddad33b01a..bfa56f35d2e0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -690,6 +690,16 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
 		skb = tx_buf->skb;
 		tx_buf->skb = NULL;
 
+		if (unlikely(!skb)) {
+			netdev_err(bp->dev, "Invalid Tx completion (ring:%d tx_pkts:%d cons:%u prod:%u i:%d)",
+				   txr->txq_index, bnapi->tx_pkts,
+				   txr->tx_cons, txr->tx_prod, i);
+			WARN_ON_ONCE(1);
+			bnapi->tx_fault = 1;
+			bnxt_queue_sp_work(bp, BNXT_RESET_TASK_SP_EVENT);
+			return;
+		}
+
 		tx_bytes += skb->len;
 
 		if (tx_buf->is_push) {
@@ -2576,7 +2586,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 
 static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi)
 {
-	if (bnapi->tx_pkts) {
+	if (bnapi->tx_pkts && !bnapi->tx_fault) {
 		bnapi->tx_int(bp, bnapi, bnapi->tx_pkts);
 		bnapi->tx_pkts = 0;
 	}
@@ -9429,6 +9439,8 @@ static void bnxt_enable_napi(struct bnxt *bp)
 		struct bnxt_napi *bnapi = bp->bnapi[i];
 		struct bnxt_cp_ring_info *cpr;
 
+		bnapi->tx_fault = 0;
+
 		cpr = &bnapi->cp_ring;
 		if (bnapi->in_reset)
 			cpr->sw_stats.rx.rx_resets++;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 080e73496066..08ce9046bfd2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1008,6 +1008,7 @@ struct bnxt_napi {
 					  int);
 	int			tx_pkts;
 	u8			events;
+	u8			tx_fault:1;
 
 	u32			flags;
 #define BNXT_NAPI_FLAG_XDP	0x1
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 0/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-10 20:56 [PATCH net-next 0/3] eth: bnxt: handle invalid Tx completions more gracefully Jakub Kicinski
                   ` (2 preceding siblings ...)
  2023-07-10 20:56 ` [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully Jakub Kicinski
@ 2023-07-10 21:44 ` Michael Chan
  2023-07-11  0:38   ` Jakub Kicinski
  3 siblings, 1 reply; 19+ messages in thread
From: Michael Chan @ 2023-07-10 21:44 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: davem, netdev, edumazet, pabeni

On Mon, Jul 10, 2023 at 1:56 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> bnxt trusts the events generated by the device which may lead to kernel
> crashes. These are extremely rare but they do happen. For a while
> I thought crashing may be intentional, because device reporting invalid
> completions should never happen, and having a core dump could be useful
> if it does. But in practice I haven't found any clues in the core dumps,
> and panic_on_warn exists.

Indeed, it was intentional to crash the kernel so that we could
analyze the rings in the core dump.  Typically, we would find a bad
completion in one of the rings and we would debug it with the hardware
team during early chip testing.  Either the bug is fixed or some
suitable workaround is implemented.  Ideally, this should never happen
once the chip goes into production.

I suppose in a large enough deployment, this NULL SKB crash can
happen.  I will review your patchset later today.  Thanks.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 0/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-10 21:44 ` [PATCH net-next 0/3] " Michael Chan
@ 2023-07-11  0:38   ` Jakub Kicinski
  0 siblings, 0 replies; 19+ messages in thread
From: Jakub Kicinski @ 2023-07-11  0:38 UTC (permalink / raw)
  To: Michael Chan; +Cc: davem, netdev, edumazet, pabeni

On Mon, 10 Jul 2023 14:44:31 -0700 Michael Chan wrote:
> > bnxt trusts the events generated by the device which may lead to kernel
> > crashes. These are extremely rare but they do happen. For a while
> > I thought crashing may be intentional, because device reporting invalid
> > completions should never happen, and having a core dump could be useful
> > if it does. But in practice I haven't found any clues in the core dumps,
> > and panic_on_warn exists.  
> 
> Indeed, it was intentional to crash the kernel so that we could
> analyze the rings in the core dump.  Typically, we would find a bad
> completion in one of the rings and we would debug it with the hardware
> team during early chip testing.  Either the bug is fixed or some
> suitable workaround is implemented.  Ideally, this should never happen
> once the chip goes into production.

I was suspecting bad HW, but some new platforms seems to be hitting it,
too. Which now makes me suspect PXE -> Linux hand off problem? 
Or multi-host?  Hard to tell..
Hopefully once it's not crashing it will be easier to do more analysis -
crashes within softirq during boot don't propagate too well into
monitoring systems :(

> I suppose in a large enough deployment, this NULL SKB crash can
> happen.  I will review your patchset later today.  Thanks.

Thanks!

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-10 20:56 ` [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully Jakub Kicinski
@ 2023-07-11  8:00   ` Michael Chan
  2023-07-12  0:01     ` Michael Chan
  2023-07-11 10:10   ` Paolo Abeni
  1 sibling, 1 reply; 19+ messages in thread
From: Michael Chan @ 2023-07-11  8:00 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: davem, netdev, edumazet, pabeni

[-- Attachment #1: Type: text/plain, Size: 1015 bytes --]

On Mon, Jul 10, 2023 at 1:56 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> Invalid Tx completions should never happen (tm) but when they do
> they crash the host, because driver blindly trusts that there is
> a valid skb pointer on the ring.
>
> The completions I've seen appear to be some form of FW / HW
> miscalculation or staleness, they have typical (small) values
> (<100), but they are most often higher than number of queued
> descriptors. They usually happen after boot.
>
> Instead of crashing print a warning and schedule a reset.

It generally looks good to me.  I have a few comments below.

The logic is very similar to the bnapi->in_reset logic to reset due to
RX errors.  We have a counter for the number of times we do the RX
reset so I think it might be good to add a similar TX reset counter.

The XDP code path can potentially crash in a similar way if we get a
bad completion from hardware.  I'm not sure if we should add similar
logic to the XDP code path.

Thanks.

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4209 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-10 20:56 ` [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully Jakub Kicinski
  2023-07-11  8:00   ` Michael Chan
@ 2023-07-11 10:10   ` Paolo Abeni
  2023-07-12  1:19     ` Jakub Kicinski
  1 sibling, 1 reply; 19+ messages in thread
From: Paolo Abeni @ 2023-07-11 10:10 UTC (permalink / raw)
  To: Jakub Kicinski, davem; +Cc: netdev, edumazet, michael.chan

On Mon, 2023-07-10 at 13:56 -0700, Jakub Kicinski wrote:
> Invalid Tx completions should never happen (tm) but when they do
> they crash the host, because driver blindly trusts that there is
> a valid skb pointer on the ring.
> 
> The completions I've seen appear to be some form of FW / HW
> miscalculation or staleness, they have typical (small) values
> (<100), but they are most often higher than number of queued
> descriptors. They usually happen after boot.
> 
> Instead of crashing print a warning and schedule a reset.
> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
>  drivers/net/ethernet/broadcom/bnxt/bnxt.c | 14 +++++++++++++-
>  drivers/net/ethernet/broadcom/bnxt/bnxt.h |  1 +
>  2 files changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> index b8ddad33b01a..bfa56f35d2e0 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> @@ -690,6 +690,16 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
>  		skb = tx_buf->skb;
>  		tx_buf->skb = NULL;
>  
> +		if (unlikely(!skb)) {
> +			netdev_err(bp->dev, "Invalid Tx completion (ring:%d tx_pkts:%d cons:%u prod:%u i:%d)",
> +				   txr->txq_index, bnapi->tx_pkts,
> +				   txr->tx_cons, txr->tx_prod, i);
> +			WARN_ON_ONCE(1);
> +			bnapi->tx_fault = 1;
> +			bnxt_queue_sp_work(bp, BNXT_RESET_TASK_SP_EVENT);
> +			return;
> +		}
> +
>  		tx_bytes += skb->len;
>  
>  		if (tx_buf->is_push) {
> @@ -2576,7 +2586,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
>  
>  static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi)
>  {
> -	if (bnapi->tx_pkts) {
> +	if (bnapi->tx_pkts && !bnapi->tx_fault) {
>  		bnapi->tx_int(bp, bnapi, bnapi->tx_pkts);
>  		bnapi->tx_pkts = 0;
>  	}
> @@ -9429,6 +9439,8 @@ static void bnxt_enable_napi(struct bnxt *bp)
>  		struct bnxt_napi *bnapi = bp->bnapi[i];
>  		struct bnxt_cp_ring_info *cpr;
>  
> +		bnapi->tx_fault = 0;
> +
>  		cpr = &bnapi->cp_ring;
>  		if (bnapi->in_reset)
>  			cpr->sw_stats.rx.rx_resets++;
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> index 080e73496066..08ce9046bfd2 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> @@ -1008,6 +1008,7 @@ struct bnxt_napi {
>  					  int);
>  	int			tx_pkts;
>  	u8			events;
> +	u8			tx_fault:1;

Since there are still a few holes avail, I would use a plain u8 (or
bool) to help the compiler emit better code.

Cheers,

Paolo


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-11  8:00   ` Michael Chan
@ 2023-07-12  0:01     ` Michael Chan
  2023-07-12  1:09       ` Jakub Kicinski
  0 siblings, 1 reply; 19+ messages in thread
From: Michael Chan @ 2023-07-12  0:01 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: davem, netdev, edumazet, pabeni

[-- Attachment #1: Type: text/plain, Size: 1379 bytes --]

On Tue, Jul 11, 2023 at 1:00 AM Michael Chan <michael.chan@broadcom.com> wrote:
>
> On Mon, Jul 10, 2023 at 1:56 PM Jakub Kicinski <kuba@kernel.org> wrote:
> >
> > Invalid Tx completions should never happen (tm) but when they do
> > they crash the host, because driver blindly trusts that there is
> > a valid skb pointer on the ring.
> >
> > The completions I've seen appear to be some form of FW / HW
> > miscalculation or staleness, they have typical (small) values
> > (<100), but they are most often higher than number of queued
> > descriptors. They usually happen after boot.
> >
> > Instead of crashing print a warning and schedule a reset.
>
> It generally looks good to me.  I have a few comments below.
>
> The logic is very similar to the bnapi->in_reset logic to reset due to
> RX errors.  We have a counter for the number of times we do the RX
> reset so I think it might be good to add a similar TX reset counter.

Never mind about the counter.  Since we are doing a complete reset,
the cpr structure will be freed anyway and the counter won't persist.

Later when we add support for per TX ring reset, we can add the
counter at that time.

>
> The XDP code path can potentially crash in a similar way if we get a
> bad completion from hardware.  I'm not sure if we should add similar
> logic to the XDP code path.
>
> Thanks.

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4209 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-12  0:01     ` Michael Chan
@ 2023-07-12  1:09       ` Jakub Kicinski
  2023-07-12  4:11         ` Michael Chan
  0 siblings, 1 reply; 19+ messages in thread
From: Jakub Kicinski @ 2023-07-12  1:09 UTC (permalink / raw)
  To: Michael Chan; +Cc: davem, netdev, edumazet, pabeni

On Tue, 11 Jul 2023 17:01:08 -0700 Michael Chan wrote:
> > It generally looks good to me.  I have a few comments below.
> >
> > The logic is very similar to the bnapi->in_reset logic to reset due to
> > RX errors.  We have a counter for the number of times we do the RX
> > reset so I think it might be good to add a similar TX reset counter.  
> 
> Never mind about the counter.  Since we are doing a complete reset,
> the cpr structure will be freed anyway and the counter won't persist.
> 
> Later when we add support for per TX ring reset, we can add the
> counter at that time.

Oh, if all the cpr stats get lost during reset or re-config, that's
quite unfortunate. We should get that fixed without waiting for per
ring resets of any sort, just in case that takes long. 

Can we stash the old sum somewhere and report in ethtool as "non-ring"
or "old" or ..?

> > The XDP code path can potentially crash in a similar way if we get a
> > bad completion from hardware.  I'm not sure if we should add similar
> > logic to the XDP code path.

Ack, will fix.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-11 10:10   ` Paolo Abeni
@ 2023-07-12  1:19     ` Jakub Kicinski
  2023-07-12  6:50       ` Paolo Abeni
  0 siblings, 1 reply; 19+ messages in thread
From: Jakub Kicinski @ 2023-07-12  1:19 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: davem, netdev, edumazet, michael.chan

On Tue, 11 Jul 2023 12:10:28 +0200 Paolo Abeni wrote:
> On Mon, 2023-07-10 at 13:56 -0700, Jakub Kicinski wrote:
> > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> > index 080e73496066..08ce9046bfd2 100644
> > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> > @@ -1008,6 +1008,7 @@ struct bnxt_napi {
> >  					  int);
> >  	int			tx_pkts;
> >  	u8			events;
> > +	u8			tx_fault:1;  
> 
> Since there are still a few holes avail, I would use a plain u8 (or
> bool) to help the compiler emit better code.

Is that still true or was it only true for old compilers?
With gcc version 13.1.1 20230614 :

$ cat /tmp/t.c 
#include <strings.h>

struct some {
    void (*f)(void);
    unsigned char b;
#ifdef BLA
    _Bool a;
#else
    unsigned char a:1;
#endif
};

int bla(struct some *s)
{
    if (s->a)
        s->f();
    return 0;
}

$ gcc -W -Wall -O2  /tmp/t.c -o /tmp/t -c
$ objdump -S /tmp/t > /tmp/a
$ gcc -DBLA -W -Wall -O2  /tmp/t.c -o /tmp/t -c
$ objdump -S /tmp/t > /tmp/b
$ diff /tmp/a /tmp/b
8c8
<    0:	f6 47 09 01          	testb  $0x1,0x9(%rdi)
---
>    0:	80 7f 09 00          	cmpb   $0x0,0x9(%rdi)

$ gcc -V

Shouldn't matter, right?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-12  1:09       ` Jakub Kicinski
@ 2023-07-12  4:11         ` Michael Chan
  2023-07-12  4:24           ` Jakub Kicinski
  0 siblings, 1 reply; 19+ messages in thread
From: Michael Chan @ 2023-07-12  4:11 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: davem, netdev, edumazet, pabeni

[-- Attachment #1: Type: text/plain, Size: 1842 bytes --]

On Tue, Jul 11, 2023 at 6:09 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Tue, 11 Jul 2023 17:01:08 -0700 Michael Chan wrote:
> > > It generally looks good to me.  I have a few comments below.
> > >
> > > The logic is very similar to the bnapi->in_reset logic to reset due to
> > > RX errors.  We have a counter for the number of times we do the RX
> > > reset so I think it might be good to add a similar TX reset counter.
> >
> > Never mind about the counter.  Since we are doing a complete reset,
> > the cpr structure will be freed anyway and the counter won't persist.
> >
> > Later when we add support for per TX ring reset, we can add the
> > counter at that time.
>
> Oh, if all the cpr stats get lost during reset or re-config, that's
> quite unfortunate. We should get that fixed without waiting for per
> ring resets of any sort, just in case that takes long.
>
> Can we stash the old sum somewhere and report in ethtool as "non-ring"
> or "old" or ..?
>

These are all the software stats that will be reset during a complete reset:

struct bnxt_rx_sw_stats {
        u64                     rx_l4_csum_errors;
        u64                     rx_resets;
        u64                     rx_buf_errors;
        u64                     rx_oom_discards;
        u64                     rx_netpoll_discards;
};

struct bnxt_cmn_sw_stats {
        u64                     missed_irqs;
};

These are per ring counters.  During complete reset, we free the ring
and allocate a new ring.  So re-applying these counters to the new
ring doesn't make sense because the new ring is not necessarily the
same as the old one.  So I think we'll need to have a total for each
of these and we'll save the snapshot of the total before reset and
restore the snapshot after reset.  Does that make sense?

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4209 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-12  4:11         ` Michael Chan
@ 2023-07-12  4:24           ` Jakub Kicinski
  2023-07-12  4:50             ` Michael Chan
  0 siblings, 1 reply; 19+ messages in thread
From: Jakub Kicinski @ 2023-07-12  4:24 UTC (permalink / raw)
  To: Michael Chan; +Cc: davem, netdev, edumazet, pabeni

On Tue, 11 Jul 2023 21:11:02 -0700 Michael Chan wrote:
> These are per ring counters.  During complete reset, we free the ring
> and allocate a new ring.  So re-applying these counters to the new
> ring doesn't make sense because the new ring is not necessarily the
> same as the old one.  So I think we'll need to have a total for each
> of these and we'll save the snapshot of the total before reset and
> restore the snapshot after reset.  Does that make sense?

Not entirely sure what you mean by total. The counters are reported 
in ethool -S per ring and (aggregated) in ip -s -s.

Are you saying that in ethtool -S in addition to per-ring counts
we'd report a "total" which is sum(current per ring) + saved?
If so - that makes sense, yup.

ip -s -s sort of follows so no need discussing.

You mention reset but the errors counters should survive close() /
open() cycles as well.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-12  4:24           ` Jakub Kicinski
@ 2023-07-12  4:50             ` Michael Chan
  2023-07-12 16:35               ` Jakub Kicinski
  0 siblings, 1 reply; 19+ messages in thread
From: Michael Chan @ 2023-07-12  4:50 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: davem, netdev, edumazet, pabeni

[-- Attachment #1: Type: text/plain, Size: 699 bytes --]

On Tue, Jul 11, 2023 at 9:24 PM Jakub Kicinski <kuba@kernel.org> wrote:

> Are you saying that in ethtool -S in addition to per-ring counts
> we'd report a "total" which is sum(current per ring) + saved?
> If so - that makes sense, yup.

Yes, for example, we have "rx_resets" which is per ring.  We'll add a
"rx_total_resets" which is the sum of all current "rx_resets" + the
saved snapshot.  The per ring "rx_resets" will reset to 0 during each
reset (including ifdown).  But "rx_total_resets" will be saved across
reset.

> You mention reset but the errors counters should survive close() /
> open() cycles as well.

Yes.  It's the same reset whether it is ifdown/ifup or reset.

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4209 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-12  1:19     ` Jakub Kicinski
@ 2023-07-12  6:50       ` Paolo Abeni
  2023-07-12 16:34         ` Jakub Kicinski
  0 siblings, 1 reply; 19+ messages in thread
From: Paolo Abeni @ 2023-07-12  6:50 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: davem, netdev, edumazet, michael.chan

On Tue, 2023-07-11 at 18:19 -0700, Jakub Kicinski wrote:
> On Tue, 11 Jul 2023 12:10:28 +0200 Paolo Abeni wrote:
> > On Mon, 2023-07-10 at 13:56 -0700, Jakub Kicinski wrote:
> > > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> > > index 080e73496066..08ce9046bfd2 100644
> > > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> > > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> > > @@ -1008,6 +1008,7 @@ struct bnxt_napi {
> > >  					  int);
> > >  	int			tx_pkts;
> > >  	u8			events;
> > > +	u8			tx_fault:1;  
> > 
> > Since there are still a few holes avail, I would use a plain u8 (or
> > bool) to help the compiler emit better code.
> 
> Is that still true or was it only true for old compilers?
> With gcc version 13.1.1 20230614 :
> 
> $ cat /tmp/t.c 
> #include <strings.h>
> 
> struct some {
>     void (*f)(void);
>     unsigned char b;
> #ifdef BLA
>     _Bool a;
> #else
>     unsigned char a:1;
> #endif
> };
> 
> int bla(struct some *s)
> {
>     if (s->a)
>         s->f();
>     return 0;
> }
> 
> $ gcc -W -Wall -O2  /tmp/t.c -o /tmp/t -c
> $ objdump -S /tmp/t > /tmp/a
> $ gcc -DBLA -W -Wall -O2  /tmp/t.c -o /tmp/t -c
> $ objdump -S /tmp/t > /tmp/b
> $ diff /tmp/a /tmp/b
> 8c8
> <    0:	f6 47 09 01          	testb  $0x1,0x9(%rdi)
> ---
> >    0:	80 7f 09 00          	cmpb   $0x0,0x9(%rdi)
> 
> $ gcc -V
> 
> Shouldn't matter, right?

Surely not a big deal. But some users (possibly most of them!) have
older compiler. Including an assignment in the test code, I get this
additional difference:

-   c:	80 4b 09 01          	orb    $0x1,0x9(%rbx)
+   c:	c6 43 09 01          	movb   $0x1,0x9(%rbx)

with the bitfield using the 'or' operation. Not a big deal, but the
other option is slightly better ;)

Cheers,

Paolo



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-12  6:50       ` Paolo Abeni
@ 2023-07-12 16:34         ` Jakub Kicinski
  2023-07-12 20:31           ` Paolo Abeni
  0 siblings, 1 reply; 19+ messages in thread
From: Jakub Kicinski @ 2023-07-12 16:34 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: davem, netdev, edumazet, michael.chan

On Wed, 12 Jul 2023 08:50:06 +0200 Paolo Abeni wrote:
> Surely not a big deal. But some users (possibly most of them!) have
> older compiler.

I checked GCC 10 and GCC 9, and the code is the same :(
Any idea on how old do we need to go?

> Including an assignment in the test code, I get this
> additional difference:
> 
> -   c:	80 4b 09 01          	orb    $0x1,0x9(%rbx)
> +   c:	c6 43 09 01          	movb   $0x1,0x9(%rbx)
> 
> with the bitfield using the 'or' operation. Not a big deal, but the
> other option is slightly better ;)

Is there really any difference whether one changes a byte or ors
in a bit? Either way it's a partial update of a word.

multi-bit fields may be harder for the compiler, especially weirdly
aligned but for trivial single bit values I think we may be overly
cautious.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-12  4:50             ` Michael Chan
@ 2023-07-12 16:35               ` Jakub Kicinski
  0 siblings, 0 replies; 19+ messages in thread
From: Jakub Kicinski @ 2023-07-12 16:35 UTC (permalink / raw)
  To: Michael Chan; +Cc: davem, netdev, edumazet, pabeni

On Tue, 11 Jul 2023 21:50:16 -0700 Michael Chan wrote:
> > Are you saying that in ethtool -S in addition to per-ring counts
> > we'd report a "total" which is sum(current per ring) + saved?
> > If so - that makes sense, yup.  
> 
> Yes, for example, we have "rx_resets" which is per ring.  We'll add a
> "rx_total_resets" which is the sum of all current "rx_resets" + the
> saved snapshot.  The per ring "rx_resets" will reset to 0 during each
> reset (including ifdown).  But "rx_total_resets" will be saved across
> reset.
> 
> > You mention reset but the errors counters should survive close() /
> > open() cycles as well.  
> 
> Yes.  It's the same reset whether it is ifdown/ifup or reset.

Makes sense, and sounds good!

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-12 16:34         ` Jakub Kicinski
@ 2023-07-12 20:31           ` Paolo Abeni
  2023-07-12 20:50             ` Jakub Kicinski
  0 siblings, 1 reply; 19+ messages in thread
From: Paolo Abeni @ 2023-07-12 20:31 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: davem, netdev, edumazet, michael.chan

On Wed, 2023-07-12 at 09:34 -0700, Jakub Kicinski wrote:
> On Wed, 12 Jul 2023 08:50:06 +0200 Paolo Abeni wrote:
> > Surely not a big deal. But some users (possibly most of them!) have
> > older compiler.
> 
> I checked GCC 10 and GCC 9, and the code is the same :(
> Any idea on how old do we need to go?

I guess that would be more then enough!

> > Including an assignment in the test code, I get this
> > additional difference:
> > 
> > -   c:	80 4b 09 01          	orb    $0x1,0x9(%rbx)
> > +   c:	c6 43 09 01          	movb   $0x1,0x9(%rbx)
> > 
> > with the bitfield using the 'or' operation. Not a big deal, but the
> > other option is slightly better ;)
> 
> Is there really any difference whether one changes a byte or ors
> in a bit? Either way it's a partial update of a word.

Really not a big deal, but 'or' fetches memory and then store it, while
move [immediate] is a single store. In case of a cache miss, 'or'
should stall, while 'mov' should not. In general with 'mov' there
should be less pressure on the cache and/or bus.

Cheers,

Paolo


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully
  2023-07-12 20:31           ` Paolo Abeni
@ 2023-07-12 20:50             ` Jakub Kicinski
  0 siblings, 0 replies; 19+ messages in thread
From: Jakub Kicinski @ 2023-07-12 20:50 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: davem, netdev, edumazet, michael.chan

On Wed, 12 Jul 2023 22:31:45 +0200 Paolo Abeni wrote:
> > Is there really any difference whether one changes a byte or ors
> > in a bit? Either way it's a partial update of a word.  
> 
> Really not a big deal, but 'or' fetches memory and then store it, while
> move [immediate] is a single store. In case of a cache miss, 'or'
> should stall, while 'mov' should not. In general with 'mov' there
> should be less pressure on the cache and/or bus.

You're right, if the store buffer can buffer 1B stores then we won't
stall the instruction pile line. But we most likely will for the bit
op. The setting is an extremely rare path, tho, so given your
"not a big deal" disclaimer I'm intending to keep the bitfield :)

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2023-07-12 20:50 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-10 20:56 [PATCH net-next 0/3] eth: bnxt: handle invalid Tx completions more gracefully Jakub Kicinski
2023-07-10 20:56 ` [PATCH net-next 1/3] eth: bnxt: move and rename reset helpers Jakub Kicinski
2023-07-10 20:56 ` [PATCH net-next 2/3] eth: bnxt: take the bit to set as argument of bnxt_queue_sp_work() Jakub Kicinski
2023-07-10 20:56 ` [PATCH net-next 3/3] eth: bnxt: handle invalid Tx completions more gracefully Jakub Kicinski
2023-07-11  8:00   ` Michael Chan
2023-07-12  0:01     ` Michael Chan
2023-07-12  1:09       ` Jakub Kicinski
2023-07-12  4:11         ` Michael Chan
2023-07-12  4:24           ` Jakub Kicinski
2023-07-12  4:50             ` Michael Chan
2023-07-12 16:35               ` Jakub Kicinski
2023-07-11 10:10   ` Paolo Abeni
2023-07-12  1:19     ` Jakub Kicinski
2023-07-12  6:50       ` Paolo Abeni
2023-07-12 16:34         ` Jakub Kicinski
2023-07-12 20:31           ` Paolo Abeni
2023-07-12 20:50             ` Jakub Kicinski
2023-07-10 21:44 ` [PATCH net-next 0/3] " Michael Chan
2023-07-11  0:38   ` Jakub Kicinski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.