On Tue, 2020-03-03 at 17:06 +0000, David Laight wrote: > Instead of spinning waiting for the ME to be idle defer the ring > tail updates until one of the following: > - The next update for that ring. > - The receive frame processing. > - The next timer tick. > > Reduce the delay between checks for the ME being idle from 50us > to uus. > > Part fix for bdc125f7. > > Signed-off-by: David Laight Added intel-wired-lan@lists.osuosl.org mailing list, so the right people can review your patch. > --- > This change probably applies as back as far as 3.16. > > drivers/net/ethernet/intel/e1000e/e1000.h | 3 + > drivers/net/ethernet/intel/e1000e/ich8lan.h | 2 +- > drivers/net/ethernet/intel/e1000e/netdev.c | 131 > ++++++++++++++++++++-------- > 3 files changed, 99 insertions(+), 37 deletions(-) > > diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h > b/drivers/net/ethernet/intel/e1000e/e1000.h > index 6c51b1b..c7819e0 100644 > --- a/drivers/net/ethernet/intel/e1000e/e1000.h > +++ b/drivers/net/ethernet/intel/e1000e/e1000.h > @@ -155,6 +155,8 @@ struct e1000_ring { > > u16 next_to_use; > u16 next_to_clean; > +#define E1000_RING_NOT_DEFERRED 0xffff > + u16 next_to_use_deferred; > > void __iomem *head; > void __iomem *tail; > @@ -190,6 +192,7 @@ struct e1000_adapter { > > struct work_struct reset_task; > struct delayed_work watchdog_task; > + struct delayed_work delay_ring_write_task; > > struct workqueue_struct *e1000_workqueue; > > diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h > b/drivers/net/ethernet/intel/e1000e/ich8lan.h > index 1502895..75e5a53 100644 > --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h > +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h > @@ -34,7 +34,7 @@ > /* FW established a valid mode */ > #define E1000_ICH_FWSM_FW_VALID 0x00008000 > #define E1000_ICH_FWSM_PCIM2PCI 0x01000000 /* ME PCIm- > to-PCI active */ > -#define E1000_ICH_FWSM_PCIM2PCI_COUNT 2000 > +#define E1000_ICH_FWSM_PCIM2PCI_COUNT 20000 > > #define E1000_ICH_MNG_IAMT_MODE 0x2 > > diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c > b/drivers/net/ethernet/intel/e1000e/netdev.c > index d7d56e4..299e5af 100644 > --- a/drivers/net/ethernet/intel/e1000e/netdev.c > +++ b/drivers/net/ethernet/intel/e1000e/netdev.c > @@ -118,13 +118,20 @@ struct e1000_reg_info { > * an incorrect value. Workaround this by checking the FWSM > register which > * has bit 24 set while ME is accessing MAC CSR registers, wait if > it is set > * and try again a number of times. > + * NB: This bit can stay set for 300us even if polled every 5us. > **/ > + > +static bool __ew32_prepare_busy(struct e1000_hw *hw) > +{ > + return (er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI); > +} > + > s32 __ew32_prepare(struct e1000_hw *hw) > { > s32 i = E1000_ICH_FWSM_PCIM2PCI_COUNT; > > - while ((er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI) && --i) > - udelay(50); > + while (__ew32_prepare_busy(hw) && --i) > + udelay(5); > > return i; > } > @@ -603,38 +610,75 @@ static void e1000_rx_checksum(struct > e1000_adapter *adapter, u32 status_err, > adapter->hw_csum_good++; > } > > -static void e1000e_update_rdt_wa(struct e1000_ring *rx_ring, > unsigned int i) > +static bool deferred_update_dt_wa(struct e1000_ring *ring) > { > - struct e1000_adapter *adapter = rx_ring->adapter; > - struct e1000_hw *hw = &adapter->hw; > - s32 ret_val = __ew32_prepare(hw); > - > - writel(i, rx_ring->tail); > + struct e1000_adapter *adapter; > + struct e1000_hw *hw; > + unsigned long flags; > + u32 deferred; > > - if (unlikely(!ret_val && (i != readl(rx_ring->tail)))) { > - u32 rctl = er32(RCTL); > + adapter = ring->adapter; > + hw = &adapter->hw; > + spin_lock_irqsave(&adapter->systim_lock, flags); > + deferred = ring->next_to_use_deferred; > + if (deferred != E1000_RING_NOT_DEFERRED) { > + if (__ew32_prepare_busy(hw)) { > + spin_unlock_irqrestore(&adapter->systim_lock, > flags); > + /* Caller needs to ensure timer running */ > + return true; > + } > > - ew32(RCTL, rctl & ~E1000_RCTL_EN); > - e_err("ME firmware caused invalid RDT - resetting\n"); > - schedule_work(&adapter->reset_task); > + writel(deferred, ring->tail); > + ring->next_to_use_deferred = E1000_RING_NOT_DEFERRED; > } > + spin_unlock_irqrestore(&adapter->systim_lock, flags); > + return false; > } > > -static void e1000e_update_tdt_wa(struct e1000_ring *tx_ring, > unsigned int i) > +/* Inline the cheap unlocked test */ > +#define e1000e_deferred_update_dt_wa(ring) \ > + (likely(READ_ONCE(ring->next_to_use_deferred) == \ > + E1000_RING_NOT_DEFERRED) ? false : \ > + deferred_update_dt_wa(ring)) > + > +static void e1000e_delay_ring_write_task(struct work_struct *work) > { > - struct e1000_adapter *adapter = tx_ring->adapter; > - struct e1000_hw *hw = &adapter->hw; > - s32 ret_val = __ew32_prepare(hw); > + struct e1000_adapter *adapter; > > - writel(i, tx_ring->tail); > + adapter = container_of(work, struct e1000_adapter, > + delay_ring_write_task.work); > > - if (unlikely(!ret_val && (i != readl(tx_ring->tail)))) { > - u32 tctl = er32(TCTL); > + if (e1000e_deferred_update_dt_wa(adapter->tx_ring) | > + e1000e_deferred_update_dt_wa(adapter->rx_ring)) > + mod_delayed_work(adapter->e1000_workqueue, > + &adapter->delay_ring_write_task, 1); > +} > > - ew32(TCTL, tctl & ~E1000_TCTL_EN); > - e_err("ME firmware caused invalid TDT - resetting\n"); > - schedule_work(&adapter->reset_task); > +static void e1000e_update_dt_wa(struct e1000_ring *ring, unsigned > int i) > +{ > + struct e1000_adapter *adapter = ring->adapter; > + struct e1000_hw *hw = &adapter->hw; > + unsigned long flags; > + u32 deferred; > + > + deferred = READ_ONCE(ring->next_to_use_deferred); > + if (unlikely(deferred != E1000_RING_NOT_DEFERRED)) { > + /* previous write was deferred - forget about it */ > + spin_lock_irqsave(&adapter->systim_lock, flags); > + ring->next_to_use_deferred = E1000_RING_NOT_DEFERRED; > + spin_unlock_irqrestore(&adapter->systim_lock, flags); > + } > + > + if (!__ew32_prepare_busy(hw)) { > + writel(i, ring->tail); > + return; > } > + > + /* We can't write now, defer to rx clean or timeout */ > + WRITE_ONCE(ring->next_to_use_deferred, i); > + > + mod_delayed_work(adapter->e1000_workqueue, > + &adapter->delay_ring_write_task, 1); > } > > /** > @@ -692,7 +736,7 @@ static void e1000_alloc_rx_buffers(struct > e1000_ring *rx_ring, > */ > wmb(); > if (adapter->flags2 & > FLAG2_PCIM2PCI_ARBITER_WA) > - e1000e_update_rdt_wa(rx_ring, i); > + e1000e_update_dt_wa(rx_ring, i); > else > writel(i, rx_ring->tail); > } > @@ -792,7 +836,7 @@ static void e1000_alloc_rx_buffers_ps(struct > e1000_ring *rx_ring, > */ > wmb(); > if (adapter->flags2 & > FLAG2_PCIM2PCI_ARBITER_WA) > - e1000e_update_rdt_wa(rx_ring, i << 1); > + e1000e_update_dt_wa(rx_ring, i << 1); > else > writel(i << 1, rx_ring->tail); > } > @@ -884,7 +928,7 @@ static void e1000_alloc_jumbo_rx_buffers(struct > e1000_ring *rx_ring, > */ > wmb(); > if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) > - e1000e_update_rdt_wa(rx_ring, i); > + e1000e_update_dt_wa(rx_ring, i); > else > writel(i, rx_ring->tail); > } > @@ -1256,6 +1300,8 @@ static bool e1000_clean_tx_irq(struct > e1000_ring *tx_ring) > > tx_ring->next_to_clean = i; > > + e1000e_deferred_update_dt_wa(tx_ring); > + > netdev_completed_queue(netdev, pkts_compl, bytes_compl); > > #define TX_WAKE_THRESHOLD 32 > @@ -1729,6 +1775,7 @@ static void e1000_clean_rx_ring(struct > e1000_ring *rx_ring) > > rx_ring->next_to_clean = 0; > rx_ring->next_to_use = 0; > + rx_ring->next_to_use_deferred = E1000_RING_NOT_DEFERRED; > adapter->flags2 &= ~FLAG2_IS_DISCARDING; > } > > @@ -2342,6 +2389,7 @@ int e1000e_setup_tx_resources(struct e1000_ring > *tx_ring) > > tx_ring->next_to_use = 0; > tx_ring->next_to_clean = 0; > + tx_ring->next_to_use_deferred = E1000_RING_NOT_DEFERRED; > > return 0; > err: > @@ -2388,6 +2436,7 @@ int e1000e_setup_rx_resources(struct e1000_ring > *rx_ring) > > rx_ring->next_to_clean = 0; > rx_ring->next_to_use = 0; > + rx_ring->next_to_use_deferred = E1000_RING_NOT_DEFERRED; > rx_ring->rx_skb_top = NULL; > > return 0; > @@ -2427,6 +2476,7 @@ static void e1000_clean_tx_ring(struct > e1000_ring *tx_ring) > > tx_ring->next_to_use = 0; > tx_ring->next_to_clean = 0; > + tx_ring->next_to_use_deferred = E1000_RING_NOT_DEFERRED; > } > > /** > @@ -2666,6 +2716,9 @@ static int e1000e_poll(struct napi_struct > *napi, int budget) > > adapter = netdev_priv(poll_dev); > > + e1000e_deferred_update_dt_wa(adapter->tx_ring); > + e1000e_deferred_update_dt_wa(adapter->rx_ring); > + > if (!adapter->msix_entries || > (adapter->rx_ring->ims_val & adapter->tx_ring->ims_val)) > tx_cleaned = e1000_clean_tx_irq(adapter->tx_ring); > @@ -2930,10 +2983,11 @@ static void e1000_configure_tx(struct > e1000_adapter *adapter) > tx_ring->tail = adapter->hw.hw_addr + E1000_TDT(0); > > writel(0, tx_ring->head); > - if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) > - e1000e_update_tdt_wa(tx_ring, 0); > - else > - writel(0, tx_ring->tail); > + if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) { > + while (__ew32_prepare_busy(hw)) > + usleep_range(50, 100); > + } > + writel(0, tx_ring->tail); > > /* Set the Tx Interrupt Delay register */ > ew32(TIDV, adapter->tx_int_delay); > @@ -3254,10 +3308,11 @@ static void e1000_configure_rx(struct > e1000_adapter *adapter) > rx_ring->tail = adapter->hw.hw_addr + E1000_RDT(0); > > writel(0, rx_ring->head); > - if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) > - e1000e_update_rdt_wa(rx_ring, 0); > - else > - writel(0, rx_ring->tail); > + if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) { > + while (__ew32_prepare_busy(hw)) > + usleep_range(50, 100); > + } > + writel(0, rx_ring->tail); > > /* Enable Receive Checksum Offload for TCP and UDP */ > rxcsum = er32(RXCSUM); > @@ -5908,8 +5963,8 @@ static netdev_tx_t e1000_xmit_frame(struct > sk_buff *skb, > if (!netdev_xmit_more() || > netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) > { > if (adapter->flags2 & > FLAG2_PCIM2PCI_ARBITER_WA) > - e1000e_update_tdt_wa(tx_ring, > - tx_ring- > >next_to_use); > + e1000e_update_dt_wa(tx_ring, > + tx_ring- > >next_to_use); > else > writel(tx_ring->next_to_use, tx_ring- > >tail); > } > @@ -7271,6 +7326,9 @@ static int e1000_probe(struct pci_dev *pdev, > const struct pci_device_id *ent) > queue_delayed_work(adapter->e1000_workqueue, &adapter- > >watchdog_task, > 0); > > + INIT_DELAYED_WORK(&adapter->delay_ring_write_task, > + e1000e_delay_ring_write_task); > + > timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, > 0); > > INIT_WORK(&adapter->reset_task, e1000_reset_task); > @@ -7424,6 +7482,7 @@ static void e1000_remove(struct pci_dev *pdev) > cancel_work_sync(&adapter->print_hang_task); > > cancel_delayed_work(&adapter->watchdog_task); > + cancel_delayed_work(&adapter->delay_ring_write_task); > flush_workqueue(adapter->e1000_workqueue); > destroy_workqueue(adapter->e1000_workqueue); >