linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE
@ 2021-12-14  5:33 Kai-Heng Feng
  2021-12-14  5:46 ` Kalle Valo
  2021-12-14  5:59 ` Pkshih
  0 siblings, 2 replies; 6+ messages in thread
From: Kai-Heng Feng @ 2021-12-14  5:33 UTC (permalink / raw)
  To: tony0620emma, pkshih
  Cc: jian-hong, Kai-Heng Feng, Kalle Valo, David S. Miller,
	Jakub Kicinski, Po-Hao Huang, Brian Norris, linux-wireless,
	netdev, linux-kernel

Many Intel based platforms face system random freeze after commit
9e2fd29864c5 ("rtw88: add napi support").

The commit itself shouldn't be the culprit. My guess is that the 8821CE
only leaves ASPM L1 for a short period when IRQ is raised. Since IRQ is
masked during NAPI polling, the PCIe link stays at L1 and makes RX DMA
extremely slow. Eventually the RX ring becomes messed up:
[ 1133.194697] rtw_8821ce 0000:02:00.0: pci bus timeout, check dma status

Since the 8821CE hardware may fail to leave ASPM L1, manually do it in
the driver to resolve the issue.

Fixes: 9e2fd29864c5 ("rtw88: add napi support")
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215131
BugLink: https://bugs.launchpad.net/bugs/1927808
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
---
v2:
 - Add default value for module parameter.

 drivers/net/wireless/realtek/rtw88/pci.c | 74 ++++++++----------------
 drivers/net/wireless/realtek/rtw88/pci.h |  1 +
 2 files changed, 24 insertions(+), 51 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index 3b367c9085eba..4ab75ac2500e9 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -2,7 +2,6 @@
 /* Copyright(c) 2018-2019  Realtek Corporation
  */
 
-#include <linux/dmi.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include "main.h"
@@ -16,10 +15,13 @@
 
 static bool rtw_disable_msi;
 static bool rtw_pci_disable_aspm;
+static int rtw_rx_aspm = -1;
 module_param_named(disable_msi, rtw_disable_msi, bool, 0644);
 module_param_named(disable_aspm, rtw_pci_disable_aspm, bool, 0644);
+module_param_named(rx_aspm, rtw_rx_aspm, int, 0444);
 MODULE_PARM_DESC(disable_msi, "Set Y to disable MSI interrupt support");
 MODULE_PARM_DESC(disable_aspm, "Set Y to disable PCI ASPM support");
+MODULE_PARM_DESC(rx_aspm, "Use PCIe ASPM for RX (0=disable, 1=enable, -1=default)");
 
 static u32 rtw_pci_tx_queue_idx_addr[] = {
 	[RTW_TX_QUEUE_BK]	= RTK_PCI_TXBD_IDX_BKQ,
@@ -1409,7 +1411,11 @@ static void rtw_pci_link_ps(struct rtw_dev *rtwdev, bool enter)
 	 * throughput. This is probably because the ASPM behavior slightly
 	 * varies from different SOC.
 	 */
-	if (rtwpci->link_ctrl & PCI_EXP_LNKCTL_ASPM_L1)
+	if (!(rtwpci->link_ctrl & PCI_EXP_LNKCTL_ASPM_L1))
+		return;
+
+	if ((enter && atomic_dec_return(&rtwpci->link_usage) == 0) ||
+	    (!enter && atomic_inc_return(&rtwpci->link_usage) == 1))
 		rtw_pci_aspm_set(rtwdev, enter);
 }
 
@@ -1658,6 +1664,9 @@ static int rtw_pci_napi_poll(struct napi_struct *napi, int budget)
 					      priv);
 	int work_done = 0;
 
+	if (!rtw_rx_aspm)
+		rtw_pci_link_ps(rtwdev, false);
+
 	while (work_done < budget) {
 		u32 work_done_once;
 
@@ -1681,6 +1690,8 @@ static int rtw_pci_napi_poll(struct napi_struct *napi, int budget)
 		if (rtw_pci_get_hw_rx_ring_nr(rtwdev, rtwpci))
 			napi_schedule(napi);
 	}
+	if (!rtw_rx_aspm)
+		rtw_pci_link_ps(rtwdev, true);
 
 	return work_done;
 }
@@ -1702,59 +1713,13 @@ static void rtw_pci_napi_deinit(struct rtw_dev *rtwdev)
 	netif_napi_del(&rtwpci->napi);
 }
 
-enum rtw88_quirk_dis_pci_caps {
-	QUIRK_DIS_PCI_CAP_MSI,
-	QUIRK_DIS_PCI_CAP_ASPM,
-};
-
-static int disable_pci_caps(const struct dmi_system_id *dmi)
-{
-	uintptr_t dis_caps = (uintptr_t)dmi->driver_data;
-
-	if (dis_caps & BIT(QUIRK_DIS_PCI_CAP_MSI))
-		rtw_disable_msi = true;
-	if (dis_caps & BIT(QUIRK_DIS_PCI_CAP_ASPM))
-		rtw_pci_disable_aspm = true;
-
-	return 1;
-}
-
-static const struct dmi_system_id rtw88_pci_quirks[] = {
-	{
-		.callback = disable_pci_caps,
-		.ident = "Protempo Ltd L116HTN6SPW",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Protempo Ltd"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "L116HTN6SPW"),
-		},
-		.driver_data = (void *)BIT(QUIRK_DIS_PCI_CAP_ASPM),
-	},
-	{
-		.callback = disable_pci_caps,
-		.ident = "HP HP Pavilion Laptop 14-ce0xxx",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "HP"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion Laptop 14-ce0xxx"),
-		},
-		.driver_data = (void *)BIT(QUIRK_DIS_PCI_CAP_ASPM),
-	},
-	{
-		.callback = disable_pci_caps,
-		.ident = "HP HP 250 G7 Notebook PC",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "HP"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "HP 250 G7 Notebook PC"),
-		},
-		.driver_data = (void *)BIT(QUIRK_DIS_PCI_CAP_ASPM),
-	},
-	{}
-};
-
 int rtw_pci_probe(struct pci_dev *pdev,
 		  const struct pci_device_id *id)
 {
+	struct pci_dev *bridge = pci_upstream_bridge(pdev);
 	struct ieee80211_hw *hw;
 	struct rtw_dev *rtwdev;
+	struct rtw_pci *rtwpci;
 	int drv_data_size;
 	int ret;
 
@@ -1772,6 +1737,9 @@ int rtw_pci_probe(struct pci_dev *pdev,
 	rtwdev->hci.ops = &rtw_pci_ops;
 	rtwdev->hci.type = RTW_HCI_TYPE_PCIE;
 
+	rtwpci = (struct rtw_pci *)rtwdev->priv;
+	atomic_set(&rtwpci->link_usage, 1);
+
 	ret = rtw_core_init(rtwdev);
 	if (ret)
 		goto err_release_hw;
@@ -1800,7 +1768,11 @@ int rtw_pci_probe(struct pci_dev *pdev,
 		goto err_destroy_pci;
 	}
 
-	dmi_check_system(rtw88_pci_quirks);
+	/* Disable PCIe ASPM L1 while doing NAPI poll for 8821CE */
+	if (pdev->device == 0xc821 && bridge->vendor == PCI_VENDOR_ID_INTEL &&
+	    rtw_rx_aspm == -1)
+		rtw_rx_aspm = 0;
+
 	rtw_pci_phy_cfg(rtwdev);
 
 	ret = rtw_register_hw(rtwdev, hw);
diff --git a/drivers/net/wireless/realtek/rtw88/pci.h b/drivers/net/wireless/realtek/rtw88/pci.h
index 66f78eb7757c5..0aaa12ea03739 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.h
+++ b/drivers/net/wireless/realtek/rtw88/pci.h
@@ -223,6 +223,7 @@ struct rtw_pci {
 	struct rtw_pci_tx_ring tx_rings[RTK_MAX_TX_QUEUE_NUM];
 	struct rtw_pci_rx_ring rx_rings[RTK_MAX_RX_QUEUE_NUM];
 	u16 link_ctrl;
+	atomic_t link_usage;
 	DECLARE_BITMAP(flags, NUM_OF_RTW_PCI_FLAGS);
 
 	void __iomem *mmap;
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE
  2021-12-14  5:33 [PATCH v2] rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE Kai-Heng Feng
@ 2021-12-14  5:46 ` Kalle Valo
  2021-12-14  6:03   ` Kai-Heng Feng
  2021-12-14  5:59 ` Pkshih
  1 sibling, 1 reply; 6+ messages in thread
From: Kalle Valo @ 2021-12-14  5:46 UTC (permalink / raw)
  To: Kai-Heng Feng
  Cc: tony0620emma, pkshih, jian-hong, David S. Miller, Jakub Kicinski,
	Po-Hao Huang, Brian Norris, linux-wireless, netdev, linux-kernel

Kai-Heng Feng <kai.heng.feng@canonical.com> writes:

> Many Intel based platforms face system random freeze after commit
> 9e2fd29864c5 ("rtw88: add napi support").
>
> The commit itself shouldn't be the culprit. My guess is that the 8821CE
> only leaves ASPM L1 for a short period when IRQ is raised. Since IRQ is
> masked during NAPI polling, the PCIe link stays at L1 and makes RX DMA
> extremely slow. Eventually the RX ring becomes messed up:
> [ 1133.194697] rtw_8821ce 0000:02:00.0: pci bus timeout, check dma status
>
> Since the 8821CE hardware may fail to leave ASPM L1, manually do it in
> the driver to resolve the issue.
>
> Fixes: 9e2fd29864c5 ("rtw88: add napi support")
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215131
> BugLink: https://bugs.launchpad.net/bugs/1927808
> Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>

[...]

>  static bool rtw_disable_msi;
>  static bool rtw_pci_disable_aspm;
> +static int rtw_rx_aspm = -1;
>  module_param_named(disable_msi, rtw_disable_msi, bool, 0644);
>  module_param_named(disable_aspm, rtw_pci_disable_aspm, bool, 0644);
> +module_param_named(rx_aspm, rtw_rx_aspm, int, 0444);
>  MODULE_PARM_DESC(disable_msi, "Set Y to disable MSI interrupt support");
>  MODULE_PARM_DESC(disable_aspm, "Set Y to disable PCI ASPM support");
> +MODULE_PARM_DESC(rx_aspm, "Use PCIe ASPM for RX (0=disable, 1=enable, -1=default)")

We already have disable_aspm parameter, why do we need yet another one?
There's a high bar for new module parameters.

-- 
https://patchwork.kernel.org/project/linux-wireless/list/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH v2] rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE
  2021-12-14  5:33 [PATCH v2] rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE Kai-Heng Feng
  2021-12-14  5:46 ` Kalle Valo
@ 2021-12-14  5:59 ` Pkshih
  2021-12-14  6:06   ` Kai-Heng Feng
  1 sibling, 1 reply; 6+ messages in thread
From: Pkshih @ 2021-12-14  5:59 UTC (permalink / raw)
  To: Kai-Heng Feng, tony0620emma
  Cc: jian-hong, Kalle Valo, David S. Miller, Jakub Kicinski,
	Bernie Huang, Brian Norris, linux-wireless, netdev, linux-kernel


> -----Original Message-----
> From: Kai-Heng Feng <kai.heng.feng@canonical.com>
> Sent: Tuesday, December 14, 2021 1:33 PM
> To: tony0620emma@gmail.com; Pkshih <pkshih@realtek.com>
> Cc: jian-hong@endlessm.com; Kai-Heng Feng <kai.heng.feng@canonical.com>; Kalle Valo
> <kvalo@codeaurora.org>; David S. Miller <davem@davemloft.net>; Jakub Kicinski <kuba@kernel.org>; Bernie
> Huang <phhuang@realtek.com>; Brian Norris <briannorris@chromium.org>; linux-wireless@vger.kernel.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: [PATCH v2] rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE
> 
> Many Intel based platforms face system random freeze after commit
> 9e2fd29864c5 ("rtw88: add napi support").
> 
> The commit itself shouldn't be the culprit. My guess is that the 8821CE
> only leaves ASPM L1 for a short period when IRQ is raised. Since IRQ is
> masked during NAPI polling, the PCIe link stays at L1 and makes RX DMA
> extremely slow. Eventually the RX ring becomes messed up:
> [ 1133.194697] rtw_8821ce 0000:02:00.0: pci bus timeout, check dma status
> 
> Since the 8821CE hardware may fail to leave ASPM L1, manually do it in
> the driver to resolve the issue.
> 
> Fixes: 9e2fd29864c5 ("rtw88: add napi support")
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215131
> BugLink: https://bugs.launchpad.net/bugs/1927808
> Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> ---
> v2:
>  - Add default value for module parameter.
> 
>  drivers/net/wireless/realtek/rtw88/pci.c | 74 ++++++++----------------
>  drivers/net/wireless/realtek/rtw88/pci.h |  1 +
>  2 files changed, 24 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
> index 3b367c9085eba..4ab75ac2500e9 100644
> --- a/drivers/net/wireless/realtek/rtw88/pci.c
> +++ b/drivers/net/wireless/realtek/rtw88/pci.c
> @@ -2,7 +2,6 @@
>  /* Copyright(c) 2018-2019  Realtek Corporation
>   */
> 
> -#include <linux/dmi.h>
>  #include <linux/module.h>
>  #include <linux/pci.h>
>  #include "main.h"
> @@ -16,10 +15,13 @@
> 
>  static bool rtw_disable_msi;
>  static bool rtw_pci_disable_aspm;
> +static int rtw_rx_aspm = -1;
>  module_param_named(disable_msi, rtw_disable_msi, bool, 0644);
>  module_param_named(disable_aspm, rtw_pci_disable_aspm, bool, 0644);
> +module_param_named(rx_aspm, rtw_rx_aspm, int, 0444);
>  MODULE_PARM_DESC(disable_msi, "Set Y to disable MSI interrupt support");
>  MODULE_PARM_DESC(disable_aspm, "Set Y to disable PCI ASPM support");
> +MODULE_PARM_DESC(rx_aspm, "Use PCIe ASPM for RX (0=disable, 1=enable, -1=default)");
> 
>  static u32 rtw_pci_tx_queue_idx_addr[] = {
>  	[RTW_TX_QUEUE_BK]	= RTK_PCI_TXBD_IDX_BKQ,
> @@ -1409,7 +1411,11 @@ static void rtw_pci_link_ps(struct rtw_dev *rtwdev, bool enter)
>  	 * throughput. This is probably because the ASPM behavior slightly
>  	 * varies from different SOC.
>  	 */
> -	if (rtwpci->link_ctrl & PCI_EXP_LNKCTL_ASPM_L1)
> +	if (!(rtwpci->link_ctrl & PCI_EXP_LNKCTL_ASPM_L1))
> +		return;
> +
> +	if ((enter && atomic_dec_return(&rtwpci->link_usage) == 0) ||
> +	    (!enter && atomic_inc_return(&rtwpci->link_usage) == 1))
>  		rtw_pci_aspm_set(rtwdev, enter);
>  }
> 

I found calling pci_link_ps isn't always symmetric, so we need to reset
ref_cnt at pci_start() like below, or we can't enter rtw_pci_aspm_set()
anymore. The negative flow I face is 
ifup -> connect AP -> ifdown -> ifup (ref_cnt isn't expected now).


@@ -582,6 +582,8 @@ static int rtw_pci_start(struct rtw_dev *rtwdev)
        rtw_pci_napi_start(rtwdev);

        spin_lock_bh(&rtwpci->irq_lock);
+       atomic_set(&rtwpci->link_usage, 1);
        rtwpci->running = true;
        rtw_pci_enable_interrupt(rtwdev, rtwpci, false);
        spin_unlock_bh(&rtwpci->irq_lock);

--
Ping-Ke



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE
  2021-12-14  5:46 ` Kalle Valo
@ 2021-12-14  6:03   ` Kai-Heng Feng
  0 siblings, 0 replies; 6+ messages in thread
From: Kai-Heng Feng @ 2021-12-14  6:03 UTC (permalink / raw)
  To: Kalle Valo
  Cc: tony0620emma, pkshih, jhp, David S. Miller, Jakub Kicinski,
	Po-Hao Huang, Brian Norris, linux-wireless, netdev, linux-kernel

On Tue, Dec 14, 2021 at 1:46 PM Kalle Valo <kvalo@kernel.org> wrote:
>
> Kai-Heng Feng <kai.heng.feng@canonical.com> writes:
>
> > Many Intel based platforms face system random freeze after commit
> > 9e2fd29864c5 ("rtw88: add napi support").
> >
> > The commit itself shouldn't be the culprit. My guess is that the 8821CE
> > only leaves ASPM L1 for a short period when IRQ is raised. Since IRQ is
> > masked during NAPI polling, the PCIe link stays at L1 and makes RX DMA
> > extremely slow. Eventually the RX ring becomes messed up:
> > [ 1133.194697] rtw_8821ce 0000:02:00.0: pci bus timeout, check dma status
> >
> > Since the 8821CE hardware may fail to leave ASPM L1, manually do it in
> > the driver to resolve the issue.
> >
> > Fixes: 9e2fd29864c5 ("rtw88: add napi support")
> > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215131
> > BugLink: https://bugs.launchpad.net/bugs/1927808
> > Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
>
> [...]
>
> >  static bool rtw_disable_msi;
> >  static bool rtw_pci_disable_aspm;
> > +static int rtw_rx_aspm = -1;
> >  module_param_named(disable_msi, rtw_disable_msi, bool, 0644);
> >  module_param_named(disable_aspm, rtw_pci_disable_aspm, bool, 0644);
> > +module_param_named(rx_aspm, rtw_rx_aspm, int, 0444);
> >  MODULE_PARM_DESC(disable_msi, "Set Y to disable MSI interrupt support");
> >  MODULE_PARM_DESC(disable_aspm, "Set Y to disable PCI ASPM support");
> > +MODULE_PARM_DESC(rx_aspm, "Use PCIe ASPM for RX (0=disable, 1=enable, -1=default)")
>
> We already have disable_aspm parameter, why do we need yet another one?
> There's a high bar for new module parameters.

It's a good way for (un)affected users to try out different settings.
But yes the parameter isn't necessary. Let me send another version without it.

Kai-Heng

>
> --
> https://patchwork.kernel.org/project/linux-wireless/list/
>
> https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE
  2021-12-14  5:59 ` Pkshih
@ 2021-12-14  6:06   ` Kai-Heng Feng
  2021-12-14  6:17     ` Pkshih
  0 siblings, 1 reply; 6+ messages in thread
From: Kai-Heng Feng @ 2021-12-14  6:06 UTC (permalink / raw)
  To: Pkshih
  Cc: tony0620emma, jian-hong@endlessm.com, Kalle Valo,
	David S. Miller, Jakub Kicinski, Bernie Huang, Brian Norris,
	linux-wireless, netdev, linux-kernel

On Tue, Dec 14, 2021 at 1:59 PM Pkshih <pkshih@realtek.com> wrote:
>
>
> > -----Original Message-----
> > From: Kai-Heng Feng <kai.heng.feng@canonical.com>
> > Sent: Tuesday, December 14, 2021 1:33 PM
> > To: tony0620emma@gmail.com; Pkshih <pkshih@realtek.com>
> > Cc: jian-hong@endlessm.com; Kai-Heng Feng <kai.heng.feng@canonical.com>; Kalle Valo
> > <kvalo@codeaurora.org>; David S. Miller <davem@davemloft.net>; Jakub Kicinski <kuba@kernel.org>; Bernie
> > Huang <phhuang@realtek.com>; Brian Norris <briannorris@chromium.org>; linux-wireless@vger.kernel.org;
> > netdev@vger.kernel.org; linux-kernel@vger.kernel.org
> > Subject: [PATCH v2] rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE
> >
> > Many Intel based platforms face system random freeze after commit
> > 9e2fd29864c5 ("rtw88: add napi support").
> >
> > The commit itself shouldn't be the culprit. My guess is that the 8821CE
> > only leaves ASPM L1 for a short period when IRQ is raised. Since IRQ is
> > masked during NAPI polling, the PCIe link stays at L1 and makes RX DMA
> > extremely slow. Eventually the RX ring becomes messed up:
> > [ 1133.194697] rtw_8821ce 0000:02:00.0: pci bus timeout, check dma status
> >
> > Since the 8821CE hardware may fail to leave ASPM L1, manually do it in
> > the driver to resolve the issue.
> >
> > Fixes: 9e2fd29864c5 ("rtw88: add napi support")
> > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215131
> > BugLink: https://bugs.launchpad.net/bugs/1927808
> > Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> > ---
> > v2:
> >  - Add default value for module parameter.
> >
> >  drivers/net/wireless/realtek/rtw88/pci.c | 74 ++++++++----------------
> >  drivers/net/wireless/realtek/rtw88/pci.h |  1 +
> >  2 files changed, 24 insertions(+), 51 deletions(-)
> >
> > diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
> > index 3b367c9085eba..4ab75ac2500e9 100644
> > --- a/drivers/net/wireless/realtek/rtw88/pci.c
> > +++ b/drivers/net/wireless/realtek/rtw88/pci.c
> > @@ -2,7 +2,6 @@
> >  /* Copyright(c) 2018-2019  Realtek Corporation
> >   */
> >
> > -#include <linux/dmi.h>
> >  #include <linux/module.h>
> >  #include <linux/pci.h>
> >  #include "main.h"
> > @@ -16,10 +15,13 @@
> >
> >  static bool rtw_disable_msi;
> >  static bool rtw_pci_disable_aspm;
> > +static int rtw_rx_aspm = -1;
> >  module_param_named(disable_msi, rtw_disable_msi, bool, 0644);
> >  module_param_named(disable_aspm, rtw_pci_disable_aspm, bool, 0644);
> > +module_param_named(rx_aspm, rtw_rx_aspm, int, 0444);
> >  MODULE_PARM_DESC(disable_msi, "Set Y to disable MSI interrupt support");
> >  MODULE_PARM_DESC(disable_aspm, "Set Y to disable PCI ASPM support");
> > +MODULE_PARM_DESC(rx_aspm, "Use PCIe ASPM for RX (0=disable, 1=enable, -1=default)");
> >
> >  static u32 rtw_pci_tx_queue_idx_addr[] = {
> >       [RTW_TX_QUEUE_BK]       = RTK_PCI_TXBD_IDX_BKQ,
> > @@ -1409,7 +1411,11 @@ static void rtw_pci_link_ps(struct rtw_dev *rtwdev, bool enter)
> >        * throughput. This is probably because the ASPM behavior slightly
> >        * varies from different SOC.
> >        */
> > -     if (rtwpci->link_ctrl & PCI_EXP_LNKCTL_ASPM_L1)
> > +     if (!(rtwpci->link_ctrl & PCI_EXP_LNKCTL_ASPM_L1))
> > +             return;
> > +
> > +     if ((enter && atomic_dec_return(&rtwpci->link_usage) == 0) ||
> > +         (!enter && atomic_inc_return(&rtwpci->link_usage) == 1))
> >               rtw_pci_aspm_set(rtwdev, enter);
> >  }
> >
>
> I found calling pci_link_ps isn't always symmetric, so we need to reset
> ref_cnt at pci_start() like below, or we can't enter rtw_pci_aspm_set()
> anymore. The negative flow I face is
> ifup -> connect AP -> ifdown -> ifup (ref_cnt isn't expected now).

Is it expected to be asymmetric?
If it's intended to be this way, I'll change where we set link_usage.
Otherwise I think making it symmetric makes more sense.

Kai-Heng

>
>
> @@ -582,6 +582,8 @@ static int rtw_pci_start(struct rtw_dev *rtwdev)
>         rtw_pci_napi_start(rtwdev);
>
>         spin_lock_bh(&rtwpci->irq_lock);
> +       atomic_set(&rtwpci->link_usage, 1);
>         rtwpci->running = true;
>         rtw_pci_enable_interrupt(rtwdev, rtwpci, false);
>         spin_unlock_bh(&rtwpci->irq_lock);
>
> --
> Ping-Ke
>
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH v2] rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE
  2021-12-14  6:06   ` Kai-Heng Feng
@ 2021-12-14  6:17     ` Pkshih
  0 siblings, 0 replies; 6+ messages in thread
From: Pkshih @ 2021-12-14  6:17 UTC (permalink / raw)
  To: Kai-Heng Feng
  Cc: tony0620emma, jian-hong@endlessm.com, Kalle Valo,
	David S. Miller, Jakub Kicinski, Bernie Huang, Brian Norris,
	linux-wireless, netdev, linux-kernel


> -----Original Message-----
> From: Kai-Heng Feng <kai.heng.feng@canonical.com>
> Sent: Tuesday, December 14, 2021 2:07 PM
> To: Pkshih <pkshih@realtek.com>
> Cc: tony0620emma@gmail.com; jian-hong@endlessm.com <jhp@endlessos.org>; Kalle Valo <kvalo@codeaurora.org>;
> David S. Miller <davem@davemloft.net>; Jakub Kicinski <kuba@kernel.org>; Bernie Huang
> <phhuang@realtek.com>; Brian Norris <briannorris@chromium.org>; linux-wireless@vger.kernel.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v2] rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE
> 
> On Tue, Dec 14, 2021 at 1:59 PM Pkshih <pkshih@realtek.com> wrote:
> >
> >
> > > -----Original Message-----
> > > From: Kai-Heng Feng <kai.heng.feng@canonical.com>
> > > Sent: Tuesday, December 14, 2021 1:33 PM
> > > To: tony0620emma@gmail.com; Pkshih <pkshih@realtek.com>
> > > Cc: jian-hong@endlessm.com; Kai-Heng Feng <kai.heng.feng@canonical.com>; Kalle Valo
> > > <kvalo@codeaurora.org>; David S. Miller <davem@davemloft.net>; Jakub Kicinski <kuba@kernel.org>; Bernie
> > > Huang <phhuang@realtek.com>; Brian Norris <briannorris@chromium.org>; linux-wireless@vger.kernel.org;
> > > netdev@vger.kernel.org; linux-kernel@vger.kernel.org
> > > Subject: [PATCH v2] rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE
> > >
> > > Many Intel based platforms face system random freeze after commit
> > > 9e2fd29864c5 ("rtw88: add napi support").
> > >
> > > The commit itself shouldn't be the culprit. My guess is that the 8821CE
> > > only leaves ASPM L1 for a short period when IRQ is raised. Since IRQ is
> > > masked during NAPI polling, the PCIe link stays at L1 and makes RX DMA
> > > extremely slow. Eventually the RX ring becomes messed up:
> > > [ 1133.194697] rtw_8821ce 0000:02:00.0: pci bus timeout, check dma status
> > >
> > > Since the 8821CE hardware may fail to leave ASPM L1, manually do it in
> > > the driver to resolve the issue.
> > >
> > > Fixes: 9e2fd29864c5 ("rtw88: add napi support")
> > > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215131
> > > BugLink: https://bugs.launchpad.net/bugs/1927808
> > > Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> > > ---
> > > v2:
> > >  - Add default value for module parameter.
> > >
> > >  drivers/net/wireless/realtek/rtw88/pci.c | 74 ++++++++----------------
> > >  drivers/net/wireless/realtek/rtw88/pci.h |  1 +
> > >  2 files changed, 24 insertions(+), 51 deletions(-)
> > >
> > > diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
> > > index 3b367c9085eba..4ab75ac2500e9 100644
> > > --- a/drivers/net/wireless/realtek/rtw88/pci.c
> > > +++ b/drivers/net/wireless/realtek/rtw88/pci.c
> > > @@ -2,7 +2,6 @@
> > >  /* Copyright(c) 2018-2019  Realtek Corporation
> > >   */
> > >
> > > -#include <linux/dmi.h>
> > >  #include <linux/module.h>
> > >  #include <linux/pci.h>
> > >  #include "main.h"
> > > @@ -16,10 +15,13 @@
> > >
> > >  static bool rtw_disable_msi;
> > >  static bool rtw_pci_disable_aspm;
> > > +static int rtw_rx_aspm = -1;
> > >  module_param_named(disable_msi, rtw_disable_msi, bool, 0644);
> > >  module_param_named(disable_aspm, rtw_pci_disable_aspm, bool, 0644);
> > > +module_param_named(rx_aspm, rtw_rx_aspm, int, 0444);
> > >  MODULE_PARM_DESC(disable_msi, "Set Y to disable MSI interrupt support");
> > >  MODULE_PARM_DESC(disable_aspm, "Set Y to disable PCI ASPM support");
> > > +MODULE_PARM_DESC(rx_aspm, "Use PCIe ASPM for RX (0=disable, 1=enable, -1=default)");
> > >
> > >  static u32 rtw_pci_tx_queue_idx_addr[] = {
> > >       [RTW_TX_QUEUE_BK]       = RTK_PCI_TXBD_IDX_BKQ,
> > > @@ -1409,7 +1411,11 @@ static void rtw_pci_link_ps(struct rtw_dev *rtwdev, bool enter)
> > >        * throughput. This is probably because the ASPM behavior slightly
> > >        * varies from different SOC.
> > >        */
> > > -     if (rtwpci->link_ctrl & PCI_EXP_LNKCTL_ASPM_L1)
> > > +     if (!(rtwpci->link_ctrl & PCI_EXP_LNKCTL_ASPM_L1))
> > > +             return;
> > > +
> > > +     if ((enter && atomic_dec_return(&rtwpci->link_usage) == 0) ||
> > > +         (!enter && atomic_inc_return(&rtwpci->link_usage) == 1))
> > >               rtw_pci_aspm_set(rtwdev, enter);
> > >  }
> > >
> >
> > I found calling pci_link_ps isn't always symmetric, so we need to reset
> > ref_cnt at pci_start() like below, or we can't enter rtw_pci_aspm_set()
> > anymore. The negative flow I face is
> > ifup -> connect AP -> ifdown -> ifup (ref_cnt isn't expected now).
> 
> Is it expected to be asymmetric?
> If it's intended to be this way, I'll change where we set link_usage.
> Otherwise I think making it symmetric makes more sense.

Agree with your thought, but I don't remember clearly why it enters idle twice
in above flow. So, you may use two flags to indicate the state wanted by
two callers.

--
Ping-Ke



^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-12-14  6:17 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-14  5:33 [PATCH v2] rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE Kai-Heng Feng
2021-12-14  5:46 ` Kalle Valo
2021-12-14  6:03   ` Kai-Heng Feng
2021-12-14  5:59 ` Pkshih
2021-12-14  6:06   ` Kai-Heng Feng
2021-12-14  6:17     ` Pkshih

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).