Linux-PCI Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH RESEND] PCI/DPC: Fix print AER status in DPC event handling
@ 2019-02-11  7:02 Dongdong Liu
  2019-02-11 15:46 ` Keith Busch
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Dongdong Liu @ 2019-02-11  7:02 UTC (permalink / raw)
  To: helgaas, keith.busch, okaya
  Cc: linux-pci, linuxarm, Dongdong Liu, stable, Bjorn Helgaas

Currently info->severity is not assigned a value before
aer_get_device_error_info() is called.
Fix the bug by reading the port's AER status, mask and severity
registers to determine the severity. At the same time, add code to
clear the port's fatal errors.

Fixes: 8aefa9b0d910 ("PCI/DPC: Print AER status in DPC event handling")
Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
Cc: stable@vger.kernel.org
Cc: Keith Busch <keith.busch@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pcie/dpc.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index e435d12..7b77754 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -202,6 +202,28 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
 	pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status);
 }
 
+static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
+					  struct aer_err_info *info)
+{
+	int pos = dev->aer_cap;
+	u32 status, mask, sev;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
+	status &= ~mask;
+	if (!status)
+		return 0;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
+	status &= sev;
+	if (status)
+		info->severity = AER_FATAL;
+	else
+		info->severity = AER_NONFATAL;
+
+	return 1;
+}
+
 static irqreturn_t dpc_handler(int irq, void *context)
 {
 	struct aer_err_info info;
@@ -229,9 +251,12 @@ static irqreturn_t dpc_handler(int irq, void *context)
 	/* show RP PIO error detail information */
 	if (dpc->rp_extensions && reason == 3 && ext_reason == 0)
 		dpc_process_rp_pio_error(dpc);
-	else if (reason == 0 && aer_get_device_error_info(pdev, &info)) {
+	else if (reason == 0 &&
+		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
+		 aer_get_device_error_info(pdev, &info)) {
 		aer_print_error(pdev, &info);
 		pci_cleanup_aer_uncorrect_error_status(pdev);
+		pci_aer_clear_fatal_status(pdev);
 	}
 
 	/* We configure DPC so it only triggers on ERR_FATAL */
-- 
1.9.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND] PCI/DPC: Fix print AER status in DPC event handling
  2019-02-11  7:02 [PATCH RESEND] PCI/DPC: Fix print AER status in DPC event handling Dongdong Liu
@ 2019-02-11 15:46 ` Keith Busch
  2019-02-12  2:46   ` Dongdong Liu
  2019-02-15 15:11 ` Keith Busch
  2019-02-21 23:16 ` Bjorn Helgaas
  2 siblings, 1 reply; 5+ messages in thread
From: Keith Busch @ 2019-02-11 15:46 UTC (permalink / raw)
  To: Dongdong Liu; +Cc: helgaas, okaya, linux-pci, linuxarm, stable, Bjorn Helgaas

On Mon, Feb 11, 2019 at 03:02:59PM +0800, Dongdong Liu wrote:
> +static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
> +					  struct aer_err_info *info)
> +{
> +	int pos = dev->aer_cap;
> +	u32 status, mask, sev;
> +
> +	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
> +	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
> +	status &= ~mask;
> +	if (!status)
> +		return 0;
> +
> +	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
> +	status &= sev;
> +	if (status)
> +		info->severity = AER_FATAL;
> +	else
> +		info->severity = AER_NONFATAL;
> +
> +	return 1;
> +}
> +

You can set info->severity to AER_FATAL since that's the only type for
which we enable DPC triggering.

>  static irqreturn_t dpc_handler(int irq, void *context)
>  {
>  	struct aer_err_info info;
> @@ -229,9 +251,12 @@ static irqreturn_t dpc_handler(int irq, void *context)
>  	/* show RP PIO error detail information */
>  	if (dpc->rp_extensions && reason == 3 && ext_reason == 0)
>  		dpc_process_rp_pio_error(dpc);
> -	else if (reason == 0 && aer_get_device_error_info(pdev, &info)) {
> +	else if (reason == 0 &&
> +		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
> +		 aer_get_device_error_info(pdev, &info)) {
>  		aer_print_error(pdev, &info);
>  		pci_cleanup_aer_uncorrect_error_status(pdev);
> +		pci_aer_clear_fatal_status(pdev);

Good catch here, but let's clear the pending bits with a single call
to pci_cleanup_aer_error_status_regs() rather than NONFATAL and
FATAL separately.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND] PCI/DPC: Fix print AER status in DPC event handling
  2019-02-11 15:46 ` Keith Busch
@ 2019-02-12  2:46   ` Dongdong Liu
  0 siblings, 0 replies; 5+ messages in thread
From: Dongdong Liu @ 2019-02-12  2:46 UTC (permalink / raw)
  To: Keith Busch; +Cc: helgaas, okaya, linux-pci, linuxarm, stable, Bjorn Helgaas

Hi Keith

Many thanks for your review.

在 2019/2/11 23:46, Keith Busch 写道:
> On Mon, Feb 11, 2019 at 03:02:59PM +0800, Dongdong Liu wrote:
>> +static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
>> +					  struct aer_err_info *info)
>> +{
>> +	int pos = dev->aer_cap;
>> +	u32 status, mask, sev;
>> +
>> +	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
>> +	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
>> +	status &= ~mask;
>> +	if (!status)
>> +		return 0;
>> +
>> +	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
>> +	status &= sev;
>> +	if (status)
>> +		info->severity = AER_FATAL;
>> +	else
>> +		info->severity = AER_NONFATAL;
>> +
>> +	return 1;
>> +}
>> +
>
> You can set info->severity to AER_FATAL since that's the only type we
> enable DPC triggering.

DPC Trigger Enable
01b-DPC is enabled and is triggered when the Downstream Port detects
an unmasked uncorrectable error or when the Downstream Port receives an
ERR_FATAL Message.

DPC Trigger Reason
00b-DPC was triggered due to an unmasked uncorrectable error
reason == 0 means DPC was triggered because an unmasked uncorrectable error
was detected; that includes both non-fatal and fatal errors, so we need to
get the severity.

>
>>  static irqreturn_t dpc_handler(int irq, void *context)
>>  {
>>  	struct aer_err_info info;
>> @@ -229,9 +251,12 @@ static irqreturn_t dpc_handler(int irq, void *context)
>>  	/* show RP PIO error detail information */
>>  	if (dpc->rp_extensions && reason == 3 && ext_reason == 0)
>>  		dpc_process_rp_pio_error(dpc);
>> -	else if (reason == 0 && aer_get_device_error_info(pdev, &info)) {
>> +	else if (reason == 0 &&
>> +		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
>> +		 aer_get_device_error_info(pdev, &info)) {
>>  		aer_print_error(pdev, &info);
>>  		pci_cleanup_aer_uncorrect_error_status(pdev);
>> +		pci_aer_clear_fatal_status(pdev);
>
> Good catch here, but let's clear the pending bits with a single call
> to pci_cleanup_aer_error_status_regs() rather than NONFATAL and
> FATAL separately.

pci_cleanup_aer_error_status_regs() also clears the correctable error status.
That does not seem appropriate, as reason == 0 means an unmasked uncorrectable
error was detected.

Thanks,
Dongdong
>
> .
>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND] PCI/DPC: Fix print AER status in DPC event handling
  2019-02-11  7:02 [PATCH RESEND] PCI/DPC: Fix print AER status in DPC event handling Dongdong Liu
  2019-02-11 15:46 ` Keith Busch
@ 2019-02-15 15:11 ` Keith Busch
  2019-02-21 23:16 ` Bjorn Helgaas
  2 siblings, 0 replies; 5+ messages in thread
From: Keith Busch @ 2019-02-15 15:11 UTC (permalink / raw)
  To: Dongdong Liu; +Cc: helgaas, okaya, linux-pci, linuxarm, stable, Bjorn Helgaas

On Mon, Feb 11, 2019 at 03:02:59PM +0800, Dongdong Liu wrote:
> Current info->severity have not assigned a value before calling
> aer_get_device_error_info() and aer_get_device_error_info(),
> Fix the bug to get the severity by reading the port's AER status, mask
> and severity registers. At the same time, add code to clear the port's
> fatal errors.
> 
> Fixes: 8aefa9b0d910 ("PCI/DPC: Print AER status in DPC event handling")
> Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
> Cc: stable@vger.kernel.org
> Cc: Keith Busch <keith.busch@intel.com>
> Cc: Bjorn Helgaas <bhelgaas@google.com>


Looks good.

Reviewed-by: Keith Busch <keith.busch@intel.com>

> ---
>  drivers/pci/pcie/dpc.c | 27 ++++++++++++++++++++++++++-
>  1 file changed, 26 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> index e435d12..7b77754 100644
> --- a/drivers/pci/pcie/dpc.c
> +++ b/drivers/pci/pcie/dpc.c
> @@ -202,6 +202,28 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
>  	pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status);
>  }
>  
> +static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
> +					  struct aer_err_info *info)
> +{
> +	int pos = dev->aer_cap;
> +	u32 status, mask, sev;
> +
> +	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
> +	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
> +	status &= ~mask;
> +	if (!status)
> +		return 0;
> +
> +	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
> +	status &= sev;
> +	if (status)
> +		info->severity = AER_FATAL;
> +	else
> +		info->severity = AER_NONFATAL;
> +
> +	return 1;
> +}
> +
>  static irqreturn_t dpc_handler(int irq, void *context)
>  {
>  	struct aer_err_info info;
> @@ -229,9 +251,12 @@ static irqreturn_t dpc_handler(int irq, void *context)
>  	/* show RP PIO error detail information */
>  	if (dpc->rp_extensions && reason == 3 && ext_reason == 0)
>  		dpc_process_rp_pio_error(dpc);
> -	else if (reason == 0 && aer_get_device_error_info(pdev, &info)) {
> +	else if (reason == 0 &&
> +		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
> +		 aer_get_device_error_info(pdev, &info)) {
>  		aer_print_error(pdev, &info);
>  		pci_cleanup_aer_uncorrect_error_status(pdev);
> +		pci_aer_clear_fatal_status(pdev);
>  	}
>  
>  	/* We configure DPC so it only triggers on ERR_FATAL */
> -- 
> 1.9.1
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND] PCI/DPC: Fix print AER status in DPC event handling
  2019-02-11  7:02 [PATCH RESEND] PCI/DPC: Fix print AER status in DPC event handling Dongdong Liu
  2019-02-11 15:46 ` Keith Busch
  2019-02-15 15:11 ` Keith Busch
@ 2019-02-21 23:16 ` Bjorn Helgaas
  2 siblings, 0 replies; 5+ messages in thread
From: Bjorn Helgaas @ 2019-02-21 23:16 UTC (permalink / raw)
  To: Dongdong Liu; +Cc: keith.busch, okaya, linux-pci, linuxarm, stable

On Mon, Feb 11, 2019 at 03:02:59PM +0800, Dongdong Liu wrote:
> Current info->severity have not assigned a value before calling
> aer_get_device_error_info() and aer_get_device_error_info(),
> Fix the bug to get the severity by reading the port's AER status, mask
> and severity registers. At the same time, add code to clear the port's
> fatal errors.
> 
> Fixes: 8aefa9b0d910 ("PCI/DPC: Print AER status in DPC event handling")
> Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
> Cc: stable@vger.kernel.org
> Cc: Keith Busch <keith.busch@intel.com>
> Cc: Bjorn Helgaas <bhelgaas@google.com>

Applied to pci/dpc for v5.1, thanks!

> ---
>  drivers/pci/pcie/dpc.c | 27 ++++++++++++++++++++++++++-
>  1 file changed, 26 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> index e435d12..7b77754 100644
> --- a/drivers/pci/pcie/dpc.c
> +++ b/drivers/pci/pcie/dpc.c
> @@ -202,6 +202,28 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
>  	pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status);
>  }
>  
> +static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
> +					  struct aer_err_info *info)
> +{
> +	int pos = dev->aer_cap;
> +	u32 status, mask, sev;
> +
> +	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
> +	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
> +	status &= ~mask;
> +	if (!status)
> +		return 0;
> +
> +	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
> +	status &= sev;
> +	if (status)
> +		info->severity = AER_FATAL;
> +	else
> +		info->severity = AER_NONFATAL;
> +
> +	return 1;
> +}
> +
>  static irqreturn_t dpc_handler(int irq, void *context)
>  {
>  	struct aer_err_info info;
> @@ -229,9 +251,12 @@ static irqreturn_t dpc_handler(int irq, void *context)
>  	/* show RP PIO error detail information */
>  	if (dpc->rp_extensions && reason == 3 && ext_reason == 0)
>  		dpc_process_rp_pio_error(dpc);
> -	else if (reason == 0 && aer_get_device_error_info(pdev, &info)) {
> +	else if (reason == 0 &&
> +		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
> +		 aer_get_device_error_info(pdev, &info)) {
>  		aer_print_error(pdev, &info);
>  		pci_cleanup_aer_uncorrect_error_status(pdev);
> +		pci_aer_clear_fatal_status(pdev);
>  	}
>  
>  	/* We configure DPC so it only triggers on ERR_FATAL */
> -- 
> 1.9.1
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, back to index

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-11  7:02 [PATCH RESEND] PCI/DPC: Fix print AER status in DPC event handling Dongdong Liu
2019-02-11 15:46 ` Keith Busch
2019-02-12  2:46   ` Dongdong Liu
2019-02-15 15:11 ` Keith Busch
2019-02-21 23:16 ` Bjorn Helgaas

Linux-PCI Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-pci/0 linux-pci/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-pci linux-pci/ https://lore.kernel.org/linux-pci \
		linux-pci@vger.kernel.org linux-pci@archiver.kernel.org
	public-inbox-index linux-pci


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-pci


AGPL code for this site: git clone https://public-inbox.org/ public-inbox