Linux-EFI Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v2 1/1] efi: cper: print AER info of PCIe fatal error
@ 2019-07-26  1:43 Xiaofei Tan
  2019-08-12  6:51 ` tanxiaofei
  0 siblings, 1 reply; 2+ messages in thread
From: Xiaofei Tan @ 2019-07-26  1:43 UTC (permalink / raw)
  To: linux-kernel
  Cc: Xiaofei Tan, linux-acpi, linux-efi, rjw, lenb, tony.luck, bp,
	ying.huang, ross.lagerwall, ard.biesheuvel, james.morse

AER info of a PCIe fatal error is not printed by the current driver,
because the APEI driver panics directly on a fatal error and never
reaches the code that prints the AER info.

An example log is as follows:
{763}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 11
{763}[Hardware Error]: event severity: fatal
{763}[Hardware Error]:  Error 0, type: fatal
{763}[Hardware Error]:   section_type: PCIe error
{763}[Hardware Error]:   port_type: 0, PCIe end point
{763}[Hardware Error]:   version: 4.0
{763}[Hardware Error]:   command: 0x0000, status: 0x0010
{763}[Hardware Error]:   device_id: 0000:82:00.0
{763}[Hardware Error]:   slot: 0
{763}[Hardware Error]:   secondary_bus: 0x00
{763}[Hardware Error]:   vendor_id: 0x8086, device_id: 0x10fb
{763}[Hardware Error]:   class_code: 000002
Kernel panic - not syncing: Fatal hardware error!

This issue was introduced by commit 37448adfc7ce ("aerdrv: Move
cper_print_aer() call out of interrupt context"). To fix it, this
patch prints the AER info in cper_print_pcie() for fatal errors.

Here is an example log after this patch is applied:
{24}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 10
{24}[Hardware Error]: event severity: fatal
{24}[Hardware Error]:  Error 0, type: fatal
{24}[Hardware Error]:   section_type: PCIe error
{24}[Hardware Error]:   port_type: 0, PCIe end point
{24}[Hardware Error]:   version: 4.0
{24}[Hardware Error]:   command: 0x0546, status: 0x4010
{24}[Hardware Error]:   device_id: 0000:01:00.0
{24}[Hardware Error]:   slot: 0
{24}[Hardware Error]:   secondary_bus: 0x00
{24}[Hardware Error]:   vendor_id: 0x15b3, device_id: 0x1019
{24}[Hardware Error]:   class_code: 000002
{24}[Hardware Error]:   aer_uncor_status: 0x00040000, aer_uncor_mask: 0x00000000
{24}[Hardware Error]:   aer_uncor_severity: 0x00062010
{24}[Hardware Error]:   TLP Header: 000000c0 01010000 00000001 00000000
Kernel panic - not syncing: Fatal hardware error!

Fixes: 37448adfc7ce ("aerdrv: Move cper_print_aer() call out of interrupt context")
Signed-off-by: Xiaofei Tan <tanxiaofei@huawei.com>
Reviewed-by: James Morse <james.morse@arm.com>
---
 drivers/firmware/efi/cper.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 8fa977c..78b8922 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -390,6 +390,21 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 		printk(
 	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
 	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
+
+	/* Fatal errors call __ghes_panic() before AER handler prints this */
+	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO &&
+	    gdata->error_severity & CPER_SEV_FATAL) {
+		struct aer_capability_regs *aer;
+
+		aer = (struct aer_capability_regs *)pcie->aer_info;
+		printk("%saer_uncor_status: 0x%08x, aer_uncor_mask: 0x%08x\n",
+		       pfx, aer->uncor_status, aer->uncor_mask);
+		printk("%saer_uncor_severity: 0x%08x\n",
+		       pfx, aer->uncor_severity);
+		printk("%sTLP Header: %08x %08x %08x %08x\n", pfx,
+		       aer->header_log.dw0, aer->header_log.dw1,
+		       aer->header_log.dw2, aer->header_log.dw3);
+	}
 }
 
 static void cper_print_tstamp(const char *pfx,
-- 
2.8.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH v2 1/1] efi: cper: print AER info of PCIe fatal error
  2019-07-26  1:43 [PATCH v2 1/1] efi: cper: print AER info of PCIe fatal error Xiaofei Tan
@ 2019-08-12  6:51 ` tanxiaofei
  0 siblings, 0 replies; 2+ messages in thread
From: tanxiaofei @ 2019-08-12  6:51 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-acpi, linux-efi, rjw, lenb, tony.luck, bp, ying.huang,
	ross.lagerwall, ard.biesheuvel, james.morse


ping...

On 2019/7/26 9:43, Xiaofei Tan wrote:
> AER info of PCIe fatal error is not printed in the current driver.
> Because APEI driver will panic directly for fatal error, and can't
> run to the place of printing AER info.
> 
> An example log is as following:
> {763}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 11
> {763}[Hardware Error]: event severity: fatal
> {763}[Hardware Error]:  Error 0, type: fatal
> {763}[Hardware Error]:   section_type: PCIe error
> {763}[Hardware Error]:   port_type: 0, PCIe end point
> {763}[Hardware Error]:   version: 4.0
> {763}[Hardware Error]:   command: 0x0000, status: 0x0010
> {763}[Hardware Error]:   device_id: 0000:82:00.0
> {763}[Hardware Error]:   slot: 0
> {763}[Hardware Error]:   secondary_bus: 0x00
> {763}[Hardware Error]:   vendor_id: 0x8086, device_id: 0x10fb
> {763}[Hardware Error]:   class_code: 000002
> Kernel panic - not syncing: Fatal hardware error!
> 
> This issue was imported by the patch, '37448adfc7ce ("aerdrv: Move
> cper_print_aer() call out of interrupt context")'. To fix this issue,
> this patch adds print of AER info in cper_print_pcie() for fatal error.
> 
> Here is the example log after this patch applied:
> {24}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 10
> {24}[Hardware Error]: event severity: fatal
> {24}[Hardware Error]:  Error 0, type: fatal
> {24}[Hardware Error]:   section_type: PCIe error
> {24}[Hardware Error]:   port_type: 0, PCIe end point
> {24}[Hardware Error]:   version: 4.0
> {24}[Hardware Error]:   command: 0x0546, status: 0x4010
> {24}[Hardware Error]:   device_id: 0000:01:00.0
> {24}[Hardware Error]:   slot: 0
> {24}[Hardware Error]:   secondary_bus: 0x00
> {24}[Hardware Error]:   vendor_id: 0x15b3, device_id: 0x1019
> {24}[Hardware Error]:   class_code: 000002
> {24}[Hardware Error]:   aer_uncor_status: 0x00040000, aer_uncor_mask: 0x00000000
> {24}[Hardware Error]:   aer_uncor_severity: 0x00062010
> {24}[Hardware Error]:   TLP Header: 000000c0 01010000 00000001 00000000
> Kernel panic - not syncing: Fatal hardware error!
> 
> Fixes: 37448adfc7ce ("aerdrv: Move cper_print_aer() call out of interrupt context")
> Signed-off-by: Xiaofei Tan <tanxiaofei@huawei.com>
> Reviewed-by: James Morse <james.morse@arm.com>
> ---
>  drivers/firmware/efi/cper.c | 15 +++++++++++++++
>  1 file changed, 15 insertions(+)
> 
> diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
> index 8fa977c..78b8922 100644
> --- a/drivers/firmware/efi/cper.c
> +++ b/drivers/firmware/efi/cper.c
> @@ -390,6 +390,21 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
>  		printk(
>  	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
>  	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
> +
> +	/* Fatal errors call __ghes_panic() before AER handler prints this */
> +	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO &&
> +	    gdata->error_severity & CPER_SEV_FATAL) {
> +		struct aer_capability_regs *aer;
> +
> +		aer = (struct aer_capability_regs *)pcie->aer_info;
> +		printk("%saer_uncor_status: 0x%08x, aer_uncor_mask: 0x%08x\n",
> +		       pfx, aer->uncor_status, aer->uncor_mask);
> +		printk("%saer_uncor_severity: 0x%08x\n",
> +		       pfx, aer->uncor_severity);
> +		printk("%sTLP Header: %08x %08x %08x %08x\n", pfx,
> +		       aer->header_log.dw0, aer->header_log.dw1,
> +		       aer->header_log.dw2, aer->header_log.dw3);
> +	}
>  }
>  
>  static void cper_print_tstamp(const char *pfx,
> 

-- 
 thanks
tanxiaofei


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, back to index

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-26  1:43 [PATCH v2 1/1] efi: cper: print AER info of PCIe fatal error Xiaofei Tan
2019-08-12  6:51 ` tanxiaofei

Linux-EFI Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-efi/0 linux-efi/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-efi linux-efi/ https://lore.kernel.org/linux-efi \
		linux-efi@vger.kernel.org linux-efi@archiver.kernel.org
	public-inbox-index linux-efi


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-efi


AGPL code for this site: git clone https://public-inbox.org/ public-inbox