linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] arm64: RAS: add ras extension runtime detection
@ 2017-03-03 10:39 Xie XiuQi
  2017-03-03 10:39 ` [PATCH 2/2] acpi: apei: handle SEI notification type for ARMv8 Xie XiuQi
  0 siblings, 1 reply; 4+ messages in thread
From: Xie XiuQi @ 2017-03-03 10:39 UTC (permalink / raw)
  To: catalin.marinas, will.deacon, tbaicar, zjzhang, marc.zyngier,
	james.morse
  Cc: wangkefeng.wang, shiju.jose, guohanjun, hanjun.guo, fu.wei,
	wangxiongfeng2, zhengqiang10, linux-arm-kernel, linux-kernel,
	linux-acpi

Add runtime detection of the RAS extension, as described in the
<<RAS Extension PRD03>> document. This will be used for error recovery
in the future.

Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 arch/arm64/include/asm/cpucaps.h |  3 ++-
 arch/arm64/include/asm/sysreg.h  |  2 ++
 arch/arm64/kernel/cpufeature.c   | 11 +++++++++++
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index fb78a5d..3847cf8 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -37,7 +37,8 @@
 #define ARM64_HAS_NO_FPSIMD			16
 #define ARM64_WORKAROUND_REPEAT_TLBI		17
 #define ARM64_WORKAROUND_QCOM_FALKOR_E1003	18
+#define ARM64_HAS_RAS_EXTN			19
 
-#define ARM64_NCAPS				19
+#define ARM64_NCAPS				20
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index ac24b6e..32964c7 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -157,6 +157,7 @@
 #define ID_AA64ISAR0_AES_SHIFT		4
 
 /* id_aa64pfr0 */
+#define ID_AA64PFR0_RAS_SHIFT		28
 #define ID_AA64PFR0_GIC_SHIFT		24
 #define ID_AA64PFR0_ASIMD_SHIFT		20
 #define ID_AA64PFR0_FP_SHIFT		16
@@ -165,6 +166,7 @@
 #define ID_AA64PFR0_EL1_SHIFT		4
 #define ID_AA64PFR0_EL0_SHIFT		0
 
+#define ID_AA64PFR0_RAS_V1		0x1
 #define ID_AA64PFR0_FP_NI		0xf
 #define ID_AA64PFR0_FP_SUPPORTED	0x0
 #define ID_AA64PFR0_ASIMD_NI		0xf
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index abda8e8..b0fb81e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -98,6 +98,7 @@
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_RAS_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
@@ -860,6 +861,16 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
 		.min_field_value = 0,
 		.matches = has_no_fpsimd,
 	},
+	{
+		.desc = "ARM64 RAS Extension Support",
+		.capability = ARM64_HAS_RAS_EXTN,
+		.def_scope = SCOPE_SYSTEM,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64PFR0_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64PFR0_RAS_SHIFT,
+		.min_field_value = ID_AA64PFR0_RAS_V1,
+	},
 	{},
 };
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] acpi: apei: handle SEI notification type for ARMv8
  2017-03-03 10:39 [PATCH 1/2] arm64: RAS: add ras extension runtime detection Xie XiuQi
@ 2017-03-03 10:39 ` Xie XiuQi
  2017-03-06 10:00   ` James Morse
  0 siblings, 1 reply; 4+ messages in thread
From: Xie XiuQi @ 2017-03-03 10:39 UTC (permalink / raw)
  To: catalin.marinas, will.deacon, tbaicar, zjzhang, marc.zyngier,
	james.morse
  Cc: wangkefeng.wang, shiju.jose, guohanjun, hanjun.guo, fu.wei,
	wangxiongfeng2, zhengqiang10, linux-arm-kernel, linux-kernel,
	linux-acpi

The ARM APEI extension proposal adds the SEI (asynchronous SError
interrupt) notification type for ARMv8.

Add a new GHES error source handling function for SEI. In firmware-first
mode, if an error source's notification type is SEI, GHES can parse and
report the detailed error information.

Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
---
 arch/arm64/kernel/traps.c |  5 ++++
 drivers/acpi/apei/Kconfig | 14 ++++++++++++
 drivers/acpi/apei/ghes.c  | 58 ++++++++++++++++++++++++++++++++++++++++++++++-
 include/acpi/ghes.h       |  1 +
 4 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 7d47c2c..43f616d 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -44,6 +44,8 @@
 #include <asm/system_misc.h>
 #include <asm/sysreg.h>
 
+#include <acpi/ghes.h>
+
 static const char *handler[]= {
 	"Synchronous Abort",
 	"IRQ",
@@ -622,6 +624,9 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
 		handler[reason], smp_processor_id(), esr,
 		esr_get_class_string(esr));
 
+	if (IS_ENABLED(ACPI_APEI_SEI) && ESR_ELx_EC(esr) == ESR_ELx_EC_SERROR)
+		ghes_notify_sei();
+
 	die("Oops - bad mode", regs, 0);
 	local_irq_disable();
 	panic("bad mode");
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index 1122d7f..a32f046 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -18,6 +18,20 @@ config HAVE_ACPI_APEI_SEA
 	  option allows the OS to look for such hardware error record, and
 	  take appropriate action.
 
+config ACPI_APEI_SEI
+	bool "APEI Asynchronous SError Interrupt logging/recovering support"
+	depends on ARM64 && ACPI_APEI_GHES
+	help
+	  This option should be enabled if the system supports
+	  firmware first handling of SEI (asynchronous SError interrupt).
+
+	  SEI happens with invalid instruction access or asynchronous exceptions
+	  on ARMv8 systems. If a system supports firmware first handling of SEI,
+	  the platform analyzes and handles hardware error notifications from
+	  SEI, and it may then form a HW error record for the OS to parse and
+	  handle. This option allows the OS to look for such hardware error
+	  record, and take appropriate action.
+
 config ACPI_APEI
 	bool "ACPI Platform Error Interface (APEI)"
 	select MISC_FILESYSTEMS
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 3e4ea1b..d084a09 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -850,6 +850,50 @@ static inline void ghes_sea_remove(struct ghes *ghes)
 }
 #endif /* CONFIG_HAVE_ACPI_APEI_SEA */
 
+#ifdef CONFIG_ACPI_APEI_SEI
+static LIST_HEAD(ghes_sei);
+
+void ghes_notify_sei(void)
+{
+	struct ghes *ghes;
+
+	/*
+	 * synchronize_rcu() will wait for nmi_exit(), so no need to
+	 * rcu_read_lock().
+	 */
+	list_for_each_entry_rcu(ghes, &ghes_sei, list) {
+		ghes_proc(ghes);
+	}
+}
+
+static void ghes_sei_add(struct ghes *ghes)
+{
+	mutex_lock(&ghes_list_mutex);
+	list_add_rcu(&ghes->list, &ghes_sei);
+	mutex_unlock(&ghes_list_mutex);
+}
+
+static void ghes_sei_remove(struct ghes *ghes)
+{
+	mutex_lock(&ghes_list_mutex);
+	list_del_rcu(&ghes->list);
+	mutex_unlock(&ghes_list_mutex);
+	synchronize_rcu();
+}
+#else /* CONFIG_ACPI_APEI_SEI */
+static inline void ghes_sei_add(struct ghes *ghes)
+{
+	pr_err(GHES_PFX "ID: %d, trying to add SEI notification which is not supported\n",
+	       ghes->generic->header.source_id);
+}
+
+static inline void ghes_sei_remove(struct ghes *ghes)
+{
+	pr_err(GHES_PFX "ID: %d, trying to remove SEI notification which is not supported\n",
+	       ghes->generic->header.source_id);
+}
+#endif /* CONFIG_ACPI_APEI_SEI */
+
 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
 /*
  * printk is not safe in NMI context.  So in NMI handler, we allocate
@@ -1099,6 +1143,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
 			goto err;
 		}
 		break;
+	case ACPI_HEST_NOTIFY_SEI:
+		if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_SEI)) {
+			pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEI is not supported!\n",
+				generic->header.source_id);
+			goto err;
+		}
+		break;
 	case ACPI_HEST_NOTIFY_NMI:
 		if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
 			pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
@@ -1111,7 +1162,6 @@ static int ghes_probe(struct platform_device *ghes_dev)
 			   generic->header.source_id);
 		goto err;
 	case ACPI_HEST_NOTIFY_GPIO:
-	case ACPI_HEST_NOTIFY_SEI:
 	case ACPI_HEST_NOTIFY_GSIV:
 		pr_warn(GHES_PFX "Generic hardware error source: %d notified via notification type %u is not supported\n",
 			generic->header.source_id, generic->header.source_id);
@@ -1174,6 +1224,9 @@ static int ghes_probe(struct platform_device *ghes_dev)
 	case ACPI_HEST_NOTIFY_SEA:
 		ghes_sea_add(ghes);
 		break;
+	case ACPI_HEST_NOTIFY_SEI:
+		ghes_sei_add(ghes);
+		break;
 	case ACPI_HEST_NOTIFY_NMI:
 		ghes_nmi_add(ghes);
 		break;
@@ -1219,6 +1272,9 @@ static int ghes_remove(struct platform_device *ghes_dev)
 	case ACPI_HEST_NOTIFY_SEA:
 		ghes_sea_remove(ghes);
 		break;
+	case ACPI_HEST_NOTIFY_SEI:
+		ghes_sei_remove(ghes);
+		break;
 	case ACPI_HEST_NOTIFY_NMI:
 		ghes_nmi_remove(ghes);
 		break;
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 18bc935..7554658 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -100,5 +100,6 @@ static inline void *acpi_hest_generic_data_payload(struct acpi_hest_generic_data
 }
 
 void ghes_notify_sea(void);
+void ghes_notify_sei(void);
 
 #endif /* GHES_H */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 2/2] acpi: apei: handle SEI notification type for ARMv8
  2017-03-03 10:39 ` [PATCH 2/2] acpi: apei: handle SEI notification type for ARMv8 Xie XiuQi
@ 2017-03-06 10:00   ` James Morse
  2017-03-06 11:06     ` Xie XiuQi
  0 siblings, 1 reply; 4+ messages in thread
From: James Morse @ 2017-03-06 10:00 UTC (permalink / raw)
  To: Xie XiuQi, tbaicar
  Cc: catalin.marinas, will.deacon, zjzhang, marc.zyngier,
	linux-arm-kernel, wangkefeng.wang, linux-kernel, linux-acpi,
	hanjun.guo, guohanjun, zhengqiang10, wangxiongfeng2, fu.wei,
	shiju.jose

Hi Xie XiuQi,

On 03/03/17 10:39, Xie XiuQi wrote:
> ARM APEI extension proposal added SEI (asynchronous SError interrupt)
> notification type for ARMv8.
> 
> Add a new GHES error source handling function for SEI. In firmware
> first mode, if an error source's notification type is SEI. Then GHES
> could parse and report the detail error information.

This patch doesn't apply to any upstream tree. Is this based on Tyler's larger
UEFI/ACPI update series? If so, please mention this in your cover letter, (Nit:
please include a cover letter when sending two or more patches!).

What happens if the SError Interrupt arrives while KVM was doing its work? We
set the HCR_EL2.AMO bit when running a guest, so KVM may receive these instead
of the host kernel.


> diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
> index 1122d7f..a32f046 100644
> --- a/drivers/acpi/apei/Kconfig
> +++ b/drivers/acpi/apei/Kconfig
> @@ -18,6 +18,20 @@ config HAVE_ACPI_APEI_SEA
>  	  option allows the OS to look for such hardware error record, and
>  	  take appropriate action.
>  
> +config ACPI_APEI_SEI
> +	bool "APEI Asynchronous SError Interrupt logging/recovering support"
> +	depends on ARM64 && ACPI_APEI_GHES
> +	help
> +	  This option should be enabled if the system supports
> +	  firmware first handling of SEI (asynchronous SError interrupt).
> +
> +	  SEI happens with invalid instruction access or asynchronous exceptions
> +	  on ARMv8 systems. If a system supports firmware first handling of SEI,
> +	  the platform analyzes and handles hardware error notifications from
> +	  SEI, and it may then form a HW error record for the OS to parse and
> +	  handle. This option allows the OS to look for such hardware error
> +	  record, and take appropriate action.
> +
>  config ACPI_APEI
>  	bool "ACPI Platform Error Interface (APEI)"
>  	select MISC_FILESYSTEMS
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 3e4ea1b..d084a09 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -850,6 +850,50 @@ static inline void ghes_sea_remove(struct ghes *ghes)
>  }
>  #endif /* CONFIG_HAVE_ACPI_APEI_SEA */
>  
> +#ifdef CONFIG_ACPI_APEI_SEI
> +static LIST_HEAD(ghes_sei);
> +
> +void ghes_notify_sei(void)
> +{
> +	struct ghes *ghes;
> +
> +	/*
> +	 * synchronize_rcu() will wait for nmi_exit(), so no need to

Where nmi_exit()?

This nmi enter/exit was to prevent APEI being interrupted by APEI and trying to
take the same set of locks. APEI masks IRQs to prevent this happening normally,
but Synchronous External Abort couldn't be masked.
We don't mask Asynchronous Exceptions in APEI so the same thing can happen here.
Adding nmi_{enter,exit}() round the ghes call in the arch bad_mode() will
prevent this lockup.


Thanks,

James


> +	 * rcu_read_lock().
> +	 */
> +	list_for_each_entry_rcu(ghes, &ghes_sei, list) {
> +		ghes_proc(ghes);
> +	}
> +}
> +
> +static void ghes_sei_add(struct ghes *ghes)
> +{
> +	mutex_lock(&ghes_list_mutex);
> +	list_add_rcu(&ghes->list, &ghes_sei);
> +	mutex_unlock(&ghes_list_mutex);
> +}
> +
> +static void ghes_sei_remove(struct ghes *ghes)
> +{
> +	mutex_lock(&ghes_list_mutex);
> +	list_del_rcu(&ghes->list);
> +	mutex_unlock(&ghes_list_mutex);
> +	synchronize_rcu();
> +}
> +#else /* CONFIG_ACPI_APEI_SEI */
> +static inline void ghes_sei_add(struct ghes *ghes)
> +{
> +	pr_err(GHES_PFX "ID: %d, trying to add SEI notification which is not supported\n",
> +	       ghes->generic->header.source_id);
> +}
> +
> +static inline void ghes_sei_remove(struct ghes *ghes)
> +{
> +	pr_err(GHES_PFX "ID: %d, trying to remove SEI notification which is not supported\n",
> +	       ghes->generic->header.source_id);
> +}
> +#endif /* CONFIG_HAVE_ACPI_APEI_SEI */
> +
>  #ifdef CONFIG_HAVE_ACPI_APEI_NMI
>  /*
>   * printk is not safe in NMI context.  So in NMI handler, we allocate

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 2/2] acpi: apei: handle SEI notification type for ARMv8
  2017-03-06 10:00   ` James Morse
@ 2017-03-06 11:06     ` Xie XiuQi
  0 siblings, 0 replies; 4+ messages in thread
From: Xie XiuQi @ 2017-03-06 11:06 UTC (permalink / raw)
  To: James Morse, tbaicar
  Cc: catalin.marinas, will.deacon, zjzhang, marc.zyngier,
	linux-arm-kernel, wangkefeng.wang, linux-kernel, linux-acpi,
	hanjun.guo, guohanjun, zhengqiang10, wangxiongfeng2, fu.wei,
	shiju.jose

Hi James,

Thanks for your comments.

On 2017/3/6 18:00, James Morse wrote:
> Hi Xie XiuQi,
> 
> On 03/03/17 10:39, Xie XiuQi wrote:
>> ARM APEI extension proposal added SEI (asynchronous SError interrupt)
>> notification type for ARMv8.
>>
>> Add a new GHES error source handling function for SEI. In firmware
>> first mode, if an error source's notification type is SEI. Then GHES
>> could parse and report the detail error information.
> 
> This patch doesn't apply to any upstream tree. Is this based on Tyler's larger
> UEFI/ACPI update series? If so, please mention this in your cover letter, (Nit:
> please include a cover letter when sending two or more patches!).
> 

Yes, this patch is based on Tyler's series "[PATCH V11 00/10] Add UEFI 2.6 and ACPI 6.1 updates
for RAS on ARM64" and linux-next 20170302.

I'll add a cover letter next time, thanks.


> What happens if the SError Interrupt arrives while KVM was doing its work? We
> set the HCR_EL2.AMO bit when running a guest, so KVM may receive these instead
> of the host kernel.
> 

OK, I'll address it in the next version.

> 
>> diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
>> index 1122d7f..a32f046 100644
>> --- a/drivers/acpi/apei/Kconfig
>> +++ b/drivers/acpi/apei/Kconfig
>> @@ -18,6 +18,20 @@ config HAVE_ACPI_APEI_SEA
>>  	  option allows the OS to look for such hardware error record, and
>>  	  take appropriate action.
>>  
>> +config ACPI_APEI_SEI
>> +	bool "APEI Asynchronous SError Interrupt logging/recovering support"
>> +	depends on ARM64 && ACPI_APEI_GHES
>> +	help
>> +	  This option should be enabled if the system supports
>> +	  firmware first handling of SEI (asynchronous SError interrupt).
>> +
>> +	  SEI happens with invalid instruction access or asynchronous exceptions
>> +	  on ARMv8 systems. If a system supports firmware first handling of SEI,
>> +	  the platform analyzes and handles hardware error notifications from
>> +	  SEI, and it may then form a HW error record for the OS to parse and
>> +	  handle. This option allows the OS to look for such hardware error
>> +	  record, and take appropriate action.
>> +
>>  config ACPI_APEI
>>  	bool "ACPI Platform Error Interface (APEI)"
>>  	select MISC_FILESYSTEMS
>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
>> index 3e4ea1b..d084a09 100644
>> --- a/drivers/acpi/apei/ghes.c
>> +++ b/drivers/acpi/apei/ghes.c
>> @@ -850,6 +850,50 @@ static inline void ghes_sea_remove(struct ghes *ghes)
>>  }
>>  #endif /* CONFIG_HAVE_ACPI_APEI_SEA */
>>  
>> +#ifdef CONFIG_ACPI_APEI_SEI
>> +static LIST_HEAD(ghes_sei);
>> +
>> +void ghes_notify_sei(void)
>> +{
>> +	struct ghes *ghes;
>> +
>> +	/*
>> +	 * synchronize_rcu() will wait for nmi_exit(), so no need to
> 
> Where nmi_exit()?
> 
> This nmi enter/exit was to prevent APEI being interrupted by APEI and trying to
> take the same set of locks. APEI masks IRQs to prevent this happening normally,
> but Synchronous External Abort couldn't be masked.
> We don't mask Asynchronous Exceptions in APEI so the same thing can happen here.
> Adding nmi_{enter,exit}() round the ghes call in the arch bad_mode() will
> prevent this lockup.
> 

Thank you for your detailed explanation, I'll add it in the next version.

Thanks,
Xie XiuQi

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-03-06 11:10 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-03 10:39 [PATCH 1/2] arm64: RAS: add ras extension runtime detection Xie XiuQi
2017-03-03 10:39 ` [PATCH 2/2] acpi: apei: handle SEI notification type for ARMv8 Xie XiuQi
2017-03-06 10:00   ` James Morse
2017-03-06 11:06     ` Xie XiuQi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).