All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] soc: tegra: Add Tegra186 ARI driver
@ 2021-06-17 12:13 ` Mikko Perttunen
  0 siblings, 0 replies; 8+ messages in thread
From: Mikko Perttunen @ 2021-06-17 12:13 UTC (permalink / raw)
  To: thierry.reding, jonathanh; +Cc: linux-tegra, linux-arm-kernel, Mikko Perttunen

Add a driver to hook into panic notifiers and print machine check
status for debugging. Status information is retrieved via SMC. This
is supported by upstream ARM Trusted Firmware.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
---
v2:
* Changed to use panic notifier instead of serror hook
---
 drivers/soc/tegra/Makefile       |  1 +
 drivers/soc/tegra/ari-tegra186.c | 80 ++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 drivers/soc/tegra/ari-tegra186.c

diff --git a/drivers/soc/tegra/Makefile b/drivers/soc/tegra/Makefile
index 9c809c1814bd..054e862b63d8 100644
--- a/drivers/soc/tegra/Makefile
+++ b/drivers/soc/tegra/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_SOC_TEGRA_PMC) += pmc.o
 obj-$(CONFIG_SOC_TEGRA_POWERGATE_BPMP) += powergate-bpmp.o
 obj-$(CONFIG_SOC_TEGRA20_VOLTAGE_COUPLER) += regulators-tegra20.o
 obj-$(CONFIG_SOC_TEGRA30_VOLTAGE_COUPLER) += regulators-tegra30.o
+obj-$(CONFIG_ARCH_TEGRA_186_SOC) += ari-tegra186.o
diff --git a/drivers/soc/tegra/ari-tegra186.c b/drivers/soc/tegra/ari-tegra186.c
new file mode 100644
index 000000000000..02577853ec49
--- /dev/null
+++ b/drivers/soc/tegra/ari-tegra186.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/panic_notifier.h>
+
+#define SMC_SIP_INVOKE_MCE			0xc2ffff00
+#define MCE_SMC_READ_MCA			12
+
+#define MCA_ARI_CMD_RD_SERR			1
+
+#define MCA_ARI_RW_SUBIDX_STAT			1
+#define SERR_STATUS_VAL				BIT_ULL(63)
+
+#define MCA_ARI_RW_SUBIDX_ADDR			2
+#define MCA_ARI_RW_SUBIDX_MSC1			3
+#define MCA_ARI_RW_SUBIDX_MSC2			4
+
+static const char * const bank_names[] = {
+	"SYS:DPMU", "ROC:IOB", "ROC:MCB", "ROC:CCE", "ROC:CQX", "ROC:CTU",
+};
+
+static void read_uncore_mca(u8 cmd, u8 idx, u8 subidx, u8 inst, u64 *data)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(SMC_SIP_INVOKE_MCE | MCE_SMC_READ_MCA,
+		      ((u64)inst << 24) | ((u64)idx << 16) |
+			      ((u64)subidx << 8) | ((u64)cmd << 0),
+		      0, 0, 0, 0, 0, 0, &res);
+
+	*data = res.a2;
+}
+
+static int tegra186_ari_panic_handler(struct notifier_block *nb,
+				      unsigned long code, void *unused)
+{
+	u64 status;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(bank_names); i++) {
+		read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, MCA_ARI_RW_SUBIDX_STAT,
+				0, &status);
+
+		if (status & SERR_STATUS_VAL) {
+			u64 addr, misc1, misc2;
+
+			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
+					MCA_ARI_RW_SUBIDX_ADDR, 0, &addr);
+			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
+					MCA_ARI_RW_SUBIDX_MSC1, 0, &misc1);
+			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
+					MCA_ARI_RW_SUBIDX_MSC2, 0, &misc2);
+
+			pr_crit("Machine Check Error in %s\n"
+				"  status=0x%llx addr=0x%llx\n"
+				"  msc1=0x%llx msc2=0x%llx\n",
+				bank_names[i], status, addr, misc1, misc2);
+		}
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block tegra186_ari_panic_nb = {
+	.notifier_call = tegra186_ari_panic_handler,
+};
+
+static int __init tegra186_ari_init(void)
+{
+	if (of_machine_is_compatible("nvidia,tegra186"))
+		atomic_notifier_chain_register(&panic_notifier_list, &tegra186_ari_panic_nb);
+
+	return 0;
+}
+early_initcall(tegra186_ari_init);
-- 
2.30.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2] soc: tegra: Add Tegra186 ARI driver
@ 2021-06-17 12:13 ` Mikko Perttunen
  0 siblings, 0 replies; 8+ messages in thread
From: Mikko Perttunen @ 2021-06-17 12:13 UTC (permalink / raw)
  To: thierry.reding, jonathanh; +Cc: linux-tegra, linux-arm-kernel, Mikko Perttunen

Add a driver to hook into panic notifiers and print machine check
status for debugging. Status information is retrieved via SMC. This
is supported by upstream ARM Trusted Firmware.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
---
v2:
* Changed to use panic notifier instead of serror hook
---
 drivers/soc/tegra/Makefile       |  1 +
 drivers/soc/tegra/ari-tegra186.c | 80 ++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 drivers/soc/tegra/ari-tegra186.c

diff --git a/drivers/soc/tegra/Makefile b/drivers/soc/tegra/Makefile
index 9c809c1814bd..054e862b63d8 100644
--- a/drivers/soc/tegra/Makefile
+++ b/drivers/soc/tegra/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_SOC_TEGRA_PMC) += pmc.o
 obj-$(CONFIG_SOC_TEGRA_POWERGATE_BPMP) += powergate-bpmp.o
 obj-$(CONFIG_SOC_TEGRA20_VOLTAGE_COUPLER) += regulators-tegra20.o
 obj-$(CONFIG_SOC_TEGRA30_VOLTAGE_COUPLER) += regulators-tegra30.o
+obj-$(CONFIG_ARCH_TEGRA_186_SOC) += ari-tegra186.o
diff --git a/drivers/soc/tegra/ari-tegra186.c b/drivers/soc/tegra/ari-tegra186.c
new file mode 100644
index 000000000000..02577853ec49
--- /dev/null
+++ b/drivers/soc/tegra/ari-tegra186.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/panic_notifier.h>
+
+#define SMC_SIP_INVOKE_MCE			0xc2ffff00
+#define MCE_SMC_READ_MCA			12
+
+#define MCA_ARI_CMD_RD_SERR			1
+
+#define MCA_ARI_RW_SUBIDX_STAT			1
+#define SERR_STATUS_VAL				BIT_ULL(63)
+
+#define MCA_ARI_RW_SUBIDX_ADDR			2
+#define MCA_ARI_RW_SUBIDX_MSC1			3
+#define MCA_ARI_RW_SUBIDX_MSC2			4
+
+static const char * const bank_names[] = {
+	"SYS:DPMU", "ROC:IOB", "ROC:MCB", "ROC:CCE", "ROC:CQX", "ROC:CTU",
+};
+
+static void read_uncore_mca(u8 cmd, u8 idx, u8 subidx, u8 inst, u64 *data)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(SMC_SIP_INVOKE_MCE | MCE_SMC_READ_MCA,
+		      ((u64)inst << 24) | ((u64)idx << 16) |
+			      ((u64)subidx << 8) | ((u64)cmd << 0),
+		      0, 0, 0, 0, 0, 0, &res);
+
+	*data = res.a2;
+}
+
+static int tegra186_ari_panic_handler(struct notifier_block *nb,
+				      unsigned long code, void *unused)
+{
+	u64 status;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(bank_names); i++) {
+		read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, MCA_ARI_RW_SUBIDX_STAT,
+				0, &status);
+
+		if (status & SERR_STATUS_VAL) {
+			u64 addr, misc1, misc2;
+
+			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
+					MCA_ARI_RW_SUBIDX_ADDR, 0, &addr);
+			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
+					MCA_ARI_RW_SUBIDX_MSC1, 0, &misc1);
+			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
+					MCA_ARI_RW_SUBIDX_MSC2, 0, &misc2);
+
+			pr_crit("Machine Check Error in %s\n"
+				"  status=0x%llx addr=0x%llx\n"
+				"  msc1=0x%llx msc2=0x%llx\n",
+				bank_names[i], status, addr, misc1, misc2);
+		}
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block tegra186_ari_panic_nb = {
+	.notifier_call = tegra186_ari_panic_handler,
+};
+
+static int __init tegra186_ari_init(void)
+{
+	if (of_machine_is_compatible("nvidia,tegra186"))
+		atomic_notifier_chain_register(&panic_notifier_list, &tegra186_ari_panic_nb);
+
+	return 0;
+}
+early_initcall(tegra186_ari_init);
-- 
2.30.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] soc: tegra: Add Tegra186 ARI driver
  2021-06-17 12:13 ` Mikko Perttunen
@ 2021-06-18 12:03   ` Thierry Reding
  -1 siblings, 0 replies; 8+ messages in thread
From: Thierry Reding @ 2021-06-18 12:03 UTC (permalink / raw)
  To: Mikko Perttunen; +Cc: jonathanh, linux-tegra, linux-arm-kernel

[-- Attachment #1: Type: text/plain, Size: 3934 bytes --]

On Thu, Jun 17, 2021 at 03:13:07PM +0300, Mikko Perttunen wrote:
> Add a driver to hook into panic notifiers and print machine check
> status for debugging. Status information is retrieved via SMC. This
> is supported by upstream ARM Trusted Firmware.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> ---
> v2:
> * Changed to use panic notifier instead of serror hook
> ---
>  drivers/soc/tegra/Makefile       |  1 +
>  drivers/soc/tegra/ari-tegra186.c | 80 ++++++++++++++++++++++++++++++++
>  2 files changed, 81 insertions(+)
>  create mode 100644 drivers/soc/tegra/ari-tegra186.c
> 
> diff --git a/drivers/soc/tegra/Makefile b/drivers/soc/tegra/Makefile
> index 9c809c1814bd..054e862b63d8 100644
> --- a/drivers/soc/tegra/Makefile
> +++ b/drivers/soc/tegra/Makefile
> @@ -7,3 +7,4 @@ obj-$(CONFIG_SOC_TEGRA_PMC) += pmc.o
>  obj-$(CONFIG_SOC_TEGRA_POWERGATE_BPMP) += powergate-bpmp.o
>  obj-$(CONFIG_SOC_TEGRA20_VOLTAGE_COUPLER) += regulators-tegra20.o
>  obj-$(CONFIG_SOC_TEGRA30_VOLTAGE_COUPLER) += regulators-tegra30.o
> +obj-$(CONFIG_ARCH_TEGRA_186_SOC) += ari-tegra186.o
> diff --git a/drivers/soc/tegra/ari-tegra186.c b/drivers/soc/tegra/ari-tegra186.c
> new file mode 100644
> index 000000000000..02577853ec49
> --- /dev/null
> +++ b/drivers/soc/tegra/ari-tegra186.c
> @@ -0,0 +1,80 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
> + */
> +
> +#include <linux/arm-smccc.h>
> +#include <linux/kernel.h>
> +#include <linux/of.h>
> +#include <linux/panic_notifier.h>
> +
> +#define SMC_SIP_INVOKE_MCE			0xc2ffff00
> +#define MCE_SMC_READ_MCA			12
> +
> +#define MCA_ARI_CMD_RD_SERR			1
> +
> +#define MCA_ARI_RW_SUBIDX_STAT			1
> +#define SERR_STATUS_VAL				BIT_ULL(63)
> +
> +#define MCA_ARI_RW_SUBIDX_ADDR			2
> +#define MCA_ARI_RW_SUBIDX_MSC1			3
> +#define MCA_ARI_RW_SUBIDX_MSC2			4
> +
> +static const char * const bank_names[] = {
> +	"SYS:DPMU", "ROC:IOB", "ROC:MCB", "ROC:CCE", "ROC:CQX", "ROC:CTU",
> +};
> +
> +static void read_uncore_mca(u8 cmd, u8 idx, u8 subidx, u8 inst, u64 *data)
> +{
> +	struct arm_smccc_res res;
> +
> +	arm_smccc_smc(SMC_SIP_INVOKE_MCE | MCE_SMC_READ_MCA,
> +		      ((u64)inst << 24) | ((u64)idx << 16) |
> +			      ((u64)subidx << 8) | ((u64)cmd << 0),
> +		      0, 0, 0, 0, 0, 0, &res);
> +
> +	*data = res.a2;
> +}
> +
> +static int tegra186_ari_panic_handler(struct notifier_block *nb,
> +				      unsigned long code, void *unused)
> +{
> +	u64 status;
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(bank_names); i++) {
> +		read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, MCA_ARI_RW_SUBIDX_STAT,
> +				0, &status);
> +
> +		if (status & SERR_STATUS_VAL) {
> +			u64 addr, misc1, misc2;
> +
> +			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
> +					MCA_ARI_RW_SUBIDX_ADDR, 0, &addr);
> +			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
> +					MCA_ARI_RW_SUBIDX_MSC1, 0, &misc1);
> +			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
> +					MCA_ARI_RW_SUBIDX_MSC2, 0, &misc2);
> +
> +			pr_crit("Machine Check Error in %s\n"
> +				"  status=0x%llx addr=0x%llx\n"
> +				"  msc1=0x%llx msc2=0x%llx\n",
> +				bank_names[i], status, addr, misc1, misc2);

This still looks rather cryptic to me. Is there some way to further
decode things like the status and MSC registers? Or is this something
that people are supposed to know how to interpret?

Also, I'm not sure it's evident what those various banks are. Is there
some way we can provide a description for these?

Additional information doesn't necessarily have to go into code, but
it'd be nice if at least there was some sort of comment somewhere that
goes into a bit more detail so that people know how to use this. Or
perhaps this is documented in the TRM? If so, perhaps provide a
reference to that so that people know where to find the information.

Thierry

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] soc: tegra: Add Tegra186 ARI driver
@ 2021-06-18 12:03   ` Thierry Reding
  0 siblings, 0 replies; 8+ messages in thread
From: Thierry Reding @ 2021-06-18 12:03 UTC (permalink / raw)
  To: Mikko Perttunen; +Cc: jonathanh, linux-tegra, linux-arm-kernel


[-- Attachment #1.1: Type: text/plain, Size: 3934 bytes --]

On Thu, Jun 17, 2021 at 03:13:07PM +0300, Mikko Perttunen wrote:
> Add a driver to hook into panic notifiers and print machine check
> status for debugging. Status information is retrieved via SMC. This
> is supported by upstream ARM Trusted Firmware.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> ---
> v2:
> * Changed to use panic notifier instead of serror hook
> ---
>  drivers/soc/tegra/Makefile       |  1 +
>  drivers/soc/tegra/ari-tegra186.c | 80 ++++++++++++++++++++++++++++++++
>  2 files changed, 81 insertions(+)
>  create mode 100644 drivers/soc/tegra/ari-tegra186.c
> 
> diff --git a/drivers/soc/tegra/Makefile b/drivers/soc/tegra/Makefile
> index 9c809c1814bd..054e862b63d8 100644
> --- a/drivers/soc/tegra/Makefile
> +++ b/drivers/soc/tegra/Makefile
> @@ -7,3 +7,4 @@ obj-$(CONFIG_SOC_TEGRA_PMC) += pmc.o
>  obj-$(CONFIG_SOC_TEGRA_POWERGATE_BPMP) += powergate-bpmp.o
>  obj-$(CONFIG_SOC_TEGRA20_VOLTAGE_COUPLER) += regulators-tegra20.o
>  obj-$(CONFIG_SOC_TEGRA30_VOLTAGE_COUPLER) += regulators-tegra30.o
> +obj-$(CONFIG_ARCH_TEGRA_186_SOC) += ari-tegra186.o
> diff --git a/drivers/soc/tegra/ari-tegra186.c b/drivers/soc/tegra/ari-tegra186.c
> new file mode 100644
> index 000000000000..02577853ec49
> --- /dev/null
> +++ b/drivers/soc/tegra/ari-tegra186.c
> @@ -0,0 +1,80 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
> + */
> +
> +#include <linux/arm-smccc.h>
> +#include <linux/kernel.h>
> +#include <linux/of.h>
> +#include <linux/panic_notifier.h>
> +
> +#define SMC_SIP_INVOKE_MCE			0xc2ffff00
> +#define MCE_SMC_READ_MCA			12
> +
> +#define MCA_ARI_CMD_RD_SERR			1
> +
> +#define MCA_ARI_RW_SUBIDX_STAT			1
> +#define SERR_STATUS_VAL				BIT_ULL(63)
> +
> +#define MCA_ARI_RW_SUBIDX_ADDR			2
> +#define MCA_ARI_RW_SUBIDX_MSC1			3
> +#define MCA_ARI_RW_SUBIDX_MSC2			4
> +
> +static const char * const bank_names[] = {
> +	"SYS:DPMU", "ROC:IOB", "ROC:MCB", "ROC:CCE", "ROC:CQX", "ROC:CTU",
> +};
> +
> +static void read_uncore_mca(u8 cmd, u8 idx, u8 subidx, u8 inst, u64 *data)
> +{
> +	struct arm_smccc_res res;
> +
> +	arm_smccc_smc(SMC_SIP_INVOKE_MCE | MCE_SMC_READ_MCA,
> +		      ((u64)inst << 24) | ((u64)idx << 16) |
> +			      ((u64)subidx << 8) | ((u64)cmd << 0),
> +		      0, 0, 0, 0, 0, 0, &res);
> +
> +	*data = res.a2;
> +}
> +
> +static int tegra186_ari_panic_handler(struct notifier_block *nb,
> +				      unsigned long code, void *unused)
> +{
> +	u64 status;
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(bank_names); i++) {
> +		read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, MCA_ARI_RW_SUBIDX_STAT,
> +				0, &status);
> +
> +		if (status & SERR_STATUS_VAL) {
> +			u64 addr, misc1, misc2;
> +
> +			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
> +					MCA_ARI_RW_SUBIDX_ADDR, 0, &addr);
> +			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
> +					MCA_ARI_RW_SUBIDX_MSC1, 0, &misc1);
> +			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
> +					MCA_ARI_RW_SUBIDX_MSC2, 0, &misc2);
> +
> +			pr_crit("Machine Check Error in %s\n"
> +				"  status=0x%llx addr=0x%llx\n"
> +				"  msc1=0x%llx msc2=0x%llx\n",
> +				bank_names[i], status, addr, misc1, misc2);

This still looks rather cryptic to me. Is there some way to further
decode things like the status and MSC registers? Or is this something
that people are supposed to know how to interpret?

Also, I'm not sure it's evident what those various banks are. Is there
some way we can provide a description for these?

Additional information doesn't necessarily have to go into code, but
it'd be nice if at least there was some sort of comment somewhere that
goes into a bit more detail so that people know how to use this. Or
perhaps this is documented in the TRM? If so, perhaps provide a
reference to that so that people know where to find the information.

Thierry

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

[-- Attachment #2: Type: text/plain, Size: 176 bytes --]

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] soc: tegra: Add Tegra186 ARI driver
  2021-06-18 12:03   ` Thierry Reding
@ 2021-06-18 12:32     ` Mikko Perttunen
  -1 siblings, 0 replies; 8+ messages in thread
From: Mikko Perttunen @ 2021-06-18 12:32 UTC (permalink / raw)
  To: Thierry Reding, Mikko Perttunen; +Cc: jonathanh, linux-tegra, linux-arm-kernel

On 6/18/21 3:03 PM, Thierry Reding wrote:
> On Thu, Jun 17, 2021 at 03:13:07PM +0300, Mikko Perttunen wrote:
>> Add a driver to hook into panic notifiers and print machine check
>> status for debugging. Status information is retrieved via SMC. This
>> is supported by upstream ARM Trusted Firmware.
>>
>> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
>> ---
>> v2:
>> * Changed to use panic notifier instead of serror hook
>> ---
>>   drivers/soc/tegra/Makefile       |  1 +
>>   drivers/soc/tegra/ari-tegra186.c | 80 ++++++++++++++++++++++++++++++++
>>   2 files changed, 81 insertions(+)
>>   create mode 100644 drivers/soc/tegra/ari-tegra186.c
>>
>> diff --git a/drivers/soc/tegra/Makefile b/drivers/soc/tegra/Makefile
>> index 9c809c1814bd..054e862b63d8 100644
>> --- a/drivers/soc/tegra/Makefile
>> +++ b/drivers/soc/tegra/Makefile
>> @@ -7,3 +7,4 @@ obj-$(CONFIG_SOC_TEGRA_PMC) += pmc.o
>>   obj-$(CONFIG_SOC_TEGRA_POWERGATE_BPMP) += powergate-bpmp.o
>>   obj-$(CONFIG_SOC_TEGRA20_VOLTAGE_COUPLER) += regulators-tegra20.o
>>   obj-$(CONFIG_SOC_TEGRA30_VOLTAGE_COUPLER) += regulators-tegra30.o
>> +obj-$(CONFIG_ARCH_TEGRA_186_SOC) += ari-tegra186.o
>> diff --git a/drivers/soc/tegra/ari-tegra186.c b/drivers/soc/tegra/ari-tegra186.c
>> new file mode 100644
>> index 000000000000..02577853ec49
>> --- /dev/null
>> +++ b/drivers/soc/tegra/ari-tegra186.c
>> @@ -0,0 +1,80 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
>> + */
>> +
>> +#include <linux/arm-smccc.h>
>> +#include <linux/kernel.h>
>> +#include <linux/of.h>
>> +#include <linux/panic_notifier.h>
>> +
>> +#define SMC_SIP_INVOKE_MCE			0xc2ffff00
>> +#define MCE_SMC_READ_MCA			12
>> +
>> +#define MCA_ARI_CMD_RD_SERR			1
>> +
>> +#define MCA_ARI_RW_SUBIDX_STAT			1
>> +#define SERR_STATUS_VAL				BIT_ULL(63)
>> +
>> +#define MCA_ARI_RW_SUBIDX_ADDR			2
>> +#define MCA_ARI_RW_SUBIDX_MSC1			3
>> +#define MCA_ARI_RW_SUBIDX_MSC2			4
>> +
>> +static const char * const bank_names[] = {
>> +	"SYS:DPMU", "ROC:IOB", "ROC:MCB", "ROC:CCE", "ROC:CQX", "ROC:CTU",
>> +};
>> +
>> +static void read_uncore_mca(u8 cmd, u8 idx, u8 subidx, u8 inst, u64 *data)
>> +{
>> +	struct arm_smccc_res res;
>> +
>> +	arm_smccc_smc(SMC_SIP_INVOKE_MCE | MCE_SMC_READ_MCA,
>> +		      ((u64)inst << 24) | ((u64)idx << 16) |
>> +			      ((u64)subidx << 8) | ((u64)cmd << 0),
>> +		      0, 0, 0, 0, 0, 0, &res);
>> +
>> +	*data = res.a2;
>> +}
>> +
>> +static int tegra186_ari_panic_handler(struct notifier_block *nb,
>> +				      unsigned long code, void *unused)
>> +{
>> +	u64 status;
>> +	int i;
>> +
>> +	for (i = 0; i < ARRAY_SIZE(bank_names); i++) {
>> +		read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, MCA_ARI_RW_SUBIDX_STAT,
>> +				0, &status);
>> +
>> +		if (status & SERR_STATUS_VAL) {
>> +			u64 addr, misc1, misc2;
>> +
>> +			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
>> +					MCA_ARI_RW_SUBIDX_ADDR, 0, &addr);
>> +			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
>> +					MCA_ARI_RW_SUBIDX_MSC1, 0, &misc1);
>> +			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
>> +					MCA_ARI_RW_SUBIDX_MSC2, 0, &misc2);
>> +
>> +			pr_crit("Machine Check Error in %s\n"
>> +				"  status=0x%llx addr=0x%llx\n"
>> +				"  msc1=0x%llx msc2=0x%llx\n",
>> +				bank_names[i], status, addr, misc1, misc2);
> 
> This still looks rather cryptic to me. Is there some way to further
> decode things like the status and MSC registers? Or is this something
> that people are supposed to know how to interpret?

Indeed, it is very cryptic. The corresponding downstream driver does a 
lot of work to print more human-readable descriptions of these - that's 
what I used as a decoding reference as well myself.

I was thinking that having the barebones here is good to at least have 
the error data to decode manually, and we can add the more 
human-readable decoding afterwards, or perhaps provide some script to 
decode it.

> 
> Also, I'm not sure it's evident what those various banks are. Is there
> some way we can provide a description for these?

I don't know if the bank names themselves are very useful - each bank is 
decoded differently and the decoded information is the useful part.

> 
> Additional information doesn't necessarily have to go into code, but
> it'd be nice if at least there was some sort of comment somewhere that
> goes into a bit more detail so that people know how to use this. Or
> perhaps this is documented in the TRM? If so, perhaps provide a
> reference to that so that people know where to find the information.

I don't know if the TRM has this information. How to decode these values 
can be seen in

https://nv-tegra.nvidia.com/gitweb/?p=linux-nvidia.git;a=blob;f=drivers/platform/tegra/ari_mca.c;h=040d05978ca49755a68365bebe7c46f6628c5162;hb=6dc57fec39c444e4c4448be61ddd19c55693daf1

and

https://nv-tegra.nvidia.com/gitweb/?p=linux-nvidia.git;a=blob;f=include/linux/platform/tegra/ari_mca.h;h=e6e4ac3abf674a1fc93f7b0dbcac4d6e672772d6;hb=6dc57fec39c444e4c4448be61ddd19c55693daf1

Not sure if these kinds of links are very appropriate as references though.

> 
> Thierry
> 

Thanks,
Mikko

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] soc: tegra: Add Tegra186 ARI driver
@ 2021-06-18 12:32     ` Mikko Perttunen
  0 siblings, 0 replies; 8+ messages in thread
From: Mikko Perttunen @ 2021-06-18 12:32 UTC (permalink / raw)
  To: Thierry Reding, Mikko Perttunen; +Cc: jonathanh, linux-tegra, linux-arm-kernel

On 6/18/21 3:03 PM, Thierry Reding wrote:
> On Thu, Jun 17, 2021 at 03:13:07PM +0300, Mikko Perttunen wrote:
>> Add a driver to hook into panic notifiers and print machine check
>> status for debugging. Status information is retrieved via SMC. This
>> is supported by upstream ARM Trusted Firmware.
>>
>> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
>> ---
>> v2:
>> * Changed to use panic notifier instead of serror hook
>> ---
>>   drivers/soc/tegra/Makefile       |  1 +
>>   drivers/soc/tegra/ari-tegra186.c | 80 ++++++++++++++++++++++++++++++++
>>   2 files changed, 81 insertions(+)
>>   create mode 100644 drivers/soc/tegra/ari-tegra186.c
>>
>> diff --git a/drivers/soc/tegra/Makefile b/drivers/soc/tegra/Makefile
>> index 9c809c1814bd..054e862b63d8 100644
>> --- a/drivers/soc/tegra/Makefile
>> +++ b/drivers/soc/tegra/Makefile
>> @@ -7,3 +7,4 @@ obj-$(CONFIG_SOC_TEGRA_PMC) += pmc.o
>>   obj-$(CONFIG_SOC_TEGRA_POWERGATE_BPMP) += powergate-bpmp.o
>>   obj-$(CONFIG_SOC_TEGRA20_VOLTAGE_COUPLER) += regulators-tegra20.o
>>   obj-$(CONFIG_SOC_TEGRA30_VOLTAGE_COUPLER) += regulators-tegra30.o
>> +obj-$(CONFIG_ARCH_TEGRA_186_SOC) += ari-tegra186.o
>> diff --git a/drivers/soc/tegra/ari-tegra186.c b/drivers/soc/tegra/ari-tegra186.c
>> new file mode 100644
>> index 000000000000..02577853ec49
>> --- /dev/null
>> +++ b/drivers/soc/tegra/ari-tegra186.c
>> @@ -0,0 +1,80 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
>> + */
>> +
>> +#include <linux/arm-smccc.h>
>> +#include <linux/kernel.h>
>> +#include <linux/of.h>
>> +#include <linux/panic_notifier.h>
>> +
>> +#define SMC_SIP_INVOKE_MCE			0xc2ffff00
>> +#define MCE_SMC_READ_MCA			12
>> +
>> +#define MCA_ARI_CMD_RD_SERR			1
>> +
>> +#define MCA_ARI_RW_SUBIDX_STAT			1
>> +#define SERR_STATUS_VAL				BIT_ULL(63)
>> +
>> +#define MCA_ARI_RW_SUBIDX_ADDR			2
>> +#define MCA_ARI_RW_SUBIDX_MSC1			3
>> +#define MCA_ARI_RW_SUBIDX_MSC2			4
>> +
>> +static const char * const bank_names[] = {
>> +	"SYS:DPMU", "ROC:IOB", "ROC:MCB", "ROC:CCE", "ROC:CQX", "ROC:CTU",
>> +};
>> +
>> +static void read_uncore_mca(u8 cmd, u8 idx, u8 subidx, u8 inst, u64 *data)
>> +{
>> +	struct arm_smccc_res res;
>> +
>> +	arm_smccc_smc(SMC_SIP_INVOKE_MCE | MCE_SMC_READ_MCA,
>> +		      ((u64)inst << 24) | ((u64)idx << 16) |
>> +			      ((u64)subidx << 8) | ((u64)cmd << 0),
>> +		      0, 0, 0, 0, 0, 0, &res);
>> +
>> +	*data = res.a2;
>> +}
>> +
>> +static int tegra186_ari_panic_handler(struct notifier_block *nb,
>> +				      unsigned long code, void *unused)
>> +{
>> +	u64 status;
>> +	int i;
>> +
>> +	for (i = 0; i < ARRAY_SIZE(bank_names); i++) {
>> +		read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, MCA_ARI_RW_SUBIDX_STAT,
>> +				0, &status);
>> +
>> +		if (status & SERR_STATUS_VAL) {
>> +			u64 addr, misc1, misc2;
>> +
>> +			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
>> +					MCA_ARI_RW_SUBIDX_ADDR, 0, &addr);
>> +			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
>> +					MCA_ARI_RW_SUBIDX_MSC1, 0, &misc1);
>> +			read_uncore_mca(MCA_ARI_CMD_RD_SERR, i,
>> +					MCA_ARI_RW_SUBIDX_MSC2, 0, &misc2);
>> +
>> +			pr_crit("Machine Check Error in %s\n"
>> +				"  status=0x%llx addr=0x%llx\n"
>> +				"  msc1=0x%llx msc2=0x%llx\n",
>> +				bank_names[i], status, addr, misc1, misc2);
> 
> This still looks rather cryptic to me. Is there some way to further
> decode things like the status and MSC registers? Or is this something
> that people are supposed to know how to interpret?

Indeed, it is very cryptic. The corresponding downstream driver does a 
lot of work to print more human-readable descriptions of these - that's 
what I used as a decoding reference as well myself.

I was thinking that having the barebones here is good to at least have 
the error data to decode manually, and we can add the more 
human-readable decoding afterwards, or perhaps provide some script to 
decode it.

> 
> Also, I'm not sure it's evident what those various banks are. Is there
> some way we can provide a description for these?

I don't know if the bank names themselves are very useful - each bank is 
decoded differently and the decoded information is the useful part.

> 
> Additional information doesn't necessarily have to go into code, but
> it'd be nice if at least there was some sort of comment somewhere that
> goes into a bit more detail so that people know how to use this. Or
> perhaps this is documented in the TRM? If so, perhaps provide a
> reference to that so that people know where to find the information.

I don't know if the TRM has this information. How to decode these values 
can be seen in

https://nv-tegra.nvidia.com/gitweb/?p=linux-nvidia.git;a=blob;f=drivers/platform/tegra/ari_mca.c;h=040d05978ca49755a68365bebe7c46f6628c5162;hb=6dc57fec39c444e4c4448be61ddd19c55693daf1

and

https://nv-tegra.nvidia.com/gitweb/?p=linux-nvidia.git;a=blob;f=include/linux/platform/tegra/ari_mca.h;h=e6e4ac3abf674a1fc93f7b0dbcac4d6e672772d6;hb=6dc57fec39c444e4c4448be61ddd19c55693daf1

Not sure if these kinds of links are very appropriate as references though.

> 
> Thierry
> 

Thanks,
Mikko

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] soc: tegra: Add Tegra186 ARI driver
  2021-06-17 12:13 ` Mikko Perttunen
@ 2021-10-07 18:47   ` Thierry Reding
  -1 siblings, 0 replies; 8+ messages in thread
From: Thierry Reding @ 2021-10-07 18:47 UTC (permalink / raw)
  To: Mikko Perttunen; +Cc: jonathanh, linux-tegra, linux-arm-kernel

[-- Attachment #1: Type: text/plain, Size: 641 bytes --]

On Thu, Jun 17, 2021 at 03:13:07PM +0300, Mikko Perttunen wrote:
> Add a driver to hook into panic notifiers and print machine check
> status for debugging. Status information is retrieved via SMC. This
> is supported by upstream ARM Trusted Firmware.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> ---
> v2:
> * Changed to use panic notifier instead of serror hook
> ---
>  drivers/soc/tegra/Makefile       |  1 +
>  drivers/soc/tegra/ari-tegra186.c | 80 ++++++++++++++++++++++++++++++++
>  2 files changed, 81 insertions(+)
>  create mode 100644 drivers/soc/tegra/ari-tegra186.c

Applied, thanks.

Thierry

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] soc: tegra: Add Tegra186 ARI driver
@ 2021-10-07 18:47   ` Thierry Reding
  0 siblings, 0 replies; 8+ messages in thread
From: Thierry Reding @ 2021-10-07 18:47 UTC (permalink / raw)
  To: Mikko Perttunen; +Cc: jonathanh, linux-tegra, linux-arm-kernel


[-- Attachment #1.1: Type: text/plain, Size: 641 bytes --]

On Thu, Jun 17, 2021 at 03:13:07PM +0300, Mikko Perttunen wrote:
> Add a driver to hook into panic notifiers and print machine check
> status for debugging. Status information is retrieved via SMC. This
> is supported by upstream ARM Trusted Firmware.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> ---
> v2:
> * Changed to use panic notifier instead of serror hook
> ---
>  drivers/soc/tegra/Makefile       |  1 +
>  drivers/soc/tegra/ari-tegra186.c | 80 ++++++++++++++++++++++++++++++++
>  2 files changed, 81 insertions(+)
>  create mode 100644 drivers/soc/tegra/ari-tegra186.c

Applied, thanks.

Thierry

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

[-- Attachment #2: Type: text/plain, Size: 176 bytes --]

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-10-07 18:49 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-17 12:13 [PATCH v2] soc: tegra: Add Tegra186 ARI driver Mikko Perttunen
2021-06-17 12:13 ` Mikko Perttunen
2021-06-18 12:03 ` Thierry Reding
2021-06-18 12:03   ` Thierry Reding
2021-06-18 12:32   ` Mikko Perttunen
2021-06-18 12:32     ` Mikko Perttunen
2021-10-07 18:47 ` Thierry Reding
2021-10-07 18:47   ` Thierry Reding

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.