All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yazen Ghannam <Yazen.Ghannam@amd.com>
To: linux-edac@vger.kernel.org
Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>,
	linux-kernel@vger.kernel.org, bp@suse.de, tony.luck@intel.com,
	x86@kernel.org
Subject: [PATCH 2/2] x86/MCE: Always save MCA_{ADDR,MISC,SYND} register contents
Date: Mon, 26 Mar 2018 14:15:26 -0500	[thread overview]
Message-ID: <20180326191526.64314-2-Yazen.Ghannam@amd.com> (raw)
In-Reply-To: <20180326191526.64314-1-Yazen.Ghannam@amd.com>

From: Yazen Ghannam <yazen.ghannam@amd.com>

The Intel SDM and AMD APM both state that the contents of the MCA_ADDR
register should be saved if MCA_STATUS[ADDRV] is set. The same applies
to MCA_MISC and MCA_SYND (on SMCA systems) and their respective valid
bits.

However, the Fam17h Processor Programming Reference states
"Error handlers should save the values in MCA_ADDR, MCA_MISC0, and
MCA_SYND even if MCA_STATUS[AddrV], MCA_STATUS[MiscV], and
MCA_STATUS[SyndV] are zero."

This is to ensure that all MCA state information is collected even if
software cannot act upon it (because the valid bits are cleared).

So always save the auxiliary MCA register contents even if the valid
bits are cleared. This should not affect error processing because
software should still check the valid bits before using the register
contents for error processing.

Also, print MCA_{ADDR,MISC,SYND} even if their valid bits are not set.
Printing from EDAC/mce_amd is included here since we want to do this on
AMD systems.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 arch/x86/kernel/cpu/mcheck/mce.c     | 23 +++++++----------------
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 10 +++-------
 drivers/edac/mce_amd.c               | 10 +++-------
 3 files changed, 13 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 42cf2880d0ed..a556e1cadfbc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -248,19 +248,14 @@ static void __print_mce(struct mce *m)
 	}
 
 	pr_emerg(HW_ERR "TSC %llx ", m->tsc);
-	if (m->addr)
-		pr_cont("ADDR %llx ", m->addr);
-	if (m->misc)
-		pr_cont("MISC %llx ", m->misc);
+	pr_cont("ADDR %016llx ", m->addr);
+	pr_cont("MISC %016llx\n", m->misc);
 
 	if (mce_flags.smca) {
-		if (m->synd)
-			pr_cont("SYND %llx ", m->synd);
-		if (m->ipid)
-			pr_cont("IPID %llx ", m->ipid);
+		pr_emerg(HW_ERR "IPID %016llx ", m->ipid);
+		pr_cont("SYND %016llx\n", m->synd);
 	}
 
-	pr_cont("\n");
 	/*
 	 * Note this output is parsed by external tools and old fields
 	 * should not be changed.
@@ -639,12 +634,10 @@ static struct notifier_block mce_default_nb = {
  */
 static void mce_read_aux(struct mce *m, int i)
 {
-	if (m->status & MCI_STATUS_MISCV)
-		m->misc = mce_rdmsrl(msr_ops.misc(i));
+	m->misc = mce_rdmsrl(msr_ops.misc(i));
+	m->addr = mce_rdmsrl(msr_ops.addr(i));
 
 	if (m->status & MCI_STATUS_ADDRV) {
-		m->addr = mce_rdmsrl(msr_ops.addr(i));
-
 		/*
 		 * Mask the reported address by the reported granularity.
 		 */
@@ -667,9 +660,7 @@ static void mce_read_aux(struct mce *m, int i)
 
 	if (mce_flags.smca) {
 		m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
-
-		if (m->status & MCI_STATUS_SYNDV)
-			m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
+		m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
 	}
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f7666eef4a87..5a37ae704578 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -799,13 +799,12 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 	mce_setup(&m);
 
 	m.status = status;
+	m.addr	 = addr;
 	m.misc   = misc;
 	m.bank   = bank;
 	m.tsc	 = rdtsc();
 
 	if (m.status & MCI_STATUS_ADDRV) {
-		m.addr = addr;
-
 		/*
 		 * Extract [55:<lsb>] where lsb is the least significant
 		 * *valid* bit of the address bits.
@@ -819,9 +818,7 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 
 	if (mce_flags.smca) {
 		rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
-
-		if (m.status & MCI_STATUS_SYNDV)
-			rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+		rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
 	}
 
 	mce_log(&m);
@@ -849,8 +846,7 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
 	if (!(status & MCI_STATUS_VAL))
 		return false;
 
-	if (status & MCI_STATUS_ADDRV)
-		rdmsrl(msr_addr, addr);
+	rdmsrl(msr_addr, addr);
 
 	__log_error(bank, status, addr, misc);
 
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 2ab4d61ee47e..004425cc8ddf 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -990,16 +990,12 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 
 	pr_cont("]: 0x%016llx\n", m->status);
 
-	if (m->status & MCI_STATUS_ADDRV)
-		pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr);
+	pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr);
+	pr_emerg(HW_ERR "Misc: 0x%016llx\n", m->misc);
 
 	if (boot_cpu_has(X86_FEATURE_SMCA)) {
 		pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid);
-
-		if (m->status & MCI_STATUS_SYNDV)
-			pr_cont(", Syndrome: 0x%016llx", m->synd);
-
-		pr_cont("\n");
+		pr_cont(", Syndrome: 0x%016llx\n", m->synd);
 
 		decode_smca_error(m);
 		goto err_code;
-- 
2.14.1

WARNING: multiple messages have this Message-ID (diff)
From: Yazen Ghannam <yazen.ghannam@amd.com>
To: linux-edac@vger.kernel.org
Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>,
	linux-kernel@vger.kernel.org, bp@suse.de, tony.luck@intel.com,
	x86@kernel.org
Subject: [2/2] x86/MCE: Always save MCA_{ADDR,MISC,SYND} register contents
Date: Mon, 26 Mar 2018 14:15:26 -0500	[thread overview]
Message-ID: <20180326191526.64314-2-Yazen.Ghannam@amd.com> (raw)

From: Yazen Ghannam <yazen.ghannam@amd.com>

The Intel SDM and AMD APM both state that the contents of the MCA_ADDR
register should be saved if MCA_STATUS[ADDRV] is set. The same applies
to MCA_MISC and MCA_SYND (on SMCA systems) and their respective valid
bits.

However, the Fam17h Processor Programming Reference states
"Error handlers should save the values in MCA_ADDR, MCA_MISC0, and
MCA_SYND even if MCA_STATUS[AddrV], MCA_STATUS[MiscV], and
MCA_STATUS[SyndV] are zero."

This is to ensure that all MCA state information is collected even if
software cannot act upon it (because the valid bits are cleared).

So always save the auxiliary MCA register contents even if the valid
bits are cleared. This should not affect error processing because
software should still check the valid bits before using the register
contents for error processing.

Also, print MCA_{ADDR,MISC,SYND} even if their valid bits are not set.
Printing from EDAC/mce_amd is included here since we want to do this on
AMD systems.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 arch/x86/kernel/cpu/mcheck/mce.c     | 23 +++++++----------------
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 10 +++-------
 drivers/edac/mce_amd.c               | 10 +++-------
 3 files changed, 13 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 42cf2880d0ed..a556e1cadfbc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -248,19 +248,14 @@ static void __print_mce(struct mce *m)
 	}
 
 	pr_emerg(HW_ERR "TSC %llx ", m->tsc);
-	if (m->addr)
-		pr_cont("ADDR %llx ", m->addr);
-	if (m->misc)
-		pr_cont("MISC %llx ", m->misc);
+	pr_cont("ADDR %016llx ", m->addr);
+	pr_cont("MISC %016llx\n", m->misc);
 
 	if (mce_flags.smca) {
-		if (m->synd)
-			pr_cont("SYND %llx ", m->synd);
-		if (m->ipid)
-			pr_cont("IPID %llx ", m->ipid);
+		pr_emerg(HW_ERR "IPID %016llx ", m->ipid);
+		pr_cont("SYND %016llx\n", m->synd);
 	}
 
-	pr_cont("\n");
 	/*
 	 * Note this output is parsed by external tools and old fields
 	 * should not be changed.
@@ -639,12 +634,10 @@ static struct notifier_block mce_default_nb = {
  */
 static void mce_read_aux(struct mce *m, int i)
 {
-	if (m->status & MCI_STATUS_MISCV)
-		m->misc = mce_rdmsrl(msr_ops.misc(i));
+	m->misc = mce_rdmsrl(msr_ops.misc(i));
+	m->addr = mce_rdmsrl(msr_ops.addr(i));
 
 	if (m->status & MCI_STATUS_ADDRV) {
-		m->addr = mce_rdmsrl(msr_ops.addr(i));
-
 		/*
 		 * Mask the reported address by the reported granularity.
 		 */
@@ -667,9 +660,7 @@ static void mce_read_aux(struct mce *m, int i)
 
 	if (mce_flags.smca) {
 		m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
-
-		if (m->status & MCI_STATUS_SYNDV)
-			m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
+		m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
 	}
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f7666eef4a87..5a37ae704578 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -799,13 +799,12 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 	mce_setup(&m);
 
 	m.status = status;
+	m.addr	 = addr;
 	m.misc   = misc;
 	m.bank   = bank;
 	m.tsc	 = rdtsc();
 
 	if (m.status & MCI_STATUS_ADDRV) {
-		m.addr = addr;
-
 		/*
 		 * Extract [55:<lsb>] where lsb is the least significant
 		 * *valid* bit of the address bits.
@@ -819,9 +818,7 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 
 	if (mce_flags.smca) {
 		rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
-
-		if (m.status & MCI_STATUS_SYNDV)
-			rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+		rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
 	}
 
 	mce_log(&m);
@@ -849,8 +846,7 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
 	if (!(status & MCI_STATUS_VAL))
 		return false;
 
-	if (status & MCI_STATUS_ADDRV)
-		rdmsrl(msr_addr, addr);
+	rdmsrl(msr_addr, addr);
 
 	__log_error(bank, status, addr, misc);
 
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 2ab4d61ee47e..004425cc8ddf 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -990,16 +990,12 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 
 	pr_cont("]: 0x%016llx\n", m->status);
 
-	if (m->status & MCI_STATUS_ADDRV)
-		pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr);
+	pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr);
+	pr_emerg(HW_ERR "Misc: 0x%016llx\n", m->misc);
 
 	if (boot_cpu_has(X86_FEATURE_SMCA)) {
 		pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid);
-
-		if (m->status & MCI_STATUS_SYNDV)
-			pr_cont(", Syndrome: 0x%016llx", m->synd);
-
-		pr_cont("\n");
+		pr_cont(", Syndrome: 0x%016llx\n", m->synd);
 
 		decode_smca_error(m);
 		goto err_code;

  reply	other threads:[~2018-03-26 19:15 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-26 19:15 [PATCH 1/2] Revert "x86/mce/AMD: Collect error info even if valid bits are not set" Yazen Ghannam
2018-03-26 19:15 ` [1/2] " Yazen Ghannam
2018-03-26 19:15 ` Yazen Ghannam [this message]
2018-03-26 19:15   ` [2/2] x86/MCE: Always save MCA_{ADDR,MISC,SYND} register contents Yazen Ghannam
2018-03-26 19:35   ` [PATCH 2/2] " Borislav Petkov
2018-03-26 19:35     ` [2/2] " Borislav Petkov
2018-03-26 20:05     ` [PATCH 2/2] " Ghannam, Yazen
2018-03-26 20:05       ` [2/2] " Yazen Ghannam
2018-03-26 20:09       ` [PATCH 2/2] " Borislav Petkov
2018-03-26 20:09         ` [2/2] " Borislav Petkov
2018-03-26 20:27         ` [PATCH 2/2] " Luck, Tony
2018-03-26 20:27           ` [2/2] " Luck, Tony
2018-03-27 14:07           ` [PATCH 2/2] " Ghannam, Yazen
2018-03-27 14:07             ` [2/2] " Yazen Ghannam
2018-03-26 19:30 ` [PATCH 1/2] Revert "x86/mce/AMD: Collect error info even if valid bits are not set" Borislav Petkov
2018-03-26 19:30   ` [1/2] " Borislav Petkov
2018-03-26 19:58   ` [PATCH 1/2] " Ghannam, Yazen
2018-03-26 19:58     ` [1/2] " Yazen Ghannam
2018-03-26 20:07     ` [PATCH 1/2] " Borislav Petkov
2018-03-26 20:07       ` [1/2] " Borislav Petkov
2018-03-27 14:02       ` [PATCH 1/2] " Ghannam, Yazen
2018-03-27 14:02         ` [1/2] " Yazen Ghannam
2018-03-27 15:59         ` [PATCH 1/2] " Ghannam, Yazen
2018-03-27 15:59           ` [1/2] " Yazen Ghannam
2018-08-23 12:24           ` [PATCH 1/2] " Borislav Petkov
2018-08-23 12:24             ` [1/2] " Borislav Petkov
2018-08-23 17:53             ` [PATCH 1/2] " Ghannam, Yazen
2018-08-23 17:53               ` [1/2] " Yazen Ghannam
2018-03-28 18:39 ` [tip:ras/core] " tip-bot for Yazen Ghannam

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180326191526.64314-2-Yazen.Ghannam@amd.com \
    --to=yazen.ghannam@amd.com \
    --cc=bp@suse.de \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.