All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86/MCE: Update MCE severity condition check
@ 2013-06-20  9:16 Chen Gong
  2013-06-20  9:41 ` Borislav Petkov
  2013-06-25  6:32 ` Naveen N. Rao
  0 siblings, 2 replies; 14+ messages in thread
From: Chen Gong @ 2013-06-20  9:16 UTC (permalink / raw)
  To: tony.luck, bp; +Cc: linux-kernel, Chen Gong

Update some SRAR severity conditions check to make it clearer,
according to latest Intel SDM Vol 3(June 2013), table 15-20.

Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce-severity.c |   15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index beb1f16..1fa12ea 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -110,22 +110,17 @@ static struct severity {
 	/* known AR MCACODs: */
 #ifdef	CONFIG_MEMORY_FAILURE
 	MCESEV(
-		KEEP, "HT thread notices Action required: data load error",
-		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
-		MCGMASK(MCG_STATUS_EIPV, 0)
+		KEEP, "Action required but non-affected thread is continuable",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),
+		MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV)
 		),
 	MCESEV(
-		AR, "Action required: data load error",
+		AR, "Action required: data load error on user land",
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
 		USER
 		),
 	MCESEV(
-		KEEP, "HT thread notices Action required: instruction fetch error",
-		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
-		MCGMASK(MCG_STATUS_EIPV, 0)
-		),
-	MCESEV(
-		AR, "Action required: instruction fetch error",
+		AR, "Action required: instruction fetch error on user land",
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
 		USER
 		),
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/MCE: Update MCE severity condition check
  2013-06-20  9:16 [PATCH] x86/MCE: Update MCE severity condition check Chen Gong
@ 2013-06-20  9:41 ` Borislav Petkov
  2013-06-21 12:38   ` Chen Gong
  2013-06-25  6:32 ` Naveen N. Rao
  1 sibling, 1 reply; 14+ messages in thread
From: Borislav Petkov @ 2013-06-20  9:41 UTC (permalink / raw)
  To: Chen Gong; +Cc: tony.luck, linux-kernel

On Thu, Jun 20, 2013 at 05:16:12AM -0400, Chen Gong wrote:
> Update some SRAR severity conditions check to make it clearer,
> according to latest Intel SDM Vol 3(June 2013), table 15-20.
> 
> Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
> ---
>  arch/x86/kernel/cpu/mcheck/mce-severity.c |   15 +++++----------
>  1 file changed, 5 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> index beb1f16..1fa12ea 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> @@ -110,22 +110,17 @@ static struct severity {
>  	/* known AR MCACODs: */
>  #ifdef	CONFIG_MEMORY_FAILURE
>  	MCESEV(
> -		KEEP, "HT thread notices Action required: data load error",
> -		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
> -		MCGMASK(MCG_STATUS_EIPV, 0)
> +		KEEP, "Action required but non-affected thread is continuable",
> +		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),
> +		MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV)
>  		),
>  	MCESEV(
> -		AR, "Action required: data load error",
> +		AR, "Action required: data load error on user land",

You mean "data load error in a user process"?

>  		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
>  		USER
>  		),
>  	MCESEV(
> -		KEEP, "HT thread notices Action required: instruction fetch error",
> -		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
> -		MCGMASK(MCG_STATUS_EIPV, 0)
> -		),
> -	MCESEV(
> -		AR, "Action required: instruction fetch error",
> +		AR, "Action required: instruction fetch error on user land",

ditto?

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/MCE: Update MCE severity condition check
  2013-06-20  9:41 ` Borislav Petkov
@ 2013-06-21 12:38   ` Chen Gong
  0 siblings, 0 replies; 14+ messages in thread
From: Chen Gong @ 2013-06-21 12:38 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: tony.luck, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 2384 bytes --]

On Thu, Jun 20, 2013 at 11:41:52AM +0200, Borislav Petkov wrote:
> Date: Thu, 20 Jun 2013 11:41:52 +0200
> From: Borislav Petkov <bp@alien8.de>
> To: Chen Gong <gong.chen@linux.intel.com>
> Cc: tony.luck@intel.com, linux-kernel@vger.kernel.org
> Subject: Re: [PATCH] x86/MCE: Update MCE severity condition check
> User-Agent: Mutt/1.5.21 (2010-09-15)
> 
> On Thu, Jun 20, 2013 at 05:16:12AM -0400, Chen Gong wrote:
> > Update some SRAR severity conditions check to make it clearer,
> > according to latest Intel SDM Vol 3(June 2013), table 15-20.
> > 
> > Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
> > ---
> >  arch/x86/kernel/cpu/mcheck/mce-severity.c |   15 +++++----------
> >  1 file changed, 5 insertions(+), 10 deletions(-)
> > 
> > diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> > index beb1f16..1fa12ea 100644
> > --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
> > +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> > @@ -110,22 +110,17 @@ static struct severity {
> >  	/* known AR MCACODs: */
> >  #ifdef	CONFIG_MEMORY_FAILURE
> >  	MCESEV(
> > -		KEEP, "HT thread notices Action required: data load error",
> > -		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
> > -		MCGMASK(MCG_STATUS_EIPV, 0)
> > +		KEEP, "Action required but non-affected thread is continuable",
> > +		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),
> > +		MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV)
> >  		),
> >  	MCESEV(
> > -		AR, "Action required: data load error",
> > +		AR, "Action required: data load error on user land",
> 
> You mean "data load error in a user process"?

Yes it is.
> 
> >  		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
> >  		USER
> >  		),
> >  	MCESEV(
> > -		KEEP, "HT thread notices Action required: instruction fetch error",
> > -		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
> > -		MCGMASK(MCG_STATUS_EIPV, 0)
> > -		),
> > -	MCESEV(
> > -		AR, "Action required: instruction fetch error",
> > +		AR, "Action required: instruction fetch error on user land",
> 
> ditto?
> 
ditto

> -- 
> Regards/Gruss,
>     Boris.
> 
> Sent from a fat crate under my desk. Formatting is fine.
> --

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/MCE: Update MCE severity condition check
  2013-06-20  9:16 [PATCH] x86/MCE: Update MCE severity condition check Chen Gong
  2013-06-20  9:41 ` Borislav Petkov
@ 2013-06-25  6:32 ` Naveen N. Rao
  2013-06-25 16:31   ` Luck, Tony
  1 sibling, 1 reply; 14+ messages in thread
From: Naveen N. Rao @ 2013-06-25  6:32 UTC (permalink / raw)
  To: Chen Gong; +Cc: tony.luck, bp, linux-kernel

On 2013/06/20 05:16AM, Chen Gong wrote:
> Update some SRAR severity conditions check to make it clearer,
> according to latest Intel SDM Vol 3(June 2013), table 15-20.
> 
> Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
> ---
>  arch/x86/kernel/cpu/mcheck/mce-severity.c |   15 +++++----------
>  1 file changed, 5 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> index beb1f16..1fa12ea 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> @@ -110,22 +110,17 @@ static struct severity {
>  	/* known AR MCACODs: */
>  #ifdef	CONFIG_MEMORY_FAILURE
>  	MCESEV(
> -		KEEP, "HT thread notices Action required: data load error",
> -		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
> -		MCGMASK(MCG_STATUS_EIPV, 0)
> +		KEEP, "Action required but non-affected thread is continuable",

The SDM talks about "non-affected" logical processors, but perhaps we
can call this an "unaffected" thread?

- Naveen


^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] x86/MCE: Update MCE severity condition check
  2013-06-25  6:32 ` Naveen N. Rao
@ 2013-06-25 16:31   ` Luck, Tony
  2013-06-25 20:08     ` Naveen N. Rao
  2013-06-26  9:18     ` Chen Gong
  0 siblings, 2 replies; 14+ messages in thread
From: Luck, Tony @ 2013-06-25 16:31 UTC (permalink / raw)
  To: Naveen N. Rao, Chen Gong; +Cc: bp, linux-kernel

> The SDM talks about "non-affected" logical processors, but perhaps we
> can call this an "unaffected" thread?

"unaffected" sounds a bit more natural (but close enough to the wording in
the SDM that people should see the connection).

-Tony

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/MCE: Update MCE severity condition check
  2013-06-25 16:31   ` Luck, Tony
@ 2013-06-25 20:08     ` Naveen N. Rao
  2013-06-26  9:18     ` Chen Gong
  1 sibling, 0 replies; 14+ messages in thread
From: Naveen N. Rao @ 2013-06-25 20:08 UTC (permalink / raw)
  To: Luck, Tony; +Cc: Chen Gong, bp, linux-kernel

On 06/25/2013 10:01 PM, Luck, Tony wrote:
>> The SDM talks about "non-affected" logical processors, but perhaps we
>> can call this an "unaffected" thread?
>
> "unaffected" sounds a bit more natural (but close enough to the wording in
> the SDM that people should see the connection).

Yup - "unnatural" is precisely the term that describes my feeling when I 
read the original description :)

Thanks,
Naveen


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/MCE: Update MCE severity condition check
  2013-06-25 16:31   ` Luck, Tony
  2013-06-25 20:08     ` Naveen N. Rao
@ 2013-06-26  9:18     ` Chen Gong
       [not found]       ` <0134840@agluck-desk.sc.intel.com>
  1 sibling, 1 reply; 14+ messages in thread
From: Chen Gong @ 2013-06-26  9:18 UTC (permalink / raw)
  To: Luck, Tony; +Cc: Naveen N. Rao, bp, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 782 bytes --]

On Tue, Jun 25, 2013 at 04:31:23PM +0000, Luck, Tony wrote:
> Date: Tue, 25 Jun 2013 16:31:23 +0000
> From: "Luck, Tony" <tony.luck@intel.com>
> To: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>, Chen Gong
>  <gong.chen@linux.intel.com>
> CC: "bp@alien8.de" <bp@alien8.de>, "linux-kernel@vger.kernel.org"
>  <linux-kernel@vger.kernel.org>
> Subject: RE: [PATCH] x86/MCE: Update MCE severity condition check
> 
> > The SDM talks about "non-affected" logical processors, but perhaps we
> > can call this an "unaffected" thread?
> 
> "unaffected" sounds a bit more natural (but close enough to the wording in
> the SDM that people should see the connection).
> 
> -Tony

If this patch is OK, would you please help to update it when merging
it? Thanks very much.

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/mce: Update MCE severity condition check
       [not found]       ` <0134840@agluck-desk.sc.intel.com>
@ 2013-06-26 19:08         ` Borislav Petkov
  2013-06-26 20:23           ` Luck, Tony
  2013-06-27  8:55         ` Naveen N. Rao
  1 sibling, 1 reply; 14+ messages in thread
From: Borislav Petkov @ 2013-06-26 19:08 UTC (permalink / raw)
  To: Luck, Tony; +Cc: linux-kernel, Chen Gong, Naveen N. Rao

On Thu, Jun 20, 2013 at 05:16:12AM -0400, Luck, Tony wrote:
> From: Chen Gong <gong.chen@linux.intel.com>
> 
> Update some SRAR severity conditions check to make it clearer
> according to latest Intel SDM Vol 3B (June 2013), table 15-20.
> 
> Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
> 
> Chen Gong wrote:
> > If this patch is OK, would you please help to update it when merging
> > it? Thanks very much
> 
> This is what I plan to apply.
> 1. Changed "user land" to "in a user process" (2 places) per Boris comment
> 2. Changed "non-affected" to "unaffected" per Naveen comment
> 
> Anyone wants to jump on the "Acked-by" bandwagon - speak now.
> 
> -Tony
> 
>  arch/x86/kernel/cpu/mcheck/mce-severity.c | 15 +++++----------
>  1 file changed, 5 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> index beb1f16..e2703520 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> @@ -110,22 +110,17 @@ static struct severity {
>  	/* known AR MCACODs: */
>  #ifdef	CONFIG_MEMORY_FAILURE
>  	MCESEV(
> -		KEEP, "HT thread notices Action required: data load error",
> -		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
> -		MCGMASK(MCG_STATUS_EIPV, 0)
> +		KEEP, "Action required but unaffected thread is continuable",
> +		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),

Why did we lose MCACOD_DATA from the MASK above? Was this intentional?

> +		MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV)

This change I can understand: a valid restart IP (RIPV=1) means the
thread is continuable.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] x86/mce: Update MCE severity condition check
  2013-06-26 19:08         ` [PATCH] x86/mce: " Borislav Petkov
@ 2013-06-26 20:23           ` Luck, Tony
  2013-06-26 20:36             ` Borislav Petkov
  0 siblings, 1 reply; 14+ messages in thread
From: Luck, Tony @ 2013-06-26 20:23 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: linux-kernel, Chen Gong, Naveen N. Rao

[-- Attachment #1: Type: text/plain, Size: 940 bytes --]

	MCESEV(
> -		KEEP, "HT thread notices Action required: data load error",
> -		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
> -		MCGMASK(MCG_STATUS_EIPV, 0)
> +		KEEP, "Action required but unaffected thread is continuable",
> +		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),

> Why did we lose MCACOD_DATA from the MASK above? Was this intentional?


We used to have separate entries for "HT thread notices ... data load" and "HT thread notices ... instruction load"
because the old SDM had a complex table calling out the bit settings for each type of recoverable machine check.

Latest SDM simplifies the table making it clear that for every SRAR (software recoverable action required) error
we'll have the same bits in MCG_STATUS (EIPV=0, RIPV=1) ... so we don't need to check for the MCACOD value.
See attached snapshot of the new table.

-Tony

[-- Attachment #2: SRAR.png --]
[-- Type: image/png, Size: 17121 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/mce: Update MCE severity condition check
  2013-06-26 20:23           ` Luck, Tony
@ 2013-06-26 20:36             ` Borislav Petkov
  2013-06-26 21:00               ` Luck, Tony
  0 siblings, 1 reply; 14+ messages in thread
From: Borislav Petkov @ 2013-06-26 20:36 UTC (permalink / raw)
  To: Luck, Tony; +Cc: linux-kernel, Chen Gong, Naveen N. Rao

On Wed, Jun 26, 2013 at 08:23:47PM +0000, Luck, Tony wrote:
> 	MCESEV(
> > -		KEEP, "HT thread notices Action required: data load error",
> > -		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
> > -		MCGMASK(MCG_STATUS_EIPV, 0)
> > +		KEEP, "Action required but unaffected thread is continuable",
> > +		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),
> 
> > Why did we lose MCACOD_DATA from the MASK above? Was this intentional?
> 
> 
> We used to have separate entries for "HT thread notices ... data load" and "HT thread notices ... instruction load"
> because the old SDM had a complex table calling out the bit settings for each type of recoverable machine check.
> 
> Latest SDM simplifies the table making it clear that for every SRAR (software recoverable action required) error
> we'll have the same bits in MCG_STATUS (EIPV=0, RIPV=1) ... so we don't need to check for the MCACOD value.
> See attached snapshot of the new table.

And this obviously is the case for the hardware too, I assume, not only
the SDM?

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] x86/mce: Update MCE severity condition check
  2013-06-26 20:36             ` Borislav Petkov
@ 2013-06-26 21:00               ` Luck, Tony
  2013-06-26 21:10                 ` Borislav Petkov
  0 siblings, 1 reply; 14+ messages in thread
From: Luck, Tony @ 2013-06-26 21:00 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: linux-kernel, Chen Gong, Naveen N. Rao

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 834 bytes --]

> And this obviously is the case for the hardware too, I assume, not only
> the SDM?

Yes - we have a magic process which reconfigures all deployed silicon whenever
a new SDM is published :-)

Actually the SDM had been collecting new features for each generation ... each
time just bolting on a new paragraph or table.  I snapped when I saw the table
that was proposed for 15-20 to add "continuable" errors and complained that
it had gotten way too complicated ... and proposed the version that you see in
the current SDM.

It accurately portrays what older generations implemented, and adds the new
continuable (EIPV=1, RIPV=1) while removing many rows and columns.

-Tony
[-- mangled mailing-list footer (undecodable bytes) omitted --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/mce: Update MCE severity condition check
  2013-06-26 21:00               ` Luck, Tony
@ 2013-06-26 21:10                 ` Borislav Petkov
  2013-06-27  6:42                   ` Chen Gong
  0 siblings, 1 reply; 14+ messages in thread
From: Borislav Petkov @ 2013-06-26 21:10 UTC (permalink / raw)
  To: Luck, Tony; +Cc: linux-kernel, Chen Gong, Naveen N. Rao

On Wed, Jun 26, 2013 at 09:00:10PM +0000, Luck, Tony wrote:
> > And this obviously is the case for the hardware too, I assume, not only
> > the SDM?
> 
> Yes - we have a magic process which reconfigures all deployed silicon whenever
> a new SDM is published :-)

Haha, I wouldn't wonder if your silicon dudes come up with
reprogrammable fuses someday :-)

> Actually the SDM had been collecting new features for each generation ... each
> time just bolting on a new paragraph or table.  I snapped when I saw the table
> that was proposed for 15-20 to add "continuable" errors and complained that
> it had gotten way too complicated ... and proposed the version that you see in
> the current SDM.
> 
> It accurately portrays what older generations implemented, and adds the new
> continuable (EIPV=1, RIPV=1) while removing many rows and columns.

Cool :)

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/mce: Update MCE severity condition check
  2013-06-26 21:10                 ` Borislav Petkov
@ 2013-06-27  6:42                   ` Chen Gong
  0 siblings, 0 replies; 14+ messages in thread
From: Chen Gong @ 2013-06-27  6:42 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Luck, Tony, linux-kernel, Naveen N. Rao

[-- Attachment #1: Type: text/plain, Size: 943 bytes --]

On Wed, Jun 26, 2013 at 11:10:52PM +0200, Borislav Petkov wrote:
> Date: Wed, 26 Jun 2013 23:10:52 +0200
> From: Borislav Petkov <bp@alien8.de>
> To: "Luck, Tony" <tony.luck@intel.com>
> Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>, Chen
>  Gong <gong.chen@linux.intel.com>, "Naveen N. Rao"
>  <naveen.n.rao@linux.vnet.ibm.com>
> Subject: Re: [PATCH] x86/mce: Update MCE severity condition check
> User-Agent: Mutt/1.5.21 (2010-09-15)
> 
> On Wed, Jun 26, 2013 at 09:00:10PM +0000, Luck, Tony wrote:
> > > And this obviously is the case for the hardware too, I assume, not only
> > > the SDM?
> > 
> > Yes - we have a magic process which reconfigures all deployed silicon whenever
> > a new SDM is published :-)
> 
> Haha, I wouldn't wonder if your silicon dudes come up with
> reprogrammable fuses someday :-)

If so, it would be a catastrophe, and the guys who wrote the SDM would
be in big trouble ... ;-)


[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] x86/mce: Update MCE severity condition check
       [not found]       ` <0134840@agluck-desk.sc.intel.com>
  2013-06-26 19:08         ` [PATCH] x86/mce: " Borislav Petkov
@ 2013-06-27  8:55         ` Naveen N. Rao
  1 sibling, 0 replies; 14+ messages in thread
From: Naveen N. Rao @ 2013-06-27  8:55 UTC (permalink / raw)
  To: Luck, Tony; +Cc: linux-kernel, Borislav Petkov, Chen Gong

On 06/20/2013 02:46 PM, Luck, Tony wrote:
> From: Chen Gong <gong.chen@linux.intel.com>
>
> Update some SRAR severity conditions check to make it clearer
> according to latest Intel SDM Vol 3B (June 2013), table 15-20.
>
> Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>
> Chen Gong wrote:
>> If this patch is OK, would you please help to update it when merging
>> it? Thanks very much
>
> This is what I plan to apply.
> 1. Changed "user land" to "in a user process" (2 places) per Boris comment
> 2. Changed "non-affected" to "unaffected" per Naveen comment
>
> Anyone wants to jump on the "Acked-by" bandwagon - speak now.

Yep - looks fine to me.
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>

Thanks,
Naveen

>
> -Tony
>
>   arch/x86/kernel/cpu/mcheck/mce-severity.c | 15 +++++----------
>   1 file changed, 5 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> index beb1f16..e2703520 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> @@ -110,22 +110,17 @@ static struct severity {
>   	/* known AR MCACODs: */
>   #ifdef	CONFIG_MEMORY_FAILURE
>   	MCESEV(
> -		KEEP, "HT thread notices Action required: data load error",
> -		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
> -		MCGMASK(MCG_STATUS_EIPV, 0)
> +		KEEP, "Action required but unaffected thread is continuable",
> +		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),
> +		MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV)
>   		),
>   	MCESEV(
> -		AR, "Action required: data load error",
> +		AR, "Action required: data load error in a user process",
>   		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
>   		USER
>   		),
>   	MCESEV(
> -		KEEP, "HT thread notices Action required: instruction fetch error",
> -		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
> -		MCGMASK(MCG_STATUS_EIPV, 0)
> -		),
> -	MCESEV(
> -		AR, "Action required: instruction fetch error",
> +		AR, "Action required: instruction fetch error in a user process",
>   		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
>   		USER
>   		),
>


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2013-06-27  8:55 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-06-20  9:16 [PATCH] x86/MCE: Update MCE severity condition check Chen Gong
2013-06-20  9:41 ` Borislav Petkov
2013-06-21 12:38   ` Chen Gong
2013-06-25  6:32 ` Naveen N. Rao
2013-06-25 16:31   ` Luck, Tony
2013-06-25 20:08     ` Naveen N. Rao
2013-06-26  9:18     ` Chen Gong
     [not found]       ` <0134840@agluck-desk.sc.intel.com>
2013-06-26 19:08         ` [PATCH] x86/mce: " Borislav Petkov
2013-06-26 20:23           ` Luck, Tony
2013-06-26 20:36             ` Borislav Petkov
2013-06-26 21:00               ` Luck, Tony
2013-06-26 21:10                 ` Borislav Petkov
2013-06-27  6:42                   ` Chen Gong
2013-06-27  8:55         ` Naveen N. Rao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.