All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/3] [x86, next] Add mce_sys_info interface for debug.
@ 2010-10-29  4:42 Jin Dongming
  2010-10-29  7:24 ` Borislav Petkov
  2010-10-29  8:10 ` Andi Kleen
  0 siblings, 2 replies; 7+ messages in thread
From: Jin Dongming @ 2010-10-29  4:42 UTC (permalink / raw)
  To: Huang Ying; +Cc: Ingo Molnar, H.Peter Anvin, Andi Kleen, Hidetoshi Seto, LKLM

Add mce_sys_info interface for testing mce.

The interface is needed for the following reasons:
  1. Before testing mce, we sometimes want to know the basic
     mce information of the test machine. Although some
     interfaces are already provided, there are still some kernel
     variables whose values cannot be seen.
  2. When we inject mce data to test mce, the values of some
     interfaces may change. It is hard to confirm all of the
     mce information on the system.

This interface helps us see what has happened and makes mce testing
easier.

The content of mce_sys_info looks like the following:

  MCE status:                      Original/(Current) Value
      mce_disabled:                N

      mce_ser:                     0/1
      tolerant:                    1/1
      panic_on_oops:               0/0
      monarch_timeout:             1000000/1000000 us
      mce_ignore_ce:               0/0
      mce_cmci_disabled:           N/N

  MCG Register:
      CAP_Reg_Val           BANKS    EXT_CNT    FUNCTIONS
      0x0000000000000806    0x06     0x00       TES

Please note that it is used for testing only.

I tested this patch on Intel64 next-tree.

Signed-off-by: Jin Dongming <jin.dongming@np.css.fujitsu.com>
---
 arch/x86/kernel/cpu/mcheck/mce.c |  126 ++++++++++++++++++++++++++++++++++++++
 1 files changed, 126 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ed10e76..91108db 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -97,6 +97,25 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int			cpu_missing;
 
+#ifdef CONFIG_DEBUG_FS
+struct mce_sys_info {
+	int mce_disabled;
+
+	u64 mcg_cap_register;
+
+	int mce_ser;
+	int tolerant;
+	int panic_on_oops;
+	int monarch_timeout;
+	int mce_cmci_disable;
+	int mce_ignore_ce;
+	int mce_cmci_disabled;
+};
+
+static struct mce_sys_info mce_sys_infos;
+
+static void mce_sys_infos_init(void);
+#endif
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
@@ -1453,6 +1472,10 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
 	__mcheck_cpu_init_timer();
 	INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
 
+#ifdef CONFIG_DEBUG_FS
+	mce_sys_infos_init();
+#endif
+
 }
 
 /*
@@ -2224,14 +2247,112 @@ static int mce_ser_set(void *data, u64 val)
 DEFINE_SIMPLE_ATTRIBUTE(mce_ser_fops, mce_ser_get,
 			mce_ser_set, "%llu\n");
 
+static void mce_sys_infos_init(void)
+{
+	static int info_init = 0;
+
+	mce_sys_infos.mce_disabled = mce_disabled;
+
+	if (!mce_disabled && !info_init) {
+		u64 cap;
+
+		info_init++;
+		rdmsrl(MSR_IA32_MCG_CAP, cap);
+		mce_sys_infos.mcg_cap_register	= cap;
+
+		mce_sys_infos.mce_ser		= mce_ser;
+		mce_sys_infos.tolerant		= tolerant;
+		mce_sys_infos.panic_on_oops	= panic_on_oops;
+		mce_sys_infos.monarch_timeout	= monarch_timeout;
+		mce_sys_infos.mce_ignore_ce	= mce_ignore_ce;
+		mce_sys_infos.mce_cmci_disabled	= mce_cmci_disabled;
+	}
+}
+
+static int mce_sys_info_show(struct seq_file *m, void *v)
+{
+	unsigned long cap_reg;
+	seq_printf(m,
+			"MCE status:                      Original/(Current) Value\n"
+			"    mce_disabled:                %s\n"
+			"\n",
+			mce_sys_infos.mce_disabled ? "Y":"N"
+	);
+
+	if (!mce_disabled) {
+		seq_printf(m,
+			"    mce_ser:                     %d/%d\n"
+			"    tolerant:                    %d/%d\n"
+			"    panic_on_oops:               %d/%d\n"
+			"    monarch_timeout:             %d/%d us\n"
+			"    mce_ignore_ce:               %d/%d\n"
+			"    mce_cmci_disabled:           %s/%s\n"
+			"\n",
+
+			mce_sys_infos.mce_ser,
+			mce_ser,
+			mce_sys_infos.tolerant,
+			tolerant,
+			mce_sys_infos.panic_on_oops,
+			panic_on_oops,
+			mce_sys_infos.monarch_timeout,
+			monarch_timeout,
+			mce_sys_infos.mce_ignore_ce,
+			mce_ignore_ce,
+			mce_sys_infos.mce_cmci_disabled ? "Y":"N",
+			mce_cmci_disabled ? "Y":"N"
+		);
+
+		cap_reg = (unsigned long)(mce_sys_infos.mcg_cap_register);
+		seq_printf(m,
+		"MCG Register:\n"
+		"    CAP_Reg_Val           BANKS    EXT_CNT    FUNCTIONS\n"
+		"    0x%016lx    0x%02x     0x%02x       ",
+			cap_reg,
+			(unsigned int)(cap_reg & MCG_BANKCNT_MASK),
+			(unsigned int)MCG_EXT_CNT(cap_reg)
+		);
+
+		if (cap_reg & MCG_SER_P)
+			seq_printf(m, "SER ");
+#define MCG_TES_P (1ULL << 11)
+		if (cap_reg & MCG_TES_P)
+			seq_printf(m, "TES ");
+		if (cap_reg & MCG_CMCI_P)
+			seq_printf(m, "CMCI ");
+		if (cap_reg & MCG_EXT_P)
+			seq_printf(m, "EXT ");
+		if (cap_reg & MCG_CTL_P)
+			seq_printf(m, "CTL ");
+
+		seq_printf(m, "\n");
+	}
+
+	return 0;
+}
+
+static int mce_sys_info_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mce_sys_info_show, NULL);
+}
+
+static const struct file_operations mce_sys_info_fops = {
+	.open		= mce_sys_info_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static int __init mcheck_debugfs_init(void)
 {
 	struct dentry *dmce, *ffake_panic;
 	struct dentry *fmce_ser = NULL;
+	struct dentry *fmce_sys_info = NULL;
 
 	dmce = mce_get_debugfs_dir();
 	if (!dmce)
 		return -ENOMEM;
+
 	ffake_panic = debugfs_create_file("fake_panic", 0644, dmce, NULL,
 					  &fake_panic_fops);
 	if (!ffake_panic)
@@ -2244,6 +2365,11 @@ static int __init mcheck_debugfs_init(void)
 	if (!fmce_ser)
 		return -ENOMEM;
 
+	fmce_sys_info = debugfs_create_file("mce_sys_info", 0444, dmce,
+						NULL, &mce_sys_info_fops);
+	if (!fmce_sys_info)
+		return -ENOMEM;
+
 	return 0;
 }
 late_initcall(mcheck_debugfs_init);
-- 
1.7.2.2



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] [x86, next] Add mce_sys_info interface for debug.
  2010-10-29  4:42 [PATCH 3/3] [x86, next] Add mce_sys_info interface for debug Jin Dongming
@ 2010-10-29  7:24 ` Borislav Petkov
  2010-11-01  0:11   ` Jin Dongming
  2010-10-29  8:10 ` Andi Kleen
  1 sibling, 1 reply; 7+ messages in thread
From: Borislav Petkov @ 2010-10-29  7:24 UTC (permalink / raw)
  To: Jin Dongming
  Cc: Huang Ying, Ingo Molnar, H.Peter Anvin, Andi Kleen, Hidetoshi Seto, LKLM

On Fri, Oct 29, 2010 at 01:42:23PM +0900, Jin Dongming wrote:
> Add mce_sys_info interface for testing mce.
> 
> Following is the reason why the interface is needed:
>   1. Sometimes before testing mce, we want to know the basic
>      information of mce on the test machine. Though there are
>      some provided interfaces, there are some kernel variables
>      which could not be known still.
>   2. When we inject mce data to test mce, the value of some
>      interfaces may be changed. It is hard for everybody to confirm
>      all the information of mce on the system.
> 
> This interface could help us know what has happened and make mce test
> become easier.
> 
> The content of mce_sys_info will be like following:
> 
>   MCE status:                      Original/(Current) Value
>       mce_disabled:                N
> 
>       mce_ser:                     0/1
>       tolerant:                    1/1
>       panic_on_oops:               0/0
>       monarch_timeout:             1000000/1000000 us
>       mce_ignore_ce:               0/0
>       mce_cmci_disabled:           N/N
> 
>   MCG Register:
>       CAP_Reg_Val           BANKS    EXT_CNT    FUNCTIONS
>       0x0000000000000806    0x06     0x00       TES
> 
> Please note that it is used for testing only.
> 
> I tested this patch on Intel64 next-tree.
> 
> Signe-off-by: Jin Dongming <jin.dongming@np.css.fujitsu.com>
> ---
>  arch/x86/kernel/cpu/mcheck/mce.c |  126 ++++++++++++++++++++++++++++++++++++++
>  1 files changed, 126 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index ed10e76..91108db 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -97,6 +97,25 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
>  static DEFINE_PER_CPU(struct mce, mces_seen);
>  static int			cpu_missing;
>  
> +#ifdef CONFIG_DEBUG_FS
> +struct mce_sys_info {
> +	int mce_disabled;
> +
> +	u64 mcg_cap_register;
> +
> +	int mce_ser;
> +	int tolerant;
> +	int panic_on_oops;
> +	int monarch_timeout;
> +	int mce_cmci_disable;
> +	int mce_ignore_ce;
> +	int mce_cmci_disabled;

Most of those could be put in a flags vector, thus saving a bunch of
space. Or a bitfield...

-- 
Regards/Gruss,
    Boris.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] [x86, next] Add mce_sys_info interface for debug.
  2010-10-29  4:42 [PATCH 3/3] [x86, next] Add mce_sys_info interface for debug Jin Dongming
  2010-10-29  7:24 ` Borislav Petkov
@ 2010-10-29  8:10 ` Andi Kleen
  2010-11-01  0:07   ` Jin Dongming
  1 sibling, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2010-10-29  8:10 UTC (permalink / raw)
  To: Jin Dongming
  Cc: Huang Ying, Ingo Molnar, H.Peter Anvin, Andi Kleen, Hidetoshi Seto, LKLM

On Fri, Oct 29, 2010 at 01:42:23PM +0900, Jin Dongming wrote:
> Add mce_sys_info interface for testing mce.
> 
> Following is the reason why the interface is needed:
>   1. Sometimes before testing mce, we want to know the basic
>      information of mce on the test machine. Though there are
>      some provided interfaces, there are some kernel variables
>      which could not be known still.

That doesn't make sense to me. AFAIK everything is either available
in sysfs or printed in the kernel log or can be known by knowing 
the hardware.

>   2. When we inject mce data to test mce, the value of some
>      interfaces may be changed. It is hard for everybody to confirm
>      all the information of mce on the system.
> 
> This interface could help us know what has happened and make mce test
> become easier.


The new interface seems overkill to me.

> 
> The content of mce_sys_info will be like following:
> 
>   MCE status:                      Original/(Current) Value
>       mce_disabled:                N

Well you should know if you set that. It's also visible
in the kernel log.

> 
>       mce_ser:                     0/1

That should be known or can be read directly.

>       tolerant:                    1/1
>       panic_on_oops:               0/0
>       monarch_timeout:             1000000/1000000 us
>       mce_ignore_ce:               0/0
>       mce_cmci_disabled:           N/N

These are all in sysfs or in the command line.



> 
>   MCG Register:
>       CAP_Reg_Val           BANKS    EXT_CNT    FUNCTIONS
>       0x0000000000000806    0x06     0x00       TES

You can just read that from /dev/cpu/*/msr if you really want it.
No need to bloat the kernel.

-Andi

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] [x86, next] Add mce_sys_info interface for debug.
  2010-10-29  8:10 ` Andi Kleen
@ 2010-11-01  0:07   ` Jin Dongming
  2010-11-01 14:00     ` Andi Kleen
  0 siblings, 1 reply; 7+ messages in thread
From: Jin Dongming @ 2010-11-01  0:07 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Huang Ying, Ingo Molnar, H.Peter Anvin, Hidetoshi Seto, LKLM

Hi, Andi

Thanks for your review.
I will cancel this patch.

But I would suggest that there be some kind of tool for mce testing.
It really takes too much time to check these values one by one.

Best Regards,
Jin Dongming

(2010/10/29 17:10), Andi Kleen wrote:
> On Fri, Oct 29, 2010 at 01:42:23PM +0900, Jin Dongming wrote:
>> Add mce_sys_info interface for testing mce.
>>
>> Following is the reason why the interface is needed:
>>   1. Sometimes before testing mce, we want to know the basic
>>      information of mce on the test machine. Though there are
>>      some provided interfaces, there are some kernel variables
>>      which could not be known still.
> 
> That doesn't make sense to me. AFAIK everything is either available
> in sysfs or printed in the kernel log or can be known by knowing 
> the hardware.
> 
>>   2. When we inject mce data to test mce, the value of some
>>      interfaces may be changed. It is hard for everybody to confirm
>>      all the information of mce on the system.
>>
>> This interface could help us know what has happened and make mce test
>> become easier.
> 
> 
> The new interface seems overkill to me.
> 
>>
>> The content of mce_sys_info will be like following:
>>
>>   MCE status:                      Original/(Current) Value
>>       mce_disabled:                N
> 
> Well you should know if you set that. It's also visible
> in the kernel log.
> 
>>
>>       mce_ser:                     0/1
> 
> That should be known or can be read directly.
> 
>>       tolerant:                    1/1
>>       panic_on_oops:               0/0
>>       monarch_timeout:             1000000/1000000 us
>>       mce_ignore_ce:               0/0
>>       mce_cmci_disabled:           N/N
> 
> These are all in sysfs or in the command line.
> 
> 
> 
>>
>>   MCG Register:
>>       CAP_Reg_Val           BANKS    EXT_CNT    FUNCTIONS
>>       0x0000000000000806    0x06     0x00       TES
> 
> You can just read that from /dev/cpu/*/msr if you really want it.
> No need to bloat the kernel.
> 
> -Andi
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 
> 



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] [x86, next] Add mce_sys_info interface for debug.
  2010-10-29  7:24 ` Borislav Petkov
@ 2010-11-01  0:11   ` Jin Dongming
  0 siblings, 0 replies; 7+ messages in thread
From: Jin Dongming @ 2010-11-01  0:11 UTC (permalink / raw)
  To: Borislav Petkov, Huang Ying, Ingo Molnar, H.Peter Anvin,
	Andi Kleen, Hidetoshi Seto, LKLM

Hi, Borislav Petkov

Thanks for your review. Because this patch will be withdrawn, I will not
update it as you suggested. But I will follow your comment in my other patches.

Thanks again.

Best Regards,
Jin Dongming

(2010/10/29 16:24), Borislav Petkov wrote:
> On Fri, Oct 29, 2010 at 01:42:23PM +0900, Jin Dongming wrote:
>> Add mce_sys_info interface for testing mce.
>>
>> Following is the reason why the interface is needed:
>>   1. Sometimes before testing mce, we want to know the basic
>>      information of mce on the test machine. Though there are
>>      some provided interfaces, there are some kernel variables
>>      which could not be known still.
>>   2. When we inject mce data to test mce, the value of some
>>      interfaces may be changed. It is hard for everybody to confirm
>>      all the information of mce on the system.
>>
>> This interface could help us know what has happened and make mce test
>> become easier.
>>
>> The content of mce_sys_info will be like following:
>>
>>   MCE status:                      Original/(Current) Value
>>       mce_disabled:                N
>>
>>       mce_ser:                     0/1
>>       tolerant:                    1/1
>>       panic_on_oops:               0/0
>>       monarch_timeout:             1000000/1000000 us
>>       mce_ignore_ce:               0/0
>>       mce_cmci_disabled:           N/N
>>
>>   MCG Register:
>>       CAP_Reg_Val           BANKS    EXT_CNT    FUNCTIONS
>>       0x0000000000000806    0x06     0x00       TES
>>
>> Please note that it is used for testing only.
>>
>> I tested this patch on Intel64 next-tree.
>>
>> Signe-off-by: Jin Dongming <jin.dongming@np.css.fujitsu.com>
>> ---
>>  arch/x86/kernel/cpu/mcheck/mce.c |  126 ++++++++++++++++++++++++++++++++++++++
>>  1 files changed, 126 insertions(+), 0 deletions(-)
>>
>> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
>> index ed10e76..91108db 100644
>> --- a/arch/x86/kernel/cpu/mcheck/mce.c
>> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
>> @@ -97,6 +97,25 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
>>  static DEFINE_PER_CPU(struct mce, mces_seen);
>>  static int			cpu_missing;
>>  
>> +#ifdef CONFIG_DEBUG_FS
>> +struct mce_sys_info {
>> +	int mce_disabled;
>> +
>> +	u64 mcg_cap_register;
>> +
>> +	int mce_ser;
>> +	int tolerant;
>> +	int panic_on_oops;
>> +	int monarch_timeout;
>> +	int mce_cmci_disable;
>> +	int mce_ignore_ce;
>> +	int mce_cmci_disabled;
> 
> Most of those could be put in a flags vector, thus saving a bunch of
> space. Or a bitfield...
> 



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] [x86, next] Add mce_sys_info interface for debug.
  2010-11-01  0:07   ` Jin Dongming
@ 2010-11-01 14:00     ` Andi Kleen
  2010-11-01 23:56       ` Jin Dongming
  0 siblings, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2010-11-01 14:00 UTC (permalink / raw)
  To: Jin Dongming
  Cc: Andi Kleen, Huang Ying, Ingo Molnar, H.Peter Anvin, Hidetoshi Seto, LKLM

On Mon, Nov 01, 2010 at 09:07:35AM +0900, Jin Dongming wrote:
> Hi, Andi
> 
> Thanks for your review.
> I will cancel this patch.
> 
> But I suggested there is such kind of tool for mce test.
> It really costs too much time to confirm them one by one.

I would suggest writing a shell script and putting it up for download
somewhere. We could add a reference to it in the kernel documentation.

-Andi

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] [x86, next] Add mce_sys_info interface for debug.
  2010-11-01 14:00     ` Andi Kleen
@ 2010-11-01 23:56       ` Jin Dongming
  0 siblings, 0 replies; 7+ messages in thread
From: Jin Dongming @ 2010-11-01 23:56 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Huang Ying, Ingo Molnar, H.Peter Anvin, Hidetoshi Seto, LKLM

Hi, Andi
(2010/11/01 23:00), Andi Kleen wrote:
> On Mon, Nov 01, 2010 at 09:07:35AM +0900, Jin Dongming wrote:
>> Hi, Andi
>>
>> Thanks for your review.
>> I will cancel this patch.
>>
>> But I suggested there is such kind of tool for mce test.
>> It really costs too much time to confirm them one by one.
> 
> I would suggest to write a shell script and put it to download
> somewhere. We could add a reference to it to the kernel documentation.
> 
> -Andi

It sounds great.
Thank you.

Best Regards,
Jin Dongming
 



^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2010-11-01 23:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-10-29  4:42 [PATCH 3/3] [x86, next] Add mce_sys_info interface for debug Jin Dongming
2010-10-29  7:24 ` Borislav Petkov
2010-11-01  0:11   ` Jin Dongming
2010-10-29  8:10 ` Andi Kleen
2010-11-01  0:07   ` Jin Dongming
2010-11-01 14:00     ` Andi Kleen
2010-11-01 23:56       ` Jin Dongming

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.