linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
To: tony.luck@intel.com
Cc: gong.chen@linux.intel.com, ananth@in.ibm.com,
	masbock@linux.vnet.ibm.com, x86@kernel.org,
	linux-kernel@vger.kernel.org, bp@amd64.org, lcm@us.ibm.com,
	andi@firstfloor.org, hpa@zytor.com, tglx@linutronix.de,
	mingo@redhat.com, gregkh@suse.de, linux-edac@vger.kernel.org
Subject: Re: [PATCH v3] x86/mce: Honour bios-set CMCI threshold
Date: Fri, 21 Sep 2012 17:09:04 +0530	[thread overview]
Message-ID: <505C51D8.6070402@linux.vnet.ibm.com> (raw)
In-Reply-To: <20120912122516.3825.87838.stgit@localhost.localdomain>

Hi Tony,
Can you kindly take in this patch if there are no further comments?

Thanks,
Naveen

On 09/12/2012 05:55 PM, Naveen N. Rao wrote:
> The ACPI spec doesn't provide for a way for the bios to pass down
> recommended thresholds to the OS on a _per-bank_ basis. This patch adds
> a new boot option, which if passed, allows bios to initialize the CMCI
> threshold. In such a case, we simply skip programming any threshold
> value.
>
> As fail-safe, we initialize threshold to 1 if some banks have not been
> initialized by the bios and warn the user.
>
> v3: Updated messages as per Tony's inputs.
> v2: Just separating out the patch. I will send a separate patch for
> consolidating the MCE boot flags.
>
> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
> ---
>   Documentation/x86/x86_64/boot-options.txt |    5 ++++
>   arch/x86/include/asm/mce.h                |    1 +
>   arch/x86/kernel/cpu/mcheck/mce.c          |   10 ++++++++
>   arch/x86/kernel/cpu/mcheck/mce_intel.c    |   35 +++++++++++++++++++++++++++--
>   4 files changed, 48 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
> index c54b4f5..ec92540 100644
> --- a/Documentation/x86/x86_64/boot-options.txt
> +++ b/Documentation/x86/x86_64/boot-options.txt
> @@ -50,6 +50,11 @@ Machine check
>   		monarchtimeout:
>   		Sets the time in us to wait for other CPUs on machine checks. 0
>   		to disable.
> +   mce=bios_cmci_threshold
> +		Don't overwrite the bios-set CMCI threshold. This boot option
> +		prevents Linux from overwriting the CMCI threshold set by the
> +		bios. Without this option, Linux always sets the CMCI
> +		threshold to 1.
>
>      nomce (for compatibility with i386): same as mce=off
>
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index a3ac52b..8ad5078 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -171,6 +171,7 @@ DECLARE_PER_CPU(struct device *, mce_device);
>   #ifdef CONFIG_X86_MCE_INTEL
>   extern int mce_cmci_disabled;
>   extern int mce_ignore_ce;
> +extern int mce_bios_cmci_threshold;
>   void mce_intel_feature_init(struct cpuinfo_x86 *c);
>   void cmci_clear(void);
>   void cmci_reenable(void);
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index c311122..29e87d3 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -83,6 +83,7 @@ static int			mce_dont_log_ce		__read_mostly;
>   int				mce_cmci_disabled	__read_mostly;
>   int				mce_ignore_ce		__read_mostly;
>   int				mce_ser			__read_mostly;
> +int				mce_bios_cmci_threshold	__read_mostly;
>
>   struct mce_bank                *mce_banks		__read_mostly;
>
> @@ -1946,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
>    *	check, or 0 to not wait
>    * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
>    * mce=nobootlog Don't log MCEs from before booting.
> + * mce=bios_cmci_threshold Don't program the CMCI threshold
>    */
>   static int __init mcheck_enable(char *str)
>   {
> @@ -1965,6 +1967,8 @@ static int __init mcheck_enable(char *str)
>   		mce_ignore_ce = 1;
>   	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
>   		mce_bootlog = (str[0] == 'b');
> +	else if (!strcmp(str, "bios_cmci_threshold"))
> +		mce_bios_cmci_threshold = 1;
>   	else if (isdigit(str[0])) {
>   		get_option(&str, &tolerant);
>   		if (*str == ',') {
> @@ -2205,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
>   	&mce_cmci_disabled
>   };
>
> +static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
> +	__ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
> +	&mce_bios_cmci_threshold
> +};
> +
>   static struct device_attribute *mce_device_attrs[] = {
>   	&dev_attr_tolerant.attr,
>   	&dev_attr_check_interval.attr,
> @@ -2213,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = {
>   	&dev_attr_dont_log_ce.attr,
>   	&dev_attr_ignore_ce.attr,
>   	&dev_attr_cmci_disabled.attr,
> +	&dev_attr_bios_cmci_threshold.attr,
>   	NULL
>   };
>
> diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
> index 098386f..5f88abf 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
> @@ -181,10 +181,12 @@ static void cmci_discover(int banks)
>   	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
>   	unsigned long flags;
>   	int i;
> +	int bios_wrong_thresh = 0;
>
>   	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
>   	for (i = 0; i < banks; i++) {
>   		u64 val;
> +		int bios_zero_thresh = 0;
>
>   		if (test_bit(i, owned))
>   			continue;
> @@ -198,8 +200,20 @@ static void cmci_discover(int banks)
>   			continue;
>   		}
>
> -		val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
> -		val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
> +		if (!mce_bios_cmci_threshold) {
> +			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
> +			val |= CMCI_THRESHOLD;
> +		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
> +			/*
> +			 * If bios_cmci_threshold boot option was specified
> +			 * but the threshold is zero, we'll try to initialize
> +			 * it to 1.
> +			 */
> +			bios_zero_thresh = 1;
> +			val |= CMCI_THRESHOLD;
> +		}
> +
> +		val |= MCI_CTL2_CMCI_EN;
>   		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
>   		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
>
> @@ -207,11 +221,26 @@ static void cmci_discover(int banks)
>   		if (val & MCI_CTL2_CMCI_EN) {
>   			set_bit(i, owned);
>   			__clear_bit(i, __get_cpu_var(mce_poll_banks));
> +			/*
> +			 * We are able to set thresholds for some banks that
> +			 * had a threshold of 0. This means the BIOS has not
> +			 * set the thresholds properly or does not work with
> +			 * this boot option. Note down now and report later.
> +			 */
> +			if (mce_bios_cmci_threshold && bios_zero_thresh &&
> +					(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
> +				bios_wrong_thresh = 1;
>   		} else {
>   			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
>   		}
>   	}
>   	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
> +	if (mce_bios_cmci_threshold && bios_wrong_thresh) {
> +		pr_info_once(
> +			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
> +		pr_info_once(
> +			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
> +	}
>   }
>
>   /*
> @@ -249,7 +278,7 @@ void cmci_clear(void)
>   			continue;
>   		/* Disable CMCI */
>   		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
> -		val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
> +		val &= ~MCI_CTL2_CMCI_EN;
>   		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
>   		__clear_bit(i, __get_cpu_var(mce_banks_owned));
>   	}
>


  reply	other threads:[~2012-09-21 11:39 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-09-11  5:31 [PATCH v2] x86/mce: Honour bios-set CMCI threshold Naveen N. Rao
2012-09-11 16:04 ` Tony Luck
2012-09-12 12:25   ` [PATCH v3] " Naveen N. Rao
2012-09-21 11:39     ` Naveen N. Rao [this message]
2012-10-17 10:59       ` Borislav Petkov
2012-10-17 11:27         ` Naveen N. Rao
2012-10-17 13:09           ` Borislav Petkov
2012-10-17 15:47             ` Naveen N. Rao
2012-10-17 16:40               ` Borislav Petkov
2012-10-17 17:28                 ` Luck, Tony
2012-10-17 18:09                   ` Borislav Petkov
2012-10-18  5:43                   ` Naveen N. Rao
2012-10-18 13:24                     ` Borislav Petkov
2012-10-18 16:46                       ` Luck, Tony
2012-10-19 15:52         ` [tip:x86/urgent] x86, MCE: Remove bios_cmci_threshold sysfs attribute tip-bot for Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=505C51D8.6070402@linux.vnet.ibm.com \
    --to=naveen.n.rao@linux.vnet.ibm.com \
    --cc=ananth@in.ibm.com \
    --cc=andi@firstfloor.org \
    --cc=bp@amd64.org \
    --cc=gong.chen@linux.intel.com \
    --cc=gregkh@suse.de \
    --cc=hpa@zytor.com \
    --cc=lcm@us.ibm.com \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=masbock@linux.vnet.ibm.com \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).