All of lore.kernel.org
 help / color / mirror / Atom feed
From: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
To: Borislav Petkov <bp@alien8.de>
Cc: x86@kernel.org, linux-edac@vger.kernel.org,
	linux-kernel@vger.kernel.org, Tony Luck <tony.luck@intel.com>,
	"H . Peter Anvin" <hpa@zytor.com>,
	Yazen Ghannam <yazen.ghannam@amd.com>
Subject: Re: [PATCH v4 1/2] x86/mce: Check for writes ignored in MCA_STATUS register
Date: Wed, 13 Apr 2022 12:16:26 -0700	[thread overview]
Message-ID: <2c4c2359-f662-cd5d-e9ab-da5c71ca7dc9@amd.com> (raw)
In-Reply-To: <Yk267A1MKOo2AlXQ@zn.tnic>

On 4/6/2022 9:08 AM, Borislav Petkov wrote:
> On Mon, Feb 14, 2022 at 05:36:39PM -0600, Smita Koralahalli wrote:
>> diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
>> index 5fbd7ffb3233..43ba63b7dc73 100644
>> --- a/arch/x86/kernel/cpu/mce/inject.c
>> +++ b/arch/x86/kernel/cpu/mce/inject.c
>> @@ -470,11 +470,36 @@ static void toggle_nb_mca_mst_cpu(u16 nid)
>>   		       __func__, PCI_FUNC(F3->devfn), NBCFG);
>>   }
>>   
>> +struct mce_err_handler {
>> +	struct mce *mce;
>> +	int err;
>> +};
> Well, we already have a struct mce i_mce which serves as a sort-of
> global container for injection data which gets dumped into the hardware
> upon injection time.
>
> Now you're adding another struct which contains additional info and are
> adding a pointer to that i_mce.
>
> But this is not a clean design - when you do stuff like that you need to
> unify all those global-info-containing structs and make the code dealing
> with them straight-forward . I.e., if you don't look at the big picture
> of a design, it soon grows into an unwieldy mess which some poor sod
> would need to clean up afterwards and that poor sod is in most cases the
> maintainer.
>
> So I went and did that ontop of your patch, this is one possible way to
> do it but it.
>
> Here it is, a combined diff ontop of tip:ras/core. Please split it into
> patches: first patch converts to the new descriptor and then a second
> one adds the MCA_STATUS checking bits.
>
> There are a couple of other changes in there, if they're not clear, feel
> free to ask.
Okay, will make changes as mentioned. Thanks.
I understood most of it, just have one doubt below..
>
> Thx.
>
> ---
> diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
> index 43ba63b7dc73..0fd1eea2f754 100644
> --- a/arch/x86/kernel/cpu/mce/inject.c
> +++ b/arch/x86/kernel/cpu/mce/inject.c
> @@ -33,10 +33,14 @@
>   
>   #include "internal.h"
>   
> -/*
> - * Collect all the MCi_XXX settings
> - */
> -static struct mce i_mce;
> +static bool hw_injection_possible = true;
> +
> +/* Collect all the MCi_XXX settings */
> +static struct inject_desc {
> +	struct mce m;
> +	int err;
> +} inj_desc;
> +
>   static struct dentry *dfs_inj;
>   
>   #define MAX_FLAG_OPT_SIZE	4
> @@ -110,9 +114,11 @@ static int inj_ipid_set(void *data, u64 val)
>   
>   DEFINE_SIMPLE_ATTRIBUTE(ipid_fops, inj_ipid_get, inj_ipid_set, "%llx\n");
>   
> -static void setup_inj_struct(struct mce *m)
> +static void setup_inj_struct(void)
>   {
> -	memset(m, 0, sizeof(struct mce));
> +	struct mce *m = &inj_desc.m;
> +
> +	memset(&inj_desc, 0, sizeof(struct inject_desc));
>   
>   	m->cpuvendor = boot_cpu_data.x86_vendor;
>   	m->time	     = ktime_get_real_seconds();
> @@ -470,84 +476,78 @@ static void toggle_nb_mca_mst_cpu(u16 nid)
>   		       __func__, PCI_FUNC(F3->devfn), NBCFG);
>   }
>   
> -struct mce_err_handler {
> -	struct mce *mce;
> -	int err;
> -};
> -
> -static struct mce_err_handler mce_err;
> -
> -static bool prepare_mca_status(struct mce *m)
> +static bool prepare_mca_status(void)
>   {
> -	u32 status_reg = mca_msr_reg(m->bank, MCA_STATUS);
> -	u64 status_val = m->status;
> +	u32 status_reg = mca_msr_reg(inj_desc.m.bank, MCA_STATUS);
> +	u64 status_val = inj_desc.m.status;
>   
>   	wrmsrl(status_reg, status_val);
>   	rdmsrl(status_reg, status_val);
>   
> -	return status_val;
> +	return status_val == inj_desc.m.status;
>   }
>   
> -static void prepare_msrs(void *info)
> +static void prepare_msrs(void *unused)
>   {
> -	struct mce_err_handler *i_mce_err = ((struct mce_err_handler *)info);
> -	struct mce m = *i_mce_err->mce;
> -	u8 b = m.bank;
> +	struct mce *m = &inj_desc.m;
> +	u8 b = inj_desc.m.bank;
>   
> -	if (!prepare_mca_status(&m)) {
> +	if (!prepare_mca_status()) {
>   		pr_err("Platform does not allow error injection, try using APEI EINJ instead.\n");
> -		i_mce_err->err = -EINVAL;
> +		inj_desc.err = -EINVAL;
> +		hw_injection_possible = false;
>   		return;
>   	}
>   
> -	wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
> +	wrmsrl(MSR_IA32_MCG_STATUS, m->mcgstatus);
>   
>   	if (boot_cpu_has(X86_FEATURE_SMCA)) {
> -		if (m.inject_flags == DFR_INT_INJ) {
> -			wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
> -			wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
> +		if (m->inject_flags == DFR_INT_INJ) {
> +			wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m->status);
> +			wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m->addr);
>   		} else {
> -			wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
> -			wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
> +			wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m->status);
> +			wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m->addr);
>   		}
>   
> -		wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
> -		wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
> +		wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m->misc);
> +		wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m->synd);
>   	} else {
> -		wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
> -		wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
> -		wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
> +		wrmsrl(MSR_IA32_MCx_STATUS(b), m->status);
> +		wrmsrl(MSR_IA32_MCx_ADDR(b), m->addr);
> +		wrmsrl(MSR_IA32_MCx_MISC(b), m->misc);
>   	}
>   }
>   
> -static void do_inject(void)
> +static int do_inject(void)
>   {
> +	struct mce *m = &inj_desc.m;
> +	unsigned int cpu = m->extcpu;
>   	u64 mcg_status = 0;
> -	unsigned int cpu = i_mce.extcpu;
> -	u8 b = i_mce.bank;
> +	u8 b = m->bank;
>   
> -	mce_err.mce = &i_mce;
> -	mce_err.err = 0;
> +	m->tsc = rdtsc_ordered();
>   
> -	i_mce.tsc = rdtsc_ordered();
> +	m->status |= MCI_STATUS_VAL;
>   
> -	i_mce.status |= MCI_STATUS_VAL;
> +	if (m->misc)
> +		m->status |= MCI_STATUS_MISCV;
>   
> -	if (i_mce.misc)
> -		i_mce.status |= MCI_STATUS_MISCV;
> -
> -	if (i_mce.synd)
> -		i_mce.status |= MCI_STATUS_SYNDV;
> +	if (m->synd)
> +		m->status |= MCI_STATUS_SYNDV;
>   
>   	if (inj_type == SW_INJ) {
> -		mce_log(&i_mce);
> -		return;
> +		mce_log(m);
> +		return 0;
>   	}
>   
> +	if (!hw_injection_possible)
> +		return -EINVAL;
> +
>   	/* prep MCE global settings for the injection */
>   	mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
>   
> -	if (!(i_mce.status & MCI_STATUS_PCC))
> +	if (!(m->status & MCI_STATUS_PCC))
>   		mcg_status |= MCG_STATUS_RIPV;
>   
>   	/*
> @@ -556,8 +556,8 @@ static void do_inject(void)
>   	 * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
>   	 */
>   	if (inj_type == DFR_INT_INJ) {
> -		i_mce.status |= MCI_STATUS_DEFERRED;
> -		i_mce.status &= ~MCI_STATUS_UC;
> +		m->status |= MCI_STATUS_DEFERRED;
> +		m->status &= ~MCI_STATUS_UC;
>   	}
>   
>   	/*
> @@ -578,13 +578,13 @@ static void do_inject(void)
>   
>   	toggle_hw_mce_inject(cpu, true);
>   
> -	i_mce.mcgstatus = mcg_status;
> -	i_mce.inject_flags = inj_type;
> -	smp_call_function_single(cpu, prepare_msrs, &mce_err, 0);
> +	m->mcgstatus = mcg_status;
> +	m->inject_flags = inj_type;
> +	smp_call_function_single(cpu, prepare_msrs, NULL, 0);
>   
>   	toggle_hw_mce_inject(cpu, false);
>   
> -	if (mce_err.err)
> +	if (inj_desc.err)
>   		goto err;
>   
>   	switch (inj_type) {
> @@ -601,6 +601,7 @@ static void do_inject(void)
>   err:
>   	cpus_read_unlock();
>   
> +	return inj_desc.err;
>   }
>   
>   /*
> @@ -611,6 +612,7 @@ static int inj_bank_set(void *data, u64 val)
>   {
>   	struct mce *m = (struct mce *)data;
>   	u8 n_banks;
> +	int err;
>   	u64 cap;
>   
>   	/* Get bank count on target CPU so we can handle non-uniform values. */
> @@ -650,12 +652,12 @@ static int inj_bank_set(void *data, u64 val)
>   	}
>   
>   inject:
> -	do_inject();
> +	err = do_inject();
>   
>   	/* Reset injection struct */
> -	setup_inj_struct(&i_mce);
> +	setup_inj_struct();
>   
> -	return mce_err.err;
> +	return err;
>   }
>   
>   MCE_INJECT_GET(bank);
> @@ -745,7 +747,7 @@ static void __init debugfs_init(void)
>   
>   	for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
>   		debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
> -				    &i_mce, dfs_fls[i].fops);
> +				    &inj_desc.m, dfs_fls[i].fops);
>   }
>   
>   static int __init inject_init(void)
> @@ -758,7 +760,7 @@ static int __init inject_init(void)
>   	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
>   	mce_register_injector_chain(&inject_nb);
>   
> -	setup_inj_struct(&i_mce);
> +	setup_inj_struct();
>   
>   	pr_info("Machine check injector initialized\n");
>   
> diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
> index 4ae0e603f7fa..7e03f5b7f6bd 100644
> --- a/arch/x86/kernel/cpu/mce/internal.h
> +++ b/arch/x86/kernel/cpu/mce/internal.h
> @@ -211,7 +211,7 @@ noinstr u64 mce_rdmsrl(u32 msr);
>   
>   static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
>   {
> -	if (mce_flags.smca) {
> +	if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
Should this change be part of this patch series? Also, why mce_flags.smca
check won't work?

Thanks,
Smita
>   		switch (reg) {
>   		case MCA_CTL:	 return MSR_AMD64_SMCA_MCx_CTL(bank);
>   		case MCA_ADDR:	 return MSR_AMD64_SMCA_MCx_ADDR(bank);
>


  reply	other threads:[~2022-04-13 19:16 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-14 23:36 [PATCH v4 0/2] x86/mce: Handle error injection failure in mce-inject module Smita Koralahalli
2022-02-14 23:36 ` [PATCH v4 1/2] x86/mce: Check for writes ignored in MCA_STATUS register Smita Koralahalli
2022-04-06 16:08   ` Borislav Petkov
2022-04-13 19:16     ` Smita Koralahalli [this message]
2022-04-13 20:19       ` Borislav Petkov
2022-04-19  3:24     ` Smita Koralahalli
2022-04-20  9:17       ` Borislav Petkov
2022-04-21 19:10         ` Smita Koralahalli
2022-04-24 22:32           ` Borislav Petkov
2022-05-03  3:28             ` Smita Koralahalli
2022-05-03 21:14               ` Borislav Petkov
2022-05-05 19:03                 ` Smita Koralahalli
2022-06-28 10:17   ` [tip: ras/core] x86/mce: Check whether writes to MCA_STATUS are getting ignored tip-bot2 for Smita Koralahalli
2022-02-14 23:36 ` [PATCH v4 2/2] x86/mce/mce-inject: Return appropriate error code if CPUs are offline Smita Koralahalli
2022-03-11 20:55 ` [PATCH v4 0/2] x86/mce: Handle error injection failure in mce-inject module Koralahalli Channabasappa, Smita

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2c4c2359-f662-cd5d-e9ab-da5c71ca7dc9@amd.com \
    --to=smita.koralahallichannabasappa@amd.com \
    --cc=bp@alien8.de \
    --cc=hpa@zytor.com \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    --cc=yazen.ghannam@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.