[MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1

From: mark gross <mgross@linux.intel.com>
To: speck@linutronix.de
Subject: [MODERATED] Re: [PATCH 1/2] v3 more sampling fun 1
Date: Tue, 17 Mar 2020 11:56:16 -0700	[thread overview]
Message-ID: <20200317185616.GA107482@mtg-dev.jf.intel.com> (raw)
In-Reply-To: <87tv2uk6c3.fsf@nanos.tec.linutronix.de>

On Wed, Mar 11, 2020 at 09:02:52PM +0100, speck for Thomas Gleixner wrote:
> Mark,
> 
> speck for mark gross <speck@linutronix.de> writes:
> > From: mark gross <mgross@linux.intel.com>
> >
> > This patch:
> 
> git grep 'This patch' Documentation/process/

Interpretive mood / give orders to the codebase to change its behavior.

ok.

> 
> > * enables administrator to configure the mitigation off when desired
> >   using either mitigations=off or srbds=off.
> > * exports vulnerability status via sysfs
> >  
> > +/*
> > + * Match a range of steppings
> > + */
> > +
> > +struct x86_cpu_id_ext {
> > +	struct x86_cpu_id id;
> > +	__u16 steppings; /* bit map of steppings to match against */
> 
> IIRC, we asked for adding the stepping to the existing data structure,
> but I can't find any rationale somewhere why this is still separate.

Changed to change the x86_cpu_id to append a "steppings" member at the end.

> 
> If you really think hard about it then this is not needed at all. See
> below.
> 
> > +static bool srbds_off;
> > +
> > +void srbds_configure_mitigation(void)
> > +{
> > +	u64 mcu_ctrl;
> > +
> > +	if (srbds_mitigation == SRBDS_NOT_AFFECTED)
> > +		return;
> > +
> > +	if (srbds_mitigation == SRBDS_HYPERVISOR)
> > +		return;
> > +
> > +	if (srbds_mitigation == SRBDS_MITIGATION_UCODE_NEEDED)
> > +		return;
> > +
> > +	rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
> > +
> > +	switch (srbds_mitigation) {
> > +	case SRBDS_MITIGATION_OFF:
> > +	case SRBDS_TSX_NOT_AFFECTED:
> 
> This mitigation state confuses the hell out of me. The text says:
> 
>  +	[SRBDS_TSX_NOT_AFFECTED]	= "Not affected (TSX disabled)",
> 
> But the enum value reads to me: TSX is not affected....
> 
>     SRBDS_NOT_AFFECTED_TSX_OFF
> 
> is a bit more intuitive. Hmm?
changed to SRBDS_NOT_AFFECTED_TSX_OFF.

> 
> > +		mcu_ctrl |= RNGDS_MITG_DIS;
> > +		break;
> > +	case SRBDS_MITIGATION_FULL:
> > +		mcu_ctrl &= ~RNGDS_MITG_DIS;
> > +		break;
> > +	default:
> > +		break;
> > +	}
> > +
> > +	wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
> > +}
> > +
> > +static void __init srbds_select_mitigation(void)
> > +{
> > +	u64 ia32_cap;
> > +
> > +	if (!boot_cpu_has_bug(X86_BUG_SRBDS)) {
> > +		srbds_mitigation = SRBDS_NOT_AFFECTED;
> > +		return;
> > +	}
> > +
> > +	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
> > +		srbds_mitigation = SRBDS_HYPERVISOR;
> > +		return;
> > +	}
> > +
> > +	if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL)) {
> > +		srbds_mitigation = SRBDS_MITIGATION_UCODE_NEEDED;
> > +		return;
> > +	}
> > +
> > +	if (boot_cpu_has_bug(X86_BUG_SRBDS)) {
> > +		srbds_mitigation = SRBDS_MITIGATION_FULL;
> > +
> > +		ia32_cap = x86_read_arch_cap_msr();
> > +		if (ia32_cap & ARCH_CAP_MDS_NO) {
> > +			if (!boot_cpu_has(X86_FEATURE_RTM))
> > +				srbds_mitigation = SRBDS_TSX_NOT_AFFECTED;
> 
> This logic comes with an awesome amount of comments...
Added comment about checking to see if this is one of the MDS_NO systems that
supports TSX where they are only vulnerable if TSX is enabled.

> 
> > +		}
> > +	}
> > +
> > +	if (cpu_mitigations_off() || srbds_off) {
> > +		if (srbds_mitigation != SRBDS_TSX_NOT_AFFECTED)
> > +			srbds_mitigation = SRBDS_MITIGATION_OFF;
> > +	}
> > +
> > +	srbds_configure_mitigation();
> > +}
> > +
> > +static int __init srbds_parse_cmdline(char *str)
> > +{
> > +	if (!str)
> > +		return -EINVAL;
> > +
> > +	if (!strcmp(str, "off"))
> > +		srbds_off = true;
> > +
> > +	return 0;
> > +}
> > +
> 
> stray newline
> 
> > +early_param("srbds", srbds_parse_cmdline);
> > +
> 
> >  #define VULNWL(_vendor, _family, _model, _whitelist)	\
> >  	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
> > @@ -1020,6 +1021,15 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
> >  #define VULNWL_HYGON(family, whitelist)		\
> >  	VULNWL(HYGON, family, X86_MODEL_ANY, whitelist)
> >  
> > +#define VULNWL_EXT(_vendor, _family, _model, _steppings, _whitelist)	\
> > +	{ VULNWL(_vendor, _family, _model, _whitelist), _steppings }
> > +
> 
> And because this is used for a blacklist the prefix VULNWL, aka
> VULNerability White List, and the last argument make a lot of sense,
> right?

right.  Would it be ok to s/whitelist/issues or issue_mask for all these?  That
structure is a bitmask and not a list anyway.

> 
> > +#define VULNWL_INTEL_EXT(model,  whitelist)		\
> > +	VULNWL_EXT(INTEL, 6, INTEL_FAM6_##model, X86_STEPPING_ANY, whitelist)
> > +
> > +#define VULNWL_INTEL_STEPPING(model, stepping, whitelist)		\
> > +	VULNWL_EXT(INTEL, 6, INTEL_FAM6_##model, stepping, whitelist)
> > +
> >  static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
> >  	VULNWL(ANY,	4, X86_MODEL_ANY,	NO_SPECULATION),
> >  	VULNWL(CENTAUR,	5, X86_MODEL_ANY,	NO_SPECULATION),
> > @@ -1075,6 +1085,27 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
> >  	{}
> >  };
> >  
> > +/*
> > + * to avoide corrupting the whiltelist with blacklist items lets create a list
> 
> Sentences start with uppercase letters and spell checking is available
> in most editors. Now for the content:
> 
> There is nothing to corrupt. Blacklists and whitelists do not mix.
> 
> Also what means 'lets create' here? This is a comment describing what
> the following array is used for. Facts please.
Changed / reworded and spell checked.

> 
> > + * of affected processors for issues that cannot be enumerated other than by
> > + * family/model/stepping
> > + */
> > +static const struct x86_cpu_id_ext affected_cpus[] __initconst = {
> > +	VULNWL_INTEL_EXT(IVYBRIDGE,		SRBDS),
> 
> 
> > +	VULNWL_INTEL_EXT(HASWELL,		SRBDS),
> > +	VULNWL_INTEL_EXT(HASWELL_L,		SRBDS),
> > +	VULNWL_INTEL_EXT(HASWELL_G,		SRBDS),
> > +	VULNWL_INTEL_EXT(BROADWELL_G,		SRBDS),
> > +	VULNWL_INTEL_EXT(BROADWELL,		SRBDS),
> > +	VULNWL_INTEL_EXT(SKYLAKE_L,		SRBDS),
> > +	VULNWL_INTEL_EXT(SKYLAKE,		SRBDS),
> > +	VULNWL_INTEL_STEPPING(KABYLAKE_L, GENMASK(0xA, 0),	SRBDS), /*06_8E steppings <=A*/
> > +	VULNWL_INTEL_STEPPING(KABYLAKE_L, GENMASK(0xC, 0xB),	SRBDS),
> > /*06_8E stepping = 0xB|0xC if TSX enabled*/
> 
> This is beyond confusing because this should either be expressed in the
> vulnerability itself, i.e. SRBDS_TSX_ONLY, or just commented along with
> the comment in srbds_select_mitigation()

reduced by 2 entries and removed talk of TSX enabled

> 
> > +	VULNWL_INTEL_STEPPING(KABYLAKE, GENMASK(0xB, 0),	SRBDS), /*06_9E steppings <=B*/
> > +	VULNWL_INTEL_STEPPING(KABYLAKE, GENMASK(0xD, 0xC),	SRBDS), /*06_9E stepping = 0xC if TSX enabled*/
> 
>         Comment and code do not match.
Fixed.

> 
> Aside of this whole thing is utter garbage, really.
> 
> #define X86_STEPPING_MAX	15
> #define STEPSHIFT		16
> 
> #define ISVULN(_vendor, family, model, minstep, maxstep, vulns)		\
> 	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY,	\
>           GENMASK(minstep, maxstep) << STEPSHIFT | vulns }
> 
> #define ISVULN_INTEL(model, minstep, maxstep, vulns)			\
> 	ISVULN(INTEL, 6, INTEL_FAM6_##model, minstep, maxstep, vulns)
> 
> ....
> 
> /* List of affected CPUs identified by model and stepping range. */
> static const struct x86_cpu_id affected_cpus[] __initconst = {
> 	ISVULN_INTEL(HASWELL,		0, X86_STEPPING_MAX,	SRBDS),
> 	ISVULN_INTEL(BROADWELL_G,	0, X86_STEPPING_MAX,	SRBDS),
> 
> 	/* Kabylake L steppings 0xB, 0xC only affected when TSX in on */
> 	ISVULN_INTEL(KABYLAKE_L,	0, 0xC,			SRBDS),
> 
>         /* Kabylake steppings 0xC, 0xD only affected when TSX in on */
> 	ISVULN_INTEL(KABYLAKE,		0, 0xD,			SRBDS),
>         {}
> };
> 
> Now:
> 
> static bool __init cpu_matches(unsigned long which)
> {
>   	const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist);
> 
> -	return m && !!(m->driver_data & which);
> +  	return m && (m->driver_data & which) == which;
> }
>   
Yes, this is a clever use of encoding steppings into 16 bits of the driver_data
member but, its not very straight forward compared to adding a steppings data
member to the end of the existing structure.

> > +static bool __init cpu_affected(unsigned long which)
> > +{
> > +	const struct x86_cpu_id_ext *m = x86_match_cpu_ext(affected_cpus);
> > +
> > +	return m && !!(m->id.driver_data & which);
> > +}
> 
> Which makes this go away
> 
> >  u64 x86_read_arch_cap_msr(void)
> >  {
> >  	u64 ia32_cap = 0;
> > @@ -1124,6 +1162,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
> >  	if (!cpu_matches(NO_SWAPGS))
> >  		setup_force_cpu_bug(X86_BUG_SWAPGS);
> >  
> > +	if (cpu_affected(SRBDS))
> > +		setup_force_cpu_bug(X86_BUG_SRBDS);
> 
> and this becomes:
> 
> +    	if (cpu_matches(BIT(boot_cpu_data.x86_stepping + STEPSHIFT) | which))
> +		setup_force_cpu_bug(X86_BUG_SRBDS);
> 
> Too much code reuse, right?
Maybe.

> 
> >  	/*
> >  	 * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when:
> >  	 *	- TSX is supported or
> > diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
> > index 37fdefd14f28..22d419080fd6 100644
> > --- a/arch/x86/kernel/cpu/cpu.h
> > +++ b/arch/x86/kernel/cpu/cpu.h
> > @@ -44,7 +44,20 @@ struct _tlb_table {
> >  extern const struct cpu_dev *const __x86_cpu_dev_start[],
> >  			    *const __x86_cpu_dev_end[];
> >  
> > +enum srbds_mitigations {
> > +	SRBDS_NOT_AFFECTED,
> > +	SRBDS_MITIGATION_OFF,
> > +	SRBDS_MITIGATION_UCODE_NEEDED,
> > +	SRBDS_MITIGATION_FULL,
> > +	SRBDS_TSX_NOT_AFFECTED,
> > +	SRBDS_HYPERVISOR,
> > +};
> > +
> > +extern __ro_after_init enum srbds_mitigations srbds_mitigation;
> 
> And this needs to be public because the only user is in bugs.c, right?
right.  this is now gone.

> 
> > +void srbds_configure_mitigation(void);
> > +
> >  #ifdef CONFIG_CPU_SUP_INTEL
> > +
> >  enum tsx_ctrl_states {
> >  	TSX_CTRL_ENABLE,
> >  	TSX_CTRL_DISABLE,
> > diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
> > index be82cd5841c3..1b083a2a415b 100644
> > --- a/arch/x86/kernel/cpu/intel.c
> > +++ b/arch/x86/kernel/cpu/intel.c
> > @@ -684,6 +684,8 @@ static void init_intel(struct cpuinfo_x86 *c)
> >  		tsx_enable();
> >  	if (tsx_ctrl_state == TSX_CTRL_DISABLE)
> >  		tsx_disable();
> > +
> > +	srbds_configure_mitigation();
> >  }
> >  
> >  #ifdef CONFIG_X86_32
> > diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
> > index 6dd78d8235e4..118c503b1c36 100644
> > --- a/arch/x86/kernel/cpu/match.c
> > +++ b/arch/x86/kernel/cpu/match.c
> > @@ -49,6 +49,32 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
> >  }
> >  EXPORT_SYMBOL(x86_match_cpu);
> >  
> > +/*
> > + * Extend x86_match_cpu to support matching a range of steppings.
> > + */
> > +const struct x86_cpu_id_ext *x86_match_cpu_ext(const struct x86_cpu_id_ext *match)
> > +{
> > +	const struct x86_cpu_id_ext *m;
> > +	struct cpuinfo_x86 *c = &boot_cpu_data;
> > +
> > +	for (m = match; m->id.vendor | m->id.family | m->id.model | m->id.feature; m++) {
> > +		if (m->id.vendor != X86_VENDOR_ANY && c->x86_vendor != m->id.vendor)
> > +			continue;
> > +		if (m->id.family != X86_FAMILY_ANY && c->x86 != m->id.family)
> > +			continue;
> > +		if (m->id.model != X86_MODEL_ANY && c->x86_model != m->id.model)
> > +			continue;
> > +		if (m->steppings != X86_STEPPING_ANY &&
> > +		    !(BIT(c->x86_stepping) & m->steppings))
> > +			continue;
> > +		if (m->id.feature != X86_FEATURE_ANY && !cpu_has(c, m->id.feature))
> > +			continue;
> > +		return m;
> > +	}
> > +	return NULL;
> > +}
> > +EXPORT_SYMBOL(x86_match_cpu_ext);
> 
> Sigh, aside of being pointless duplicated code:
> 
> If we'd really need this then it can share most of the code with
> x86_match_cpu(), but copy and paste is more fancy, right? You even
> copied the export just in case ...

duplication is removed. function refactored.

> 
> >  static const struct x86_cpu_desc *
> >  x86_match_cpu_with_stepping(const struct x86_cpu_desc *match)
> >  {
> > diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
> > index 6265871a4af2..d69e094e790c 100644
> > --- a/drivers/base/cpu.c
> > +++ b/drivers/base/cpu.c
> > @@ -567,6 +567,12 @@ ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
> >  	return sprintf(buf, "Not affected\n");
> >  }
> >  
> > +ssize_t __weak cpu_show_special_register_data_sampling(struct device *dev,
> > +						       struct device_attribute *attr, char *buf)
> > +{
> > +	return sprintf(buf, "Not affected\n");
> > +}
> > +
> >  static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
> >  static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
> >  static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
> > @@ -575,6 +581,7 @@ static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
> >  static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
> >  static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
> >  static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
> > +static DEVICE_ATTR(special_register_data_sampling, 0444, cpu_show_special_register_data_sampling, NULL);
> > 
> >  static struct attribute *cpu_root_vulnerabilities_attrs[] = {
> >  	&dev_attr_meltdown.attr,
> > @@ -585,6 +592,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
> >  	&dev_attr_mds.attr,
> >  	&dev_attr_tsx_async_abort.attr,
> >  	&dev_attr_itlb_multihit.attr,
> > +	&dev_attr_special_register_data_sampling.attr,
> 
> This still lacks an entry in:
> 
>   Documentation/ABI/testing/sysfs-devices-system-cpu
> 
> as requested by Greg several times.
Done.

I like your review feedback, its not ambiguous.

Thank you.

I should have an updated posting hopefully tomorrow.
--mark