All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: Kohei Tarumizu <tarumizu.kohei@fujitsu.com>,
	catalin.marinas@arm.com, will@kernel.org, mingo@redhat.com,
	bp@alien8.de, dave.hansen@linux.intel.com, x86@kernel.org,
	hpa@zytor.com, linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, fenghua.yu@intel.com,
	reinette.chatre@intel.com
Cc: tarumizu.kohei@fujitsu.com
Subject: Re: [PATCH v3 7/9] x86: Add hardware prefetch control support for x86
Date: Thu, 21 Apr 2022 00:27:09 +0200	[thread overview]
Message-ID: <87czhbtmpu.ffs@tglx> (raw)
In-Reply-To: <20220420030223.689259-8-tarumizu.kohei@fujitsu.com>

Kohei!

On Wed, Apr 20 2022 at 12:02, Kohei Tarumizu wrote:
> +#define BROADWELL_L2_HWPF_FIELD		BIT_ULL(0)
> +#define BROADWELL_L2_ACLPF_FIELD	BIT_ULL(1)
> +#define BROADWELL_DCU_HWPF_FIELD	BIT_ULL(2)
> +#define BROADWELL_DCU_IPPF_FIELD	BIT_ULL(3)

Prefetch control is supported on a lot of CPU models and all except Xeon
PHI have the same MSR layout. ATOMs do not support L2_ACL and DCU_IP,
but that's it. So there is absolutely nothing broadwell specific here.

> +static int broadwell_get_hwpf_enable(u64 reg, unsigned int level)
> +static int broadwell_modify_hwpf_enable(u64 *reg, unsigned int level, u64 val)
> +static int broadwell_get_ippf_enable(u64 reg, unsigned int level)
> +static int broadwell_modify_ippf_enable(u64 *reg, unsigned int level, u64 val)
> +static int broadwell_get_aclpf_enable(u64 reg, unsigned int level)
> +static int broadwell_modify_aclpf_enable(u64 *reg, unsigned int level, u64 val)

Why do we need six explicit functions, which are pretty much copy and paste?

The only difference is the bit they operate on. It's just a matter of
proper wrapper structs.

/* Global */
struct pfctl_attr {
	unsigned int		level;
	unsigned int		type;
	struct device_attribute	**attr;
};

/* x86 */
enum {
	TYPE_L12_BASE,
	TYPE_L12_PLUS,
	TYPE_L12_XPHI,
};

struct x86_pfctl_attr {
	struct device_attribute		attr;
	u64				mask;
};

static ssize_t pfctl_show(struct device *pfctl_dev, struct device_attribute *attr, char *buf)
{
	struct x86_pfctl_attr *xa = container_of(attr, struct x86_pfctl_attr, attr);
	int cpu = pfctl_dev_get_cpu(pfctl_dev);
	u64 val;

	rdmsrl_on_cpu(cpu, MSR_MISC_FEATURE_CONTROL, &val);
	return sprintf(buf, "%d\n", val & xa->mask ? 0 : 1);
}

static DEFINE_MUTEX(pfctl_mutex);

static ssize_t pfctl_store(struct device *cache_dev, struct device_attribute *attr,
			   const char *buf, size_t size)
{
	struct x86_pfctl_attr *xa = container_of(attr, struct x86_pfctl_attr, attr);
	int cpu = pfctl_dev_get_cpu(pfctl_dev);
	bool enable;
	u64 val;

	if (strtobool(buf, &enable) < 0)
		return -EINVAL;

	/* MSR_MISC_FEATURE_CONTROL is per core, not per thread */
	mutex_lock(&pfctl_mutex);
	rdmsrl_on_cpu(cpu, MSR_MISC_FEATURE_CONTROL, &val);
	if (enable)
		val &= ~xa->mask;
	else
		val |= xa->mask;
	wrmsrl_on_cpu(cpu, MSR_MISC_FEATURE_CONTROL, val);
	mutex_unlock(&pfctl_mutex);
	return 0;
}

#define PFCTL_ATTR(_name, _level, _type, _bit)				\
	struct x86_pfctl_attr attr_l##_level##_##_name = {		\
		.attr	= __ATTR(_name, 0600, pfctl_show, pfctl_store),	\
		.mask = BIT_ULL(_bit), }

static PFCTL_ATTR(prefetch,     1, CACHE_TYPE_DATA,    2);
static PFCTL_ATTR(prefetch,     2, CACHE_TYPE_UNIFIED, 0);
static PFCTL_ATTR(prefetch_ip,  1, CACHE_TYPE_DATA,    3);
static PFCTL_ATTR(prefetch_acl, 2, CACHE_TYPE_UNIFIED, 1);

static struct device_attribute *l1_attrs[] __ro_after_init = {
	&attr_l1_prefetch.attr,
	&attr_l1_prefetch_ip.attr,
	NULL,
};

static struct device_attribute *l2_attrs[] __ro_after_init = {
	&attr_l2_prefetch.attr,
	&attr_l2_prefetch_acl.attr,
	NULL,
};

static const struct pfctl_group pfctl_groups[] = {
	{
		.level = 1,
		.type = CACHE_TYPE_DATA,
		.attrs = l1_attrs,
	},
	{
		.level = 2,
		.type = CACHE_TYPE_UNIFIED,
		.attrs = l2_attrs,
	},
	{
		.attrs = NULL,
	},
};

static const __initconst struct x86_cpu_id pfctl_match[] = {
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	TYPE_L12_BASE),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	TYPE_L12_BASE),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		TYPE_L12_PLUS),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		TYPE_L12_PLUS),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	TYPE_L12_XPHI),
        { },
};

static __init int pfctl_init(void)
{
	const struct x86_cpu_id *m;

	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
		return -ENODEV;

	m = x86_match_cpu(pfctl_match);
	if (!m)
		return -ENODEV;

	switch (m->driver_data) {
	case TYPE_L12_BASE:
		l1_attrs[1] = NULL;
		l2_attrs[1] = NULL;
		break;
	case TYPE_L12_PLUS:
		break;
	case TYPE_L12_XPHI:
		attr_l1_prefetch.mask = BIT_ULL(0);
		attr_l2_prefetch.mask = BIT_ULL(1);
		break;
	default:
		return -ENODEV;
	};

	return pfctl_register_attrs(pfctl_groups);
}
late_initcall(pfctl_init);

See? One show() and one store() function which operates directly at the
attribute level and supports all known variants of bits in the control
MSR. No magic global constants, no visible management, no hardcoded
type/level relations... Simple and straight forward.

All what the core code needs to do is to populate the attributes in the
sys/.../cache/index$N/ directories when level and type matches.

I'm pretty sure you can simplify the A64FX code in a very similar way.

Thanks,

        tglx

WARNING: multiple messages have this Message-ID (diff)
From: Thomas Gleixner <tglx@linutronix.de>
To: Kohei Tarumizu <tarumizu.kohei@fujitsu.com>,
	catalin.marinas@arm.com, will@kernel.org, mingo@redhat.com,
	bp@alien8.de, dave.hansen@linux.intel.com, x86@kernel.org,
	hpa@zytor.com, linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, fenghua.yu@intel.com,
	reinette.chatre@intel.com
Cc: tarumizu.kohei@fujitsu.com
Subject: Re: [PATCH v3 7/9] x86: Add hardware prefetch control support for x86
Date: Thu, 21 Apr 2022 00:27:09 +0200	[thread overview]
Message-ID: <87czhbtmpu.ffs@tglx> (raw)
In-Reply-To: <20220420030223.689259-8-tarumizu.kohei@fujitsu.com>

Kohei!

On Wed, Apr 20 2022 at 12:02, Kohei Tarumizu wrote:
> +#define BROADWELL_L2_HWPF_FIELD		BIT_ULL(0)
> +#define BROADWELL_L2_ACLPF_FIELD	BIT_ULL(1)
> +#define BROADWELL_DCU_HWPF_FIELD	BIT_ULL(2)
> +#define BROADWELL_DCU_IPPF_FIELD	BIT_ULL(3)

Prefetch control is supported on a lot of CPU models and all except Xeon
PHI have the same MSR layout. ATOMs do not support L2_ACL and DCU_IP,
but that's it. So there is absolutely nothing broadwell specific here.

> +static int broadwell_get_hwpf_enable(u64 reg, unsigned int level)
> +static int broadwell_modify_hwpf_enable(u64 *reg, unsigned int level, u64 val)
> +static int broadwell_get_ippf_enable(u64 reg, unsigned int level)
> +static int broadwell_modify_ippf_enable(u64 *reg, unsigned int level, u64 val)
> +static int broadwell_get_aclpf_enable(u64 reg, unsigned int level)
> +static int broadwell_modify_aclpf_enable(u64 *reg, unsigned int level, u64 val)

Why do we need six explicit functions, which are pretty much copy and paste?

The only difference is the bit they operate on. It's just a matter of
proper wrapper structs.

/* Global */
struct pfctl_attr {
	unsigned int		level;
	unsigned int		type;
	struct device_attribute	**attr;
};

/* x86 */
enum {
	TYPE_L12_BASE,
	TYPE_L12_PLUS,
	TYPE_L12_XPHI,
};

struct x86_pfctl_attr {
	struct device_attribute		attr;
	u64				mask;
};

static ssize_t pfctl_show(struct device *pfctl_dev, struct device_attribute *attr, char *buf)
{
	struct x86_pfctl_attr *xa = container_of(attr, struct x86_pfctl_attr, attr);
	int cpu = pfctl_dev_get_cpu(pfctl_dev);
	u64 val;

	rdmsrl_on_cpu(cpu, MSR_MISC_FEATURE_CONTROL, &val);
	return sprintf(buf, "%d\n", val & xa->mask ? 0 : 1);
}

static DEFINE_MUTEX(pfctl_mutex);

static ssize_t pfctl_store(struct device *cache_dev, struct device_attribute *attr,
			   const char *buf, size_t size)
{
	struct x86_pfctl_attr *xa = container_of(attr, struct x86_pfctl_attr, attr);
	int cpu = pfctl_dev_get_cpu(pfctl_dev);
	bool enable;
	u64 val;

	if (strtobool(buf, &enable) < 0)
		return -EINVAL;

	/* MSR_MISC_FEATURE_CONTROL is per core, not per thread */
	mutex_lock(&pfctl_mutex);
	rdmsrl_on_cpu(cpu, MSR_MISC_FEATURE_CONTROL, &val);
	if (enable)
		val &= ~xa->mask;
	else
		val |= xa->mask;
	wrmsrl_on_cpu(cpu, MSR_MISC_FEATURE_CONTROL, val);
	mutex_unlock(&pfctl_mutex);
	return 0;
}

#define PFCTL_ATTR(_name, _level, _type, _bit)				\
	struct x86_pfctl_attr attr_l##_level##_##_name = {		\
		.attr	= __ATTR(_name, 0600, pfctl_show, pfctl_store),	\
		.mask = BIT_ULL(_bit), }

static PFCTL_ATTR(prefetch,     1, CACHE_TYPE_DATA,    2);
static PFCTL_ATTR(prefetch,     2, CACHE_TYPE_UNIFIED, 0);
static PFCTL_ATTR(prefetch_ip,  1, CACHE_TYPE_DATA,    3);
static PFCTL_ATTR(prefetch_acl, 2, CACHE_TYPE_UNIFIED, 1);

static struct device_attribute *l1_attrs[] __ro_after_init = {
	&attr_l1_prefetch.attr,
	&attr_l1_prefetch_ip.attr,
	NULL,
};

static struct device_attribute *l2_attrs[] __ro_after_init = {
	&attr_l2_prefetch.attr,
	&attr_l2_prefetch_acl.attr,
	NULL,
};

static const struct pfctl_group pfctl_groups[] = {
	{
		.level = 1,
		.type = CACHE_TYPE_DATA,
		.attrs = l1_attrs,
	},
	{
		.level = 2,
		.type = CACHE_TYPE_UNIFIED,
		.attrs = l2_attrs,
	},
	{
		.attrs = NULL,
	},
};

static const __initconst struct x86_cpu_id pfctl_match[] = {
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	TYPE_L12_BASE),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	TYPE_L12_BASE),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		TYPE_L12_PLUS),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		TYPE_L12_PLUS),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	TYPE_L12_XPHI),
        { },
};

static __init int pfctl_init(void)
{
	const struct x86_cpu_id *m;

	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
		return -ENODEV;

	m = x86_match_cpu(pfctl_match);
	if (!m)
		return -ENODEV;

	switch (m->driver_data) {
	case TYPE_L12_BASE:
		l1_attrs[1] = NULL;
		l2_attrs[1] = NULL;
		break;
	case TYPE_L12_PLUS:
		break;
	case TYPE_L12_XPHI:
		attr_l1_prefetch.mask = BIT_ULL(0);
		attr_l2_prefetch.mask = BIT_ULL(1);
		break;
	default:
		return -ENODEV;
	};

	return pfctl_register_attrs(pfctl_groups);
}
late_initcall(pfctl_init);

See? One show() and one store() function which operates directly at the
attribute level and supports all known variants of bits in the control
MSR. No magic global constants, no visible management, no hardcoded
type/level relations... Simple and straight forward.

All what the core code needs to do is to populate the attributes in the
sys/.../cache/index$N/ directories when level and type matches.

I'm pretty sure you can simplify the A64FX code in a very similar way.

Thanks,

        tglx

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  reply	other threads:[~2022-04-20 22:27 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-20  3:02 [PATCH v3 0/9] Add hardware prefetch control driver for A64FX and x86 Kohei Tarumizu
2022-04-20  3:02 ` Kohei Tarumizu
2022-04-20  3:02 ` [PATCH v3 1/9] drivers: base: Add hardware prefetch control core driver Kohei Tarumizu
2022-04-20  3:02   ` Kohei Tarumizu
2022-04-20 21:40   ` Thomas Gleixner
2022-04-20 21:40     ` Thomas Gleixner
2022-04-22 12:10     ` tarumizu.kohei
2022-04-22 12:10       ` tarumizu.kohei
2022-04-21  6:18   ` Greg KH
2022-04-21  6:18     ` Greg KH
2022-04-22 12:30     ` tarumizu.kohei
2022-04-22 12:30       ` tarumizu.kohei
2022-04-20  3:02 ` [PATCH v3 2/9] drivers: base: Add Kconfig/Makefile to build " Kohei Tarumizu
2022-04-20  3:02   ` Kohei Tarumizu
2022-04-20  3:02 ` [PATCH v3 3/9] soc: fujitsu: Add hardware prefetch control support for A64FX Kohei Tarumizu
2022-04-20  3:02   ` Kohei Tarumizu
2022-04-20  3:02 ` [PATCH v3 4/9] soc: fujitsu: Add Kconfig/Makefile to build hardware prefetch control driver Kohei Tarumizu
2022-04-20  3:02   ` Kohei Tarumizu
2022-04-20 22:14   ` Thomas Gleixner
2022-04-20 22:14     ` Thomas Gleixner
2022-04-20  3:02 ` [PATCH v3 5/9] arm64: Create cache sysfs directory without ACPI PPTT for hardware prefetch control Kohei Tarumizu
2022-04-20  3:02   ` Kohei Tarumizu
2022-04-20  3:02 ` [PATCH v3 6/9] x86: resctrl: pseudo_lock: Fix to restore to original value when re-enabling hardware prefetch register Kohei Tarumizu
2022-04-20  3:02   ` Kohei Tarumizu
2022-04-20 20:56   ` Dave Hansen
2022-04-20 20:56     ` Dave Hansen
2022-04-22 12:01     ` tarumizu.kohei
2022-04-22 12:01       ` tarumizu.kohei
2022-04-25 23:17   ` Reinette Chatre
2022-04-25 23:17     ` Reinette Chatre
2022-04-27  2:51     ` tarumizu.kohei
2022-04-27  2:51       ` tarumizu.kohei
2022-04-20  3:02 ` [PATCH v3 7/9] x86: Add hardware prefetch control support for x86 Kohei Tarumizu
2022-04-20  3:02   ` Kohei Tarumizu
2022-04-20 22:27   ` Thomas Gleixner [this message]
2022-04-20 22:27     ` Thomas Gleixner
2022-04-22 12:16     ` tarumizu.kohei
2022-04-22 12:16       ` tarumizu.kohei
2022-04-20  3:02 ` [PATCH v3 8/9] x86: Add Kconfig/Makefile to build hardware prefetch control driver Kohei Tarumizu
2022-04-20  3:02   ` Kohei Tarumizu
2022-04-20  3:02 ` [PATCH v3 9/9] docs: ABI: Add sysfs documentation interface of " Kohei Tarumizu
2022-04-20  3:02   ` Kohei Tarumizu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87czhbtmpu.ffs@tglx \
    --to=tglx@linutronix.de \
    --cc=bp@alien8.de \
    --cc=catalin.marinas@arm.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=fenghua.yu@intel.com \
    --cc=hpa@zytor.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=reinette.chatre@intel.com \
    --cc=tarumizu.kohei@fujitsu.com \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.