linux-hwmon.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 1/3] hwmon: Add amd_energy driver to report energy counters
@ 2020-04-24  3:20 Naveen Krishna Chatradhi
  2020-04-24  3:20 ` [PATCH v4 2/3] hwmon: (amd_energy) Add documentation Naveen Krishna Chatradhi
  2020-04-24  3:20 ` [PATCH v4 3/3] MAINTAINERS: add entry for AMD energy driver Naveen Krishna Chatradhi
  0 siblings, 2 replies; 8+ messages in thread
From: Naveen Krishna Chatradhi @ 2020-04-24  3:20 UTC (permalink / raw)
  To: linux-hwmon; +Cc: naveenkrishna.ch, Naveen Krishna Chatradhi, Guenter Roeck

This patch adds hwmon based amd_energy driver support for
family 17h processors from AMD.

The driver provides following interface to the userspace
1. Reports the per core consumption
	* file: "energy%d_input", label: "Ecore%03d"
2. Reports per socket energy consumption
	* file: "energy%d_input", label: "Esocket%d"
2. Reports scaled energy value in Joules.

Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
---
Changes in v4:
1. Use num_present_cpus() instead num_online_cpu(), as the latter does
   not take offline cpus into account.
2. run checkpatch.pl --strict and fix alignment issues
3. Fixed other comments from Guenter Roeck

 drivers/hwmon/Kconfig      |  10 ++
 drivers/hwmon/Makefile     |   1 +
 drivers/hwmon/amd_energy.c | 250 +++++++++++++++++++++++++++++++++++++
 3 files changed, 261 insertions(+)
 create mode 100644 drivers/hwmon/amd_energy.c

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 4c62f900bf7e..e165e10c49ef 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
 	  This driver can also be built as a module. If so, the module
 	  will be called fam15h_power.
 
+config SENSORS_AMD_ENERGY
+	tristate "AMD RAPL MSR based Energy driver"
+	depends on X86
+	help
+	  If you say yes here you get support for core and package energy
+	  sensors, based on RAPL MSR for AMD family 17h and above CPUs.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called as amd_energy.
+
 config SENSORS_APPLESMC
 	tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
 	depends on INPUT && X86
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index b0b9c8e57176..318f89dc7133 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)	+= adt7411.o
 obj-$(CONFIG_SENSORS_ADT7462)	+= adt7462.o
 obj-$(CONFIG_SENSORS_ADT7470)	+= adt7470.o
 obj-$(CONFIG_SENSORS_ADT7475)	+= adt7475.o
+obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
 obj-$(CONFIG_SENSORS_APPLESMC)	+= applesmc.o
 obj-$(CONFIG_SENSORS_ARM_SCMI)	+= scmi-hwmon.o
 obj-$(CONFIG_SENSORS_ARM_SCPI)	+= scpi-hwmon.o
diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
new file mode 100644
index 000000000000..7162c80edd10
--- /dev/null
+++ b/drivers/hwmon/amd_energy.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ */
+
+#include <asm/cpu_device_id.h>
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/hwmon.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/processor.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#define DRVNAME			"amd_energy"
+
+#define ENERGY_PWR_UNIT_MSR	0xC0010299
+#define ENERGY_CORE_MSR		0xC001029A
+#define ENERGY_PKG_MSR		0xC001029B
+
+#define AMD_TIME_UNIT_MASK	0xF0000
+#define AMD_ENERGY_UNIT_MASK	0x01F00
+#define AMD_POWER_UNIT_MASK	0x0000F
+
+#define ENERGY_STATUS_MASK	0xffffffff
+
+#define AMD_FAM_17		0x17 /* ZP, SSP */
+
+/* Useful macros */
+#define AMD_CPU_FAM_ANY(_family, _model)	\
+{						\
+	.vendor		= X86_VENDOR_AMD,	\
+	.family		= _family,		\
+	.model		= _model,		\
+	.feature	= X86_FEATURE_ANY,	\
+}
+
+#define AMD_CPU_FAM(_model)			\
+	AMD_CPU_FAM_ANY(AMD_FAM_##_model, X86_MODEL_ANY)
+
+struct amd_energy_data {
+	struct hwmon_channel_info energy_info;
+	const struct hwmon_channel_info *info[2];
+	struct hwmon_chip_info chip;
+};
+
+static int nr_cpus, nr_socks;
+static u64 energy_units;
+
+static int amd_energy_read_labels(struct device *dev,
+				  enum hwmon_sensor_types type,
+				 u32 attr, int channel,
+				 const char **str)
+{
+	char *buf = devm_kcalloc(dev, 10, sizeof(char), GFP_KERNEL);
+
+	if (channel >= nr_cpus)
+		scnprintf(buf, 9, "Esocket%u", channel - nr_cpus);
+	else
+		scnprintf(buf, 9, "Ecore%03u", channel);
+
+	*str = buf;
+
+	return 0;
+}
+
+static int get_energy_units(void)
+{
+	u64 rapl_units;
+	int ret;
+
+	ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
+	if (ret)
+		return -EAGAIN;
+
+	energy_units = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
+	return 0;
+}
+
+static int amd_energy_read(struct device *dev,
+			   enum hwmon_sensor_types type,
+			   u32 attr, int channel, long *val)
+{
+	u64 value;
+	int cpu, ret;
+	u32 reg;
+
+	if (channel >= nr_cpus) {
+		reg = ENERGY_PKG_MSR;
+		cpu = cpumask_first_and(cpu_online_mask,
+					cpumask_of_node
+					(channel - nr_cpus));
+	} else {
+		reg = ENERGY_CORE_MSR;
+		cpu = channel;
+	}
+
+	if (!cpu_online(cpu))
+		return -ENODEV;
+
+	ret = rdmsrl_safe_on_cpu(cpu, reg, &value);
+	if (ret)
+		return -EAGAIN;
+
+	if (energy_units == 0 && get_energy_units())
+		return -EAGAIN;
+
+	/* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) Joules */
+	*val = (long)value * div64_ul(1000000UL, BIT(energy_units));
+
+	return 0;
+}
+
+static umode_t amd_energy_is_visible(const void *_data,
+				     enum hwmon_sensor_types type,
+				     u32 attr, int channel)
+{
+	return 0444;
+}
+
+static const struct hwmon_ops amd_energy_ops = {
+	.is_visible = amd_energy_is_visible,
+	.read = amd_energy_read,
+	.read_string = amd_energy_read_labels,
+};
+
+static int amd_create_sensor(struct device *dev,
+			     struct amd_energy_data *data,
+			     u8 type, u32 config)
+{
+	int i;
+	u32 *s_config;
+	struct hwmon_channel_info *info = &data->energy_info;
+
+	nr_socks = num_possible_nodes();
+	nr_cpus = num_present_cpus();
+
+	s_config = devm_kcalloc(dev, nr_cpus + nr_socks,
+				sizeof(u32), GFP_KERNEL);
+	if (!s_config)
+		return -ENOMEM;
+
+	info->type = type;
+	info->config = s_config;
+
+	for (i = 0; i < nr_cpus + nr_socks; i++)
+		s_config[i] = config;
+
+	return 0;
+}
+
+static int amd_energy_probe(struct platform_device *pdev)
+{
+	struct device *hwmon_dev;
+	struct amd_energy_data *data;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	ret = get_energy_units();
+
+	data = devm_kzalloc(dev, sizeof(struct amd_energy_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->chip.ops = &amd_energy_ops;
+	data->chip.info = data->info;
+
+	/* Populate per-core energy reporting */
+	data->info[0] = &data->energy_info;
+	amd_create_sensor(dev, data,  hwmon_energy,
+			  HWMON_E_INPUT | HWMON_E_LABEL);
+
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
+							 data,
+							 &data->chip,
+							 NULL);
+	return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+static int amd_energy_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static const struct platform_device_id amd_energy_ids[] = {
+	{ .name = DRVNAME, },
+	{}
+};
+MODULE_DEVICE_TABLE(platform, amd_energy_ids);
+
+static struct platform_driver amd_energy_driver = {
+	.probe = amd_energy_probe,
+	.remove	= amd_energy_remove,
+	.id_table = amd_energy_ids,
+	.driver = {
+		.name = DRVNAME,
+	},
+};
+
+static struct platform_device *amd_energy_platdev;
+
+static const struct x86_cpu_id cpu_ids[] __initconst = {
+	AMD_CPU_FAM(17),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, cpu_ids);
+
+static int __init amd_energy_init(void)
+{
+	int ret;
+
+	if (!x86_match_cpu(cpu_ids))
+		return -ENODEV;
+
+	ret = platform_driver_register(&amd_energy_driver);
+	if (ret < 0)
+		return ret;
+
+	amd_energy_platdev = platform_device_alloc(DRVNAME, 0);
+	if (!amd_energy_platdev)
+		return -ENOMEM;
+
+	ret = platform_device_add(amd_energy_platdev);
+	if (ret) {
+		platform_device_unregister(amd_energy_platdev);
+		return ret;
+	}
+
+	return ret;
+}
+
+static void __exit amd_energy_exit(void)
+{
+	platform_device_unregister(amd_energy_platdev);
+	platform_driver_unregister(&amd_energy_driver);
+}
+
+module_init(amd_energy_init);
+module_exit(amd_energy_exit);
+
+MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface");
+MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>");
+MODULE_LICENSE("GPL");
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 2/3] hwmon: (amd_energy) Add documentation
  2020-04-24  3:20 [PATCH v4 1/3] hwmon: Add amd_energy driver to report energy counters Naveen Krishna Chatradhi
@ 2020-04-24  3:20 ` Naveen Krishna Chatradhi
  2020-04-24  3:20 ` [PATCH v4 3/3] MAINTAINERS: add entry for AMD energy driver Naveen Krishna Chatradhi
  1 sibling, 0 replies; 8+ messages in thread
From: Naveen Krishna Chatradhi @ 2020-04-24  3:20 UTC (permalink / raw)
  To: linux-hwmon; +Cc: naveenkrishna.ch, Naveen Krishna Chatradhi, Guenter Roeck

Document amd_energy driver with all chips supported by it.

Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
---
Changes in v4:
None

 Documentation/hwmon/amd_energy.rst | 84 ++++++++++++++++++++++++++++++
 Documentation/hwmon/index.rst      |  1 +
 2 files changed, 85 insertions(+)
 create mode 100644 Documentation/hwmon/amd_energy.rst

diff --git a/Documentation/hwmon/amd_energy.rst b/Documentation/hwmon/amd_energy.rst
new file mode 100644
index 000000000000..b5d176deec1d
--- /dev/null
+++ b/Documentation/hwmon/amd_energy.rst
@@ -0,0 +1,84 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver amd_energy
+==========================
+
+Supported chips:
+
+* AMD Family 17h Processors
+
+  Prefix: 'amd_energy'
+
+  Addresses used:  RAPL MSRs
+
+  Datasheets:
+
+  - Processor Programming Reference (PPR) for AMD Family 17h Model 01h, Revision B1 Processors
+
+	https://developer.amd.com/wp-content/resources/55570-B1_PUB.zip
+
+  - Preliminary Processor Programming Reference (PPR) for AMD Family 17h Model 31h, Revision B0 Processors
+
+	https://developer.amd.com/wp-content/resources/56176_ppr_Family_17h_Model_71h_B0_pub_Rev_3.06.zip
+
+Author: Naveen Krishna Chatradhi <nchatrad@amd.com>
+
+Description
+-----------
+
+The Energy driver exposes the energy counters that are
+reported via the Running Average Power Limit (RAPL)
+Model-specific Registers (MSRs) via the hardware monitor
+(HWMON) sysfs interface.
+
+1. Power, Energy and Time Units
+   MSR_RAPL_POWER_UNIT/ C001_0299:
+     shared with all cores in the socket
+
+2. Energy consumed by each Core
+   MSR_CORE_ENERGY_STATUS/ C001_029A:
+     32-bitRO, Accumulator, core-level power reporting
+
+3. Energy consumed by Socket
+   MSR_PACKAGE_ENERGY_STATUS/ C001_029B:
+     32-bitRO, Accumulator, socket-level power reporting,
+     shared with all cores in socket
+
+These registers are updated every 1ms and cleared on
+reset of the system.
+
+Energy Caluclation
+------------------
+
+Energy information (in Joules) is based on the multiplier,
+1/2^ESU; where ESU is an unsigned integer read from
+MSR_RAPL_POWER_UNIT register. Default value is 10000b,
+indicating energy status unit is 15.3 micro-Joules increment.
+
+Reported values are scaled as per the formula
+
+scaled value = ((1/2^ESU) * (Raw value) * 1000000UL) in Joules
+
+Users calculate power for a given domain by calculating
+	dEnergy/dTime for that domain.
+
+Sysfs attributes
+----------------
+
+=============== ========  =====================================
+Attribute	Label	  Description
+===============	========  =====================================
+
+* For index N between [1] and [nr_cpus]
+
+===============	========  ======================================
+energy[N]_input EcoreX	  Core Energy   X = [0] to [nr_cpus - 1]
+			  Measured input core energy
+===============	========  ======================================
+
+* For N between [nr_cpus] and [nr_cpus + nr_socks]
+
+===============	========  ======================================
+energy[N]_input EsocketX  Socket Energy X = [0] to [nr_socks -1]
+			  Measured input socket energy
+=============== ========  ======================================
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 8ef62fd39787..fc4b89810e67 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -39,6 +39,7 @@ Hardware Monitoring Kernel Drivers
    adt7470
    adt7475
    amc6821
+   amd_energy
    asb100
    asc7621
    aspeed-pwm-tacho
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 3/3] MAINTAINERS: add entry for AMD energy driver
  2020-04-24  3:20 [PATCH v4 1/3] hwmon: Add amd_energy driver to report energy counters Naveen Krishna Chatradhi
  2020-04-24  3:20 ` [PATCH v4 2/3] hwmon: (amd_energy) Add documentation Naveen Krishna Chatradhi
@ 2020-04-24  3:20 ` Naveen Krishna Chatradhi
  1 sibling, 0 replies; 8+ messages in thread
From: Naveen Krishna Chatradhi @ 2020-04-24  3:20 UTC (permalink / raw)
  To: linux-hwmon; +Cc: naveenkrishna.ch, Naveen Krishna Chatradhi, Guenter Roeck

The kernel driver is part of HWMON subsystem.

Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
---
Changes in v4:
None

 MAINTAINERS | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c1175fc0aadb..112ffd7742aa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -842,6 +842,13 @@ S:	Supported
 T:	git git://people.freedesktop.org/~agd5f/linux
 F:	drivers/gpu/drm/amd/display/
 
+AMD ENERGY DRIVER
+M:	Naveen Krishna Chatradhi <nchatrad@amd.com>
+L:	linux-hwmon@vger.kernel.org
+S:	Maintained
+F:	Documentation/hwmon/amd_energy.rst
+F:	drivers/hwmon/amd_energy.c
+
 AMD FAM15H PROCESSOR POWER MONITORING DRIVER
 M:	Huang Rui <ray.huang@amd.com>
 L:	linux-hwmon@vger.kernel.org
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 1/3] hwmon: Add amd_energy driver to report energy counters
  2020-04-27 15:04     ` Naveen Krishna Ch
@ 2020-05-05 15:43       ` Naveen Krishna Ch
  0 siblings, 0 replies; 8+ messages in thread
From: Naveen Krishna Ch @ 2020-05-05 15:43 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Naveen Krishna Chatradhi, linux-hwmon

Hi Guenter

On Mon, 27 Apr 2020 at 20:34, Naveen Krishna Ch
<naveenkrishna.ch@gmail.com> wrote:
>
> Hi Guenter,
>
> On Mon, 27 Apr 2020 at 19:04, Guenter Roeck <linux@roeck-us.net> wrote:
> >
> > On 4/27/20 5:39 AM, Naveen Krishna Ch wrote:
> > > Hi Guenter,
> > >
> > > On Sat, 25 Apr 2020 at 21:47, Guenter Roeck <linux@roeck-us.net> wrote:
> > >>
> > >> On Fri, Apr 24, 2020 at 08:50:54AM +0530, Naveen Krishna Chatradhi wrote:
> > >>> This patch adds hwmon based amd_energy driver support for
> > >>> family 17h processors from AMD.
> > >>>
> > >>> The driver provides following interface to the userspace
> > >>> 1. Reports the per core consumption
> > >>>       * file: "energy%d_input", label: "Ecore%03d"
> > >>> 2. Reports per socket energy consumption
> > >>>       * file: "energy%d_input", label: "Esocket%d"
> > >>> 2. Reports scaled energy value in Joules.
> > >>>
> > >>> Cc: Guenter Roeck <linux@roeck-us.net>
> > >>> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> > >>
> > >> Couple of additional comments below.
> > >>
> > >> On a higher level, I noticed that the data overflows quickly
> > >> (ie within a day or so), which is the reason why the matching
> > >> code for Intel CPUs never made it into the kernel. With that
> > >> in mind, I do wonder if this is really valuable. I am quite
> > >> concerned about people complaining that the data is useless,
> > >> and I have to say that I find it quite useless myself. Any
> > >> system running for more than a few hours will report more or
> > >> less random data. Any thoughts on that ?
> > > This driver will also address the future platforms with
> > > support for 64-bit counters.
> > >
> > > We do have platforms that will come out very shortly, which will
> > > support a different energy resolution to increase the wraparound
> > > time with 32bit counters,
> > >
> > > On a platform with 2 sockets each with 64 cores,
> > > Assuming 240W, new resolution of 15.6 milli Joules
> > >
> > > -  Wrap around time for socket energy counter will be
> > > (up to ~3 to 4 days with maximum load).
> > >
> > > 2^32*15.625e-3/240 = 279620.2667 secs to wrap (i.e 3.2 days)
> > >
> > > - Wrap around time for the per-core energy counters with the
> > > new resolution will be
> > >
> > > 2^32*15.6e-3/ (240 * 128) = 2184533.33 secs to wrap (i.e 25 days)
> > >
> > > When a work load is to be run on certain predefined cores.
> > > The Work load managers can gather the energy status before starting
> > > and after completion of the job and measure power consumed by the
> > > work load.
> > >
> > All that doesn't help much for existing platforms, nor for users who
> > expect that counters don't wrap around at all (at least not until they
> > reach the 64-bit limit).
>
> The 3 energy counter MSRs are added newly in family 17h, and this
> driver supports family 17h and later.
>
> >
> > I see two options. Either provide power reporting instead (which should
> > be done in the k10temp driver), or implement a kernel thread which runs
> > often enough to catch wraparounds. While not perfect (it will only report
> > the energy since the driver was loaded), it will at least avoid the
> > frequent wraparounds seen today, and that caveat can be documented.
> Sure, i can updated the k10temp driver once a documented way of power
> reporting is available. For now, i will implement a kernel thread to reduce
> the wrap around and update the documentation.
I've addressed your comments and submitted the next version. Could you
review and let us your know comments. Thank you.
>
> >
> > >>
> > >> How about making the power reporting registers available and
> > >> reporting per-CPU power consumption instead ? I think that would
> > >> add much more value.
> > > We will expose power reporting when platform exposes that information.
> > > Until then, energy reporting becomes further critical.
> > >
> >
> > Some Windows tools such as HwInfo report power readings today,
> > on existing CPUs, so I don't really buy the claim that existing
> > chips don't report it.
> This driver is needed for servers based on Naples and Rome
> which runs on Linux. The energy MSRs are accumulating registers
> updated every 1ms. At present, this is a reliable and documented
> way to measure power consumption over a period. Also, there
> is no documented way to report power readings at this point.
>
> >
> > Thanks,
> > Guenter
> --
> Shine bright,
> (: Nav :)



-- 
Shine bright,
(: Nav :)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 1/3] hwmon: Add amd_energy driver to report energy counters
  2020-04-27 13:34   ` Guenter Roeck
@ 2020-04-27 15:04     ` Naveen Krishna Ch
  2020-05-05 15:43       ` Naveen Krishna Ch
  0 siblings, 1 reply; 8+ messages in thread
From: Naveen Krishna Ch @ 2020-04-27 15:04 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Naveen Krishna Chatradhi, linux-hwmon

Hi Guenter,

On Mon, 27 Apr 2020 at 19:04, Guenter Roeck <linux@roeck-us.net> wrote:
>
> On 4/27/20 5:39 AM, Naveen Krishna Ch wrote:
> > Hi Guenter,
> >
> > On Sat, 25 Apr 2020 at 21:47, Guenter Roeck <linux@roeck-us.net> wrote:
> >>
> >> On Fri, Apr 24, 2020 at 08:50:54AM +0530, Naveen Krishna Chatradhi wrote:
> >>> This patch adds hwmon based amd_energy driver support for
> >>> family 17h processors from AMD.
> >>>
> >>> The driver provides following interface to the userspace
> >>> 1. Reports the per core consumption
> >>>       * file: "energy%d_input", label: "Ecore%03d"
> >>> 2. Reports per socket energy consumption
> >>>       * file: "energy%d_input", label: "Esocket%d"
> >>> 2. Reports scaled energy value in Joules.
> >>>
> >>> Cc: Guenter Roeck <linux@roeck-us.net>
> >>> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> >>
> >> Couple of additional comments below.
> >>
> >> On a higher level, I noticed that the data overflows quickly
> >> (ie within a day or so), which is the reason why the matching
> >> code for Intel CPUs never made it into the kernel. With that
> >> in mind, I do wonder if this is really valuable. I am quite
> >> concerned about people complaining that the data is useless,
> >> and I have to say that I find it quite useless myself. Any
> >> system running for more than a few hours will report more or
> >> less random data. Any thoughts on that ?
> > This driver will also address the future platforms with
> > support for 64-bit counters.
> >
> > We do have platforms that will come out very shortly, which will
> > support a different energy resolution to increase the wraparound
> > time with 32bit counters,
> >
> > On a platform with 2 sockets each with 64 cores,
> > Assuming 240W, new resolution of 15.6 milli Joules
> >
> > -  Wrap around time for socket energy counter will be
> > (up to ~3 to 4 days with maximum load).
> >
> > 2^32*15.625e-3/240 = 279620.2667 secs to wrap (i.e 3.2 days)
> >
> > - Wrap around time for the per-core energy counters with the
> > new resolution will be
> >
> > 2^32*15.6e-3/ (240 * 128) = 2184533.33 secs to wrap (i.e 25 days)
> >
> > When a work load is to be run on certain predefined cores.
> > The Work load managers can gather the energy status before starting
> > and after completion of the job and measure power consumed by the
> > work load.
> >
> All that doesn't help much for existing platforms, nor for users who
> expect that counters don't wrap around at all (at least not until they
> reach the 64-bit limit).

The 3 energy counter MSRs are added newly in family 17h, and this
driver supports family 17h and later.

>
> I see two options. Either provide power reporting instead (which should
> be done in the k10temp driver), or implement a kernel thread which runs
> often enough to catch wraparounds. While not perfect (it will only report
> the energy since the driver was loaded), it will at least avoid the
> frequent wraparounds seen today, and that caveat can be documented.
Sure, i can updated the k10temp driver once a documented way of power
reporting is available. For now, i will implement a kernel thread to reduce
the wrap around and update the documentation.

>
> >>
> >> How about making the power reporting registers available and
> >> reporting per-CPU power consumption instead ? I think that would
> >> add much more value.
> > We will expose power reporting when platform exposes that information.
> > Until then, energy reporting becomes further critical.
> >
>
> Some Windows tools such as HwInfo report power readings today,
> on existing CPUs, so I don't really buy the claim that existing
> chips don't report it.
This driver is needed for servers based on Naples and Rome
which runs on Linux. The energy MSRs are accumulating registers
updated every 1ms. At present, this is a reliable and documented
way to measure power consumption over a period. Also, there
is no documented way to report power readings at this point.

>
> Thanks,
> Guenter
--
Shine bright,
(: Nav :)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 1/3] hwmon: Add amd_energy driver to report energy counters
  2020-04-27 12:39 ` Naveen Krishna Ch
@ 2020-04-27 13:34   ` Guenter Roeck
  2020-04-27 15:04     ` Naveen Krishna Ch
  0 siblings, 1 reply; 8+ messages in thread
From: Guenter Roeck @ 2020-04-27 13:34 UTC (permalink / raw)
  To: Naveen Krishna Ch; +Cc: Naveen Krishna Chatradhi, linux-hwmon

On 4/27/20 5:39 AM, Naveen Krishna Ch wrote:
> Hi Guenter,
> 
> On Sat, 25 Apr 2020 at 21:47, Guenter Roeck <linux@roeck-us.net> wrote:
>>
>> On Fri, Apr 24, 2020 at 08:50:54AM +0530, Naveen Krishna Chatradhi wrote:
>>> This patch adds hwmon based amd_energy driver support for
>>> family 17h processors from AMD.
>>>
>>> The driver provides following interface to the userspace
>>> 1. Reports the per core consumption
>>>       * file: "energy%d_input", label: "Ecore%03d"
>>> 2. Reports per socket energy consumption
>>>       * file: "energy%d_input", label: "Esocket%d"
>>> 2. Reports scaled energy value in Joules.
>>>
>>> Cc: Guenter Roeck <linux@roeck-us.net>
>>> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
>>
>> Couple of additional comments below.
>>
>> On a higher level, I noticed that the data overflows quickly
>> (ie within a day or so), which is the reason why the matching
>> code for Intel CPUs never made it into the kernel. With that
>> in mind, I do wonder if this is really valuable. I am quite
>> concerned about people complaining that the data is useless,
>> and I have to say that I find it quite useless myself. Any
>> system running for more than a few hours will report more or
>> less random data. Any thoughts on that ?
> This driver will also address the future platforms with
> support for 64-bit counters.
> 
> We do have platforms that will come out very shortly, which will
> support a different energy resolution to increase the wraparound
> time with 32bit counters,
> 
> On a platform with 2 sockets each with 64 cores,
> Assuming 240W, new resolution of 15.6 milli Joules
> 
> -  Wrap around time for socket energy counter will be
> (up to ~3 to 4 days with maximum load).
> 
> 2^32*15.625e-3/240 = 279620.2667 secs to wrap (i.e 3.2 days)
> 
> - Wrap around time for the per-core energy counters with the
> new resolution will be
> 
> 2^32*15.6e-3/ (240 * 128) = 2184533.33 secs to wrap (i.e 25 days)
> 
> When a work load is to be run on certain predefined cores.
> The Work load managers can gather the energy status before starting
> and after completion of the job and measure power consumed by the
> work load.
> 
All that doesn't help much for existing platforms, nor for users who
expect that counters don't wrap around at all (at least not until they
reach the 64-bit limit).

I see two options. Either provide power reporting instead (which should
be done in the k10temp driver), or implement a kernel thread which runs
often enough to catch wraparounds. While not perfect (it will only report
the energy since the driver was loaded), it will at least avoid the
frequent wraparounds seen today, and that caveat can be documented.

>>
>> How about making the power reporting registers available and
>> reporting per-CPU power consumption instead ? I think that would
>> add much more value.
> We will expose power reporting when platform exposes that information.
> Until then, energy reporting becomes further critical.
> 

Some Windows tools such as HwInfo report power readings today,
on existing CPUs, so I don't really buy the claim that existing
chips don't report it.

Thanks,
Guenter

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 1/3] hwmon: Add amd_energy driver to report energy counters
  2020-04-25 16:17 [PATCH v4 1/3] hwmon: Add amd_energy driver to report energy counters Guenter Roeck
@ 2020-04-27 12:39 ` Naveen Krishna Ch
  2020-04-27 13:34   ` Guenter Roeck
  0 siblings, 1 reply; 8+ messages in thread
From: Naveen Krishna Ch @ 2020-04-27 12:39 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Naveen Krishna Chatradhi, linux-hwmon

Hi Guenter,

On Sat, 25 Apr 2020 at 21:47, Guenter Roeck <linux@roeck-us.net> wrote:
>
> On Fri, Apr 24, 2020 at 08:50:54AM +0530, Naveen Krishna Chatradhi wrote:
> > This patch adds hwmon based amd_energy driver support for
> > family 17h processors from AMD.
> >
> > The driver provides following interface to the userspace
> > 1. Reports the per core consumption
> >       * file: "energy%d_input", label: "Ecore%03d"
> > 2. Reports per socket energy consumption
> >       * file: "energy%d_input", label: "Esocket%d"
> > 2. Reports scaled energy value in Joules.
> >
> > Cc: Guenter Roeck <linux@roeck-us.net>
> > Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
>
> Couple of additional comments below.
>
> On a higher level, I noticed that the data overflows quickly
> (ie within a day or so), which is the reason why the matching
> code for Intel CPUs never made it into the kernel. With that
> in mind, I do wonder if this is really valuable. I am quite
> concerned about people complaining that the data is useless,
> and I have to say that I find it quite useless myself. Any
> system running for more than a few hours will report more or
> less random data. Any thoughts on that ?
This driver will also address the future platforms with
support for 64-bit counters.

We do have platforms that will come out very shortly, which will
support a different energy resolution to increase the wraparound
time with 32bit counters,

On a platform with 2 sockets each with 64 cores,
Assuming 240W, new resolution of 15.6 milli Joules

-  Wrap around time for socket energy counter will be
(up to ~3 to 4 days with maximum load).

2^32*15.625e-3/240 = 279620.2667 secs to wrap (i.e 3.2 days)

- Wrap around time for the per-core energy counters with the
new resolution will be

2^32*15.6e-3/ (240 * 128) = 2184533.33 secs to wrap (i.e 25 days)

When a work load is to be run on certain predefined cores.
The Work load managers can gather the energy status before starting
and after completion of the job and measure power consumed by the
work load.

>
> How about making the power reporting registers available and
> reporting per-CPU power consumption instead ? I think that would
> add much more value.
We will expose power reporting when platform exposes that information.
Until then, energy reporting becomes further critical.

>
> Thanks,
> Guenter
>
> > ---
> > Changes in v4:
> > 1. Use num_present_cpus() instead num_online_cpu(), as the latter does
> >    not take offline cpus into account.
> > 2. run checkpatch.pl --strict and fix alignment issues
> > 3. Fixed other comments from Guenter Roeck
> >
> >  drivers/hwmon/Kconfig      |  10 ++
> >  drivers/hwmon/Makefile     |   1 +
> >  drivers/hwmon/amd_energy.c | 250 +++++++++++++++++++++++++++++++++++++
> >  3 files changed, 261 insertions(+)
> >  create mode 100644 drivers/hwmon/amd_energy.c
> >
> > diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> > index 4c62f900bf7e..e165e10c49ef 100644
> > --- a/drivers/hwmon/Kconfig
> > +++ b/drivers/hwmon/Kconfig
> > @@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
> >         This driver can also be built as a module. If so, the module
> >         will be called fam15h_power.
> >
> > +config SENSORS_AMD_ENERGY
> > +     tristate "AMD RAPL MSR based Energy driver"
> > +     depends on X86
> > +     help
> > +       If you say yes here you get support for core and package energy
> > +       sensors, based on RAPL MSR for AMD family 17h and above CPUs.
> > +
> > +       This driver can also be built as a module. If so, the module
> > +       will be called as amd_energy.
> > +
> >  config SENSORS_APPLESMC
> >       tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
> >       depends on INPUT && X86
> > diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> > index b0b9c8e57176..318f89dc7133 100644
> > --- a/drivers/hwmon/Makefile
> > +++ b/drivers/hwmon/Makefile
> > @@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)       += adt7411.o
> >  obj-$(CONFIG_SENSORS_ADT7462)        += adt7462.o
> >  obj-$(CONFIG_SENSORS_ADT7470)        += adt7470.o
> >  obj-$(CONFIG_SENSORS_ADT7475)        += adt7475.o
> > +obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
> >  obj-$(CONFIG_SENSORS_APPLESMC)       += applesmc.o
> >  obj-$(CONFIG_SENSORS_ARM_SCMI)       += scmi-hwmon.o
> >  obj-$(CONFIG_SENSORS_ARM_SCPI)       += scpi-hwmon.o
> > diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
> > new file mode 100644
> > index 000000000000..7162c80edd10
> > --- /dev/null
> > +++ b/drivers/hwmon/amd_energy.c
> > @@ -0,0 +1,250 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +
> > +/*
> > + * Copyright (C) 2020 Advanced Micro Devices, Inc.
> > + */
> > +
> > +#include <asm/cpu_device_id.h>
> > +
> I don't think this is currently used. More on that below, though.
will remove
>
> > +#include <linux/cpu.h>
> > +#include <linux/cpumask.h>
> > +#include <linux/device.h>
> > +#include <linux/hwmon.h>
> > +#include <linux/kernel.h>
> > +#include <linux/list.h>
> > +#include <linux/module.h>
> > +#include <linux/processor.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/slab.h>
> > +#include <linux/sysfs.h>
>
> I don't think this include is needed.
yes

>
> On the other side, using BIT() means that linux/bits.h should be included.
sure

>
> > +#include <linux/types.h>
> > +
> > +#define DRVNAME                      "amd_energy"
> > +
> > +#define ENERGY_PWR_UNIT_MSR  0xC0010299
> > +#define ENERGY_CORE_MSR              0xC001029A
> > +#define ENERGY_PKG_MSR               0xC001029B
> > +
> > +#define AMD_TIME_UNIT_MASK   0xF0000
> > +#define AMD_ENERGY_UNIT_MASK 0x01F00
> > +#define AMD_POWER_UNIT_MASK  0x0000F
> > +
> > +#define ENERGY_STATUS_MASK   0xffffffff
> > +
> > +#define AMD_FAM_17           0x17 /* ZP, SSP */
> > +
> > +/* Useful macros */
> > +#define AMD_CPU_FAM_ANY(_family, _model)     \
> > +{                                            \
> > +     .vendor         = X86_VENDOR_AMD,       \
> > +     .family         = _family,              \
> > +     .model          = _model,               \
> > +     .feature        = X86_FEATURE_ANY,      \
> > +}
> > +
> > +#define AMD_CPU_FAM(_model)                  \
> > +     AMD_CPU_FAM_ANY(AMD_FAM_##_model, X86_MODEL_ANY)
> > +
>
> Any special reason for not just using the following in cpu_ids[] ?
>         X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL)
no specific reason, will use this.

>
> > +struct amd_energy_data {
> > +     struct hwmon_channel_info energy_info;
> > +     const struct hwmon_channel_info *info[2];
> > +     struct hwmon_chip_info chip;
> > +};
> > +
> > +static int nr_cpus, nr_socks;
> > +static u64 energy_units;
> > +
> > +static int amd_energy_read_labels(struct device *dev,
> > +                               enum hwmon_sensor_types type,
> > +                              u32 attr, int channel,
> > +                              const char **str)
> > +{
> > +     char *buf = devm_kcalloc(dev, 10, sizeof(char), GFP_KERNEL);
> > +
> > +     if (channel >= nr_cpus)
> > +             scnprintf(buf, 9, "Esocket%u", channel - nr_cpus);
> > +     else
> > +             scnprintf(buf, 9, "Ecore%03u", channel);
> > +
> > +     *str = buf;
> > +
> > +     return 0;
> > +}
> > +
> > +static int get_energy_units(void)
> > +{
> > +     u64 rapl_units;
> > +     int ret;
> > +
> > +     ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
> > +     if (ret)
> > +             return -EAGAIN;
> > +
> > +     energy_units = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
> > +     return 0;
> > +}
> > +
> > +static int amd_energy_read(struct device *dev,
> > +                        enum hwmon_sensor_types type,
> > +                        u32 attr, int channel, long *val)
> > +{
> > +     u64 value;
> > +     int cpu, ret;
> > +     u32 reg;
> > +
> > +     if (channel >= nr_cpus) {
> > +             reg = ENERGY_PKG_MSR;
> > +             cpu = cpumask_first_and(cpu_online_mask,
> > +                                     cpumask_of_node
> > +                                     (channel - nr_cpus));
> > +     } else {
> > +             reg = ENERGY_CORE_MSR;
> > +             cpu = channel;
> > +     }
> > +
> > +     if (!cpu_online(cpu))
> > +             return -ENODEV;
> > +
> > +     ret = rdmsrl_safe_on_cpu(cpu, reg, &value);
> > +     if (ret)
> > +             return -EAGAIN;
> > +
> > +     if (energy_units == 0 && get_energy_units())
> > +             return -EAGAIN;
> > +
> > +     /* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) Joules */
> > +     *val = (long)value * div64_ul(1000000UL, BIT(energy_units));
> > +
> > +     return 0;
> > +}
> > +
> > +static umode_t amd_energy_is_visible(const void *_data,
> > +                                  enum hwmon_sensor_types type,
> > +                                  u32 attr, int channel)
> > +{
> > +     return 0444;
> > +}
> > +
> > +static const struct hwmon_ops amd_energy_ops = {
> > +     .is_visible = amd_energy_is_visible,
> > +     .read = amd_energy_read,
> > +     .read_string = amd_energy_read_labels,
> > +};
> > +
> > +static int amd_create_sensor(struct device *dev,
> > +                          struct amd_energy_data *data,
> > +                          u8 type, u32 config)
> > +{
> > +     int i;
> > +     u32 *s_config;
> > +     struct hwmon_channel_info *info = &data->energy_info;
> > +
> > +     nr_socks = num_possible_nodes();
> > +     nr_cpus = num_present_cpus();
> > +
>
> On Ryzen 3900X, this generates data for 24 CPUs. Are you sure
> that this is correct, ie that the data is available per
> hyperthreading sibling ?
These energy counters are updated per core level, not at thread level.
Correct me if I’m wrong. My understanding is linux enumerates
all thread as a cpus and the internals of rd/wrmsr_on_cpu() functions
would handle the cpu/sibling thread in the x86_64 specific way.

If this understanding is wrong, I will try to use
topology_sibling_cpumask() or similar.


>
> > +     s_config = devm_kcalloc(dev, nr_cpus + nr_socks,
> > +                             sizeof(u32), GFP_KERNEL);
> > +     if (!s_config)
> > +             return -ENOMEM;
> > +
> > +     info->type = type;
> > +     info->config = s_config;
> > +
> > +     for (i = 0; i < nr_cpus + nr_socks; i++)
> > +             s_config[i] = config;
> > +
> > +     return 0;
> > +}
> > +
> > +static int amd_energy_probe(struct platform_device *pdev)
> > +{
> > +     struct device *hwmon_dev;
> > +     struct amd_energy_data *data;
> > +     struct device *dev = &pdev->dev;
> > +     int ret;
> > +
> > +     ret = get_energy_units();
> > +
> > +     data = devm_kzalloc(dev, sizeof(struct amd_energy_data), GFP_KERNEL);
> > +     if (!data)
> > +             return -ENOMEM;
> > +
> > +     data->chip.ops = &amd_energy_ops;
> > +     data->chip.info = data->info;
> > +
> > +     /* Populate per-core energy reporting */
> > +     data->info[0] = &data->energy_info;
> > +     amd_create_sensor(dev, data,  hwmon_energy,
> > +                       HWMON_E_INPUT | HWMON_E_LABEL);
> > +
> > +     hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
> > +                                                      data,
> > +                                                      &data->chip,
> > +                                                      NULL);
> > +     return PTR_ERR_OR_ZERO(hwmon_dev);
> > +}
> > +
> > +static int amd_energy_remove(struct platform_device *pdev)
> > +{
> > +     return 0;
> > +}
> > +
> > +static const struct platform_device_id amd_energy_ids[] = {
> > +     { .name = DRVNAME, },
> > +     {}
> > +};
> > +MODULE_DEVICE_TABLE(platform, amd_energy_ids);
> > +
> > +static struct platform_driver amd_energy_driver = {
> > +     .probe = amd_energy_probe,
> > +     .remove = amd_energy_remove,
> > +     .id_table = amd_energy_ids,
> > +     .driver = {
> > +             .name = DRVNAME,
> > +     },
> > +};
> > +
> > +static struct platform_device *amd_energy_platdev;
> > +
> > +static const struct x86_cpu_id cpu_ids[] __initconst = {
> > +     AMD_CPU_FAM(17),
> > +     {}
> > +};
> > +MODULE_DEVICE_TABLE(x86cpu, cpu_ids);
> > +
> > +static int __init amd_energy_init(void)
> > +{
> > +     int ret;
> > +
> > +     if (!x86_match_cpu(cpu_ids))
> > +             return -ENODEV;
> > +
> > +     ret = platform_driver_register(&amd_energy_driver);
> > +     if (ret < 0)
> > +             return ret;
> > +
> > +     amd_energy_platdev = platform_device_alloc(DRVNAME, 0);
> > +     if (!amd_energy_platdev)
> > +             return -ENOMEM;
> > +
> > +     ret = platform_device_add(amd_energy_platdev);
> > +     if (ret) {
> > +             platform_device_unregister(amd_energy_platdev);
> > +             return ret;
> > +     }
> > +
> > +     return ret;
> > +}
> > +
> > +static void __exit amd_energy_exit(void)
> > +{
> > +     platform_device_unregister(amd_energy_platdev);
> > +     platform_driver_unregister(&amd_energy_driver);
> > +}
> > +
> > +module_init(amd_energy_init);
> > +module_exit(amd_energy_exit);
> > +
> > +MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface");
> > +MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>");
> > +MODULE_LICENSE("GPL");
> > --
> > 2.17.1
> >



--
Shine bright,
(: Nav :)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 1/3] hwmon: Add amd_energy driver to report energy counters
@ 2020-04-25 16:17 Guenter Roeck
  2020-04-27 12:39 ` Naveen Krishna Ch
  0 siblings, 1 reply; 8+ messages in thread
From: Guenter Roeck @ 2020-04-25 16:17 UTC (permalink / raw)
  To: Naveen Krishna Chatradhi; +Cc: linux-hwmon, naveenkrishna.ch

On Fri, Apr 24, 2020 at 08:50:54AM +0530, Naveen Krishna Chatradhi wrote:
> This patch adds hwmon based amd_energy driver support for
> family 17h processors from AMD.
> 
> The driver provides following interface to the userspace
> 1. Reports the per core consumption
> 	* file: "energy%d_input", label: "Ecore%03d"
> 2. Reports per socket energy consumption
> 	* file: "energy%d_input", label: "Esocket%d"
> 2. Reports scaled energy value in Joules.
> 
> Cc: Guenter Roeck <linux@roeck-us.net>
> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>

Couple of additional comments below.

On a higher level, I noticed that the data overflows quickly
(ie within a day or so), which is the reason why the matching
code for Intel CPUs never made it into the kernel. With that
in mind, I do wonder if this is really valuable. I am quite
concerned about people complaining that the data is useless,
and I have to say that I find it quite useless myself. Any
system running for more than a few hours will report more or
less random data. Any thoughts on that ? 

How about making the power reporting registers available and
reporting per-CPU power consumption instead ? I think that would
add much more value.

Thanks,
Guenter

> ---
> Changes in v4:
> 1. Use num_present_cpus() instead num_online_cpu(), as the latter does
>    not take offline cpus into account.
> 2. run checkpatch.pl --strict and fix alignment issues
> 3. Fixed other comments from Guenter Roeck
> 
>  drivers/hwmon/Kconfig      |  10 ++
>  drivers/hwmon/Makefile     |   1 +
>  drivers/hwmon/amd_energy.c | 250 +++++++++++++++++++++++++++++++++++++
>  3 files changed, 261 insertions(+)
>  create mode 100644 drivers/hwmon/amd_energy.c
> 
> diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> index 4c62f900bf7e..e165e10c49ef 100644
> --- a/drivers/hwmon/Kconfig
> +++ b/drivers/hwmon/Kconfig
> @@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
>  	  This driver can also be built as a module. If so, the module
>  	  will be called fam15h_power.
>  
> +config SENSORS_AMD_ENERGY
> +	tristate "AMD RAPL MSR based Energy driver"
> +	depends on X86
> +	help
> +	  If you say yes here you get support for core and package energy
> +	  sensors, based on RAPL MSR for AMD family 17h and above CPUs.
> +
> +	  This driver can also be built as a module. If so, the module
> +	  will be called as amd_energy.
> +
>  config SENSORS_APPLESMC
>  	tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
>  	depends on INPUT && X86
> diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> index b0b9c8e57176..318f89dc7133 100644
> --- a/drivers/hwmon/Makefile
> +++ b/drivers/hwmon/Makefile
> @@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)	+= adt7411.o
>  obj-$(CONFIG_SENSORS_ADT7462)	+= adt7462.o
>  obj-$(CONFIG_SENSORS_ADT7470)	+= adt7470.o
>  obj-$(CONFIG_SENSORS_ADT7475)	+= adt7475.o
> +obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
>  obj-$(CONFIG_SENSORS_APPLESMC)	+= applesmc.o
>  obj-$(CONFIG_SENSORS_ARM_SCMI)	+= scmi-hwmon.o
>  obj-$(CONFIG_SENSORS_ARM_SCPI)	+= scpi-hwmon.o
> diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
> new file mode 100644
> index 000000000000..7162c80edd10
> --- /dev/null
> +++ b/drivers/hwmon/amd_energy.c
> @@ -0,0 +1,250 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +/*
> + * Copyright (C) 2020 Advanced Micro Devices, Inc.
> + */
> +
> +#include <asm/cpu_device_id.h>
> +
I don't think this is currently used. More on that below, though.

> +#include <linux/cpu.h>
> +#include <linux/cpumask.h>
> +#include <linux/device.h>
> +#include <linux/hwmon.h>
> +#include <linux/kernel.h>
> +#include <linux/list.h>
> +#include <linux/module.h>
> +#include <linux/processor.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +#include <linux/sysfs.h>

I don't think this include is needed.

On the other side, using BIT() means that linux/bits.h should be included.

> +#include <linux/types.h>
> +
> +#define DRVNAME			"amd_energy"
> +
> +#define ENERGY_PWR_UNIT_MSR	0xC0010299
> +#define ENERGY_CORE_MSR		0xC001029A
> +#define ENERGY_PKG_MSR		0xC001029B
> +
> +#define AMD_TIME_UNIT_MASK	0xF0000
> +#define AMD_ENERGY_UNIT_MASK	0x01F00
> +#define AMD_POWER_UNIT_MASK	0x0000F
> +
> +#define ENERGY_STATUS_MASK	0xffffffff
> +
> +#define AMD_FAM_17		0x17 /* ZP, SSP */
> +
> +/* Useful macros */
> +#define AMD_CPU_FAM_ANY(_family, _model)	\
> +{						\
> +	.vendor		= X86_VENDOR_AMD,	\
> +	.family		= _family,		\
> +	.model		= _model,		\
> +	.feature	= X86_FEATURE_ANY,	\
> +}
> +
> +#define AMD_CPU_FAM(_model)			\
> +	AMD_CPU_FAM_ANY(AMD_FAM_##_model, X86_MODEL_ANY)
> +

Any special reason for not just using the following in cpu_ids[] ?
	X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL)

> +struct amd_energy_data {
> +	struct hwmon_channel_info energy_info;
> +	const struct hwmon_channel_info *info[2];
> +	struct hwmon_chip_info chip;
> +};
> +
> +static int nr_cpus, nr_socks;
> +static u64 energy_units;
> +
> +static int amd_energy_read_labels(struct device *dev,
> +				  enum hwmon_sensor_types type,
> +				 u32 attr, int channel,
> +				 const char **str)
> +{
> +	char *buf = devm_kcalloc(dev, 10, sizeof(char), GFP_KERNEL);
> +
> +	if (channel >= nr_cpus)
> +		scnprintf(buf, 9, "Esocket%u", channel - nr_cpus);
> +	else
> +		scnprintf(buf, 9, "Ecore%03u", channel);
> +
> +	*str = buf;
> +
> +	return 0;
> +}
> +
> +static int get_energy_units(void)
> +{
> +	u64 rapl_units;
> +	int ret;
> +
> +	ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
> +	if (ret)
> +		return -EAGAIN;
> +
> +	energy_units = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
> +	return 0;
> +}
> +
> +static int amd_energy_read(struct device *dev,
> +			   enum hwmon_sensor_types type,
> +			   u32 attr, int channel, long *val)
> +{
> +	u64 value;
> +	int cpu, ret;
> +	u32 reg;
> +
> +	if (channel >= nr_cpus) {
> +		reg = ENERGY_PKG_MSR;
> +		cpu = cpumask_first_and(cpu_online_mask,
> +					cpumask_of_node
> +					(channel - nr_cpus));
> +	} else {
> +		reg = ENERGY_CORE_MSR;
> +		cpu = channel;
> +	}
> +
> +	if (!cpu_online(cpu))
> +		return -ENODEV;
> +
> +	ret = rdmsrl_safe_on_cpu(cpu, reg, &value);
> +	if (ret)
> +		return -EAGAIN;
> +
> +	if (energy_units == 0 && get_energy_units())
> +		return -EAGAIN;
> +
> +	/* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) Joules */
> +	*val = (long)value * div64_ul(1000000UL, BIT(energy_units));
> +
> +	return 0;
> +}
> +
> +static umode_t amd_energy_is_visible(const void *_data,
> +				     enum hwmon_sensor_types type,
> +				     u32 attr, int channel)
> +{
> +	return 0444;
> +}
> +
> +static const struct hwmon_ops amd_energy_ops = {
> +	.is_visible = amd_energy_is_visible,
> +	.read = amd_energy_read,
> +	.read_string = amd_energy_read_labels,
> +};
> +
> +static int amd_create_sensor(struct device *dev,
> +			     struct amd_energy_data *data,
> +			     u8 type, u32 config)
> +{
> +	int i;
> +	u32 *s_config;
> +	struct hwmon_channel_info *info = &data->energy_info;
> +
> +	nr_socks = num_possible_nodes();
> +	nr_cpus = num_present_cpus();
> +

On Ryzen 3900X, this generates data for 24 CPUs. Are you sure
that this is correct, ie that the data is available per
hyperthreading sibling ?

> +	s_config = devm_kcalloc(dev, nr_cpus + nr_socks,
> +				sizeof(u32), GFP_KERNEL);
> +	if (!s_config)
> +		return -ENOMEM;
> +
> +	info->type = type;
> +	info->config = s_config;
> +
> +	for (i = 0; i < nr_cpus + nr_socks; i++)
> +		s_config[i] = config;
> +
> +	return 0;
> +}
> +
> +static int amd_energy_probe(struct platform_device *pdev)
> +{
> +	struct device *hwmon_dev;
> +	struct amd_energy_data *data;
> +	struct device *dev = &pdev->dev;
> +	int ret;
> +
> +	ret = get_energy_units();
> +
> +	data = devm_kzalloc(dev, sizeof(struct amd_energy_data), GFP_KERNEL);
> +	if (!data)
> +		return -ENOMEM;
> +
> +	data->chip.ops = &amd_energy_ops;
> +	data->chip.info = data->info;
> +
> +	/* Populate per-core energy reporting */
> +	data->info[0] = &data->energy_info;
> +	amd_create_sensor(dev, data,  hwmon_energy,
> +			  HWMON_E_INPUT | HWMON_E_LABEL);
> +
> +	hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
> +							 data,
> +							 &data->chip,
> +							 NULL);
> +	return PTR_ERR_OR_ZERO(hwmon_dev);
> +}
> +
> +static int amd_energy_remove(struct platform_device *pdev)
> +{
> +	return 0;
> +}
> +
> +static const struct platform_device_id amd_energy_ids[] = {
> +	{ .name = DRVNAME, },
> +	{}
> +};
> +MODULE_DEVICE_TABLE(platform, amd_energy_ids);
> +
> +static struct platform_driver amd_energy_driver = {
> +	.probe = amd_energy_probe,
> +	.remove	= amd_energy_remove,
> +	.id_table = amd_energy_ids,
> +	.driver = {
> +		.name = DRVNAME,
> +	},
> +};
> +
> +static struct platform_device *amd_energy_platdev;
> +
> +static const struct x86_cpu_id cpu_ids[] __initconst = {
> +	AMD_CPU_FAM(17),
> +	{}
> +};
> +MODULE_DEVICE_TABLE(x86cpu, cpu_ids);
> +
> +static int __init amd_energy_init(void)
> +{
> +	int ret;
> +
> +	if (!x86_match_cpu(cpu_ids))
> +		return -ENODEV;
> +
> +	ret = platform_driver_register(&amd_energy_driver);
> +	if (ret < 0)
> +		return ret;
> +
> +	amd_energy_platdev = platform_device_alloc(DRVNAME, 0);
> +	if (!amd_energy_platdev)
> +		return -ENOMEM;
> +
> +	ret = platform_device_add(amd_energy_platdev);
> +	if (ret) {
> +		platform_device_unregister(amd_energy_platdev);
> +		return ret;
> +	}
> +
> +	return ret;
> +}
> +
> +static void __exit amd_energy_exit(void)
> +{
> +	platform_device_unregister(amd_energy_platdev);
> +	platform_driver_unregister(&amd_energy_driver);
> +}
> +
> +module_init(amd_energy_init);
> +module_exit(amd_energy_exit);
> +
> +MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface");
> +MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>");
> +MODULE_LICENSE("GPL");
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-05-05 15:43 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-24  3:20 [PATCH v4 1/3] hwmon: Add amd_energy driver to report energy counters Naveen Krishna Chatradhi
2020-04-24  3:20 ` [PATCH v4 2/3] hwmon: (amd_energy) Add documentation Naveen Krishna Chatradhi
2020-04-24  3:20 ` [PATCH v4 3/3] MAINTAINERS: add entry for AMD energy driver Naveen Krishna Chatradhi
2020-04-25 16:17 [PATCH v4 1/3] hwmon: Add amd_energy driver to report energy counters Guenter Roeck
2020-04-27 12:39 ` Naveen Krishna Ch
2020-04-27 13:34   ` Guenter Roeck
2020-04-27 15:04     ` Naveen Krishna Ch
2020-05-05 15:43       ` Naveen Krishna Ch

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).