Linux-Hwmon Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 1/3] hwmon: Add amd_energy driver to report energy counters
@ 2020-05-01 17:50 Naveen Krishna Chatradhi
  2020-05-01 17:50 ` [PATCH 2/3] hwmon: (amd_energy) Add documentation Naveen Krishna Chatradhi
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Naveen Krishna Chatradhi @ 2020-05-01 17:50 UTC (permalink / raw)
  To: linux-hwmon; +Cc: naveenkrishna.ch, Naveen Krishna Chatradhi, Guenter Roeck

This patch adds hwmon based amd_energy driver support for
family 17h processors from AMD.

The driver provides following interface to the userspace
1. Reports the per core consumption
	* file: "energy%d_input", label: "Ecore%03d"
2. Reports per socket energy consumption
	* file: "energy%d_input", label: "Esocket%d"
3. To, increase the wrap around time of the socket energy
   counters, a 64bit accumultor is implemented.
4. Reports scaled energy value in Joules.

Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
---
Changes in v5:
1. To improve wrap around time. A kernel thread is implemented
   to accumulate the socket energy counters into a 64bit.
2. Address other comments from Guenter.

 drivers/hwmon/Kconfig      |  10 ++
 drivers/hwmon/Makefile     |   1 +
 drivers/hwmon/amd_energy.c | 329 +++++++++++++++++++++++++++++++++++++
 3 files changed, 340 insertions(+)
 create mode 100644 drivers/hwmon/amd_energy.c

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 4c62f900bf7e..e165e10c49ef 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
 	  This driver can also be built as a module. If so, the module
 	  will be called fam15h_power.
 
+config SENSORS_AMD_ENERGY
+	tristate "AMD RAPL MSR based Energy driver"
+	depends on X86
+	help
+	  If you say yes here you get support for core and package energy
+	  sensors, based on RAPL MSR for AMD family 17h and above CPUs.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called as amd_energy.
+
 config SENSORS_APPLESMC
 	tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
 	depends on INPUT && X86
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index b0b9c8e57176..318f89dc7133 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)	+= adt7411.o
 obj-$(CONFIG_SENSORS_ADT7462)	+= adt7462.o
 obj-$(CONFIG_SENSORS_ADT7470)	+= adt7470.o
 obj-$(CONFIG_SENSORS_ADT7475)	+= adt7475.o
+obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
 obj-$(CONFIG_SENSORS_APPLESMC)	+= applesmc.o
 obj-$(CONFIG_SENSORS_ARM_SCMI)	+= scmi-hwmon.o
 obj-$(CONFIG_SENSORS_ARM_SCPI)	+= scpi-hwmon.o
diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
new file mode 100644
index 000000000000..16364576f067
--- /dev/null
+++ b/drivers/hwmon/amd_energy.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ */
+#include <asm/cpu_device_id.h>
+
+#include <linux/bits.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/hwmon.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/processor.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#define DRVNAME			"amd_energy"
+
+#define ENERGY_PWR_UNIT_MSR	0xC0010299
+#define ENERGY_CORE_MSR		0xC001029A
+#define ENERGY_PKG_MSR		0xC001029B
+
+#define AMD_ENERGY_UNIT_MASK	0x01F00
+
+struct amd_energy_data {
+	struct hwmon_channel_info energy_info;
+	const struct hwmon_channel_info *info[2];
+	struct hwmon_chip_info chip;
+	/* Lock around the accumulator */
+	struct mutex lock;
+	/* Energy Status Units */
+	u64 energy_units;
+	/* An accumulator for each socket */
+	u64 *energy_ctrs;
+	u64 *prev_value;
+};
+
+/* */
+struct task_struct *wrap_accumulate;
+static int nr_cpus, nr_socks;
+
+static int amd_energy_read_labels(struct device *dev,
+				  enum hwmon_sensor_types type,
+				 u32 attr, int channel,
+				 const char **str)
+{
+	char *buf = devm_kcalloc(dev, 10, sizeof(char), GFP_KERNEL);
+
+	if (channel >= nr_cpus)
+		scnprintf(buf, 9, "Esocket%u", channel - nr_cpus);
+	else
+		scnprintf(buf, 9, "Ecore%03u", channel);
+
+	*str = buf;
+
+	return 0;
+}
+
+static int get_energy_units(struct amd_energy_data *data)
+{
+	u64 rapl_units;
+	int ret;
+
+	ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
+	if (ret)
+		return ret;
+
+	data->energy_units = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
+	return 0;
+}
+
+static int read_accumulate(struct amd_energy_data *data)
+{
+	int ret, cu;
+	u64 input = 0;
+
+	for (cu = 0; cu < nr_socks; cu++) {
+		int cpu;
+
+		cpu = cpumask_first_and(cpu_online_mask,
+					cpumask_of_node(cu));
+
+		ret = rdmsrl_safe_on_cpu(cpu, ENERGY_PKG_MSR, &input);
+		if (ret)
+			return ret;
+
+		if (input > data->prev_value[cu])
+			data->energy_ctrs[cu] +=
+				input - data->prev_value[cu];
+		else
+			data->energy_ctrs[cu] +=
+				UINT_MAX - data->prev_value[cu] + input;
+
+		data->prev_value[cu] = input;
+	}
+		return 0;
+}
+
+static int amd_energy_read(struct device *dev,
+			   enum hwmon_sensor_types type,
+			   u32 attr, int channel, long *val)
+{
+	struct amd_energy_data *data = dev_get_drvdata(dev);
+	int ret, cpu;
+	u32 reg;
+	u64 input;
+
+	if (channel >= nr_cpus) {
+		reg = ENERGY_PKG_MSR;
+		cpu = cpumask_first_and(cpu_online_mask,
+					cpumask_of_node
+					(channel - nr_cpus));
+	} else {
+		reg = ENERGY_CORE_MSR;
+		cpu = channel;
+	}
+
+	if (!cpu_online(cpu))
+		return -ENODEV;
+
+	if (data->energy_units == 0 && get_energy_units(data))
+		return -EAGAIN;
+
+	mutex_lock(&data->lock);
+	ret = rdmsrl_safe_on_cpu(cpu, reg, &input);
+	if (ret)
+		return -EAGAIN;
+
+	/* Accumulation is for sockets only */
+	if (channel >= nr_cpus)
+		input += data->energy_ctrs[channel - nr_cpus];
+
+	/* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) Joules */
+	*val = (long)input * div64_ul(1000000UL,
+				      BIT(data->energy_units));
+	mutex_unlock(&data->lock);
+
+	return 0;
+}
+
+static umode_t amd_energy_is_visible(const void *_data,
+				     enum hwmon_sensor_types type,
+				     u32 attr, int channel)
+{
+	return 0444;
+}
+
+static int socket_accumulator(void *p)
+{
+	struct amd_energy_data *data = (struct amd_energy_data *)p;
+
+	while (!kthread_should_stop()) {
+		mutex_lock(&data->lock);
+		read_accumulate(data);
+		mutex_unlock(&data->lock);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (kthread_should_stop())
+			break;
+
+		/*
+		 * On a 240W system, the Socket Energy status
+		 * register may wrap around in
+		 * 2^32*15.3 e-6/240 = 273.8041 secs (~4.5 mins)
+		 *
+		 * let us accumulate for every 100secs
+		 */
+		schedule_timeout(msecs_to_jiffies(100000));
+	}
+	return 0;
+}
+
+static const struct hwmon_ops amd_energy_ops = {
+	.is_visible = amd_energy_is_visible,
+	.read = amd_energy_read,
+	.read_string = amd_energy_read_labels,
+};
+
+static int amd_create_sensor(struct device *dev,
+			     struct amd_energy_data *data,
+			     u8 type, u32 config)
+{
+	int i;
+	u32 *s_config;
+
+	struct hwmon_channel_info *info = &data->energy_info;
+
+	nr_socks = num_possible_nodes();
+	nr_cpus = num_present_cpus();
+
+	s_config = devm_kcalloc(dev, nr_cpus + nr_socks,
+				sizeof(u32), GFP_KERNEL);
+	if (!s_config)
+		return -ENOMEM;
+
+	data->energy_ctrs = devm_kcalloc(dev, nr_socks,
+					 sizeof(u64), GFP_KERNEL);
+	if (!data->energy_ctrs)
+		return -ENOMEM;
+
+	data->prev_value = devm_kcalloc(dev, nr_socks,
+					sizeof(u64), GFP_KERNEL);
+	if (!data->prev_value)
+		return -ENOMEM;
+
+	info->type = type;
+	info->config = s_config;
+
+	for (i = 0; i < nr_cpus + nr_socks; i++)
+		s_config[i] = config;
+
+	return 0;
+}
+
+static int amd_energy_probe(struct platform_device *pdev)
+{
+	struct device *hwmon_dev;
+	struct amd_energy_data *data;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	data = devm_kzalloc(dev,
+			    sizeof(struct amd_energy_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->chip.ops = &amd_energy_ops;
+	data->chip.info = data->info;
+
+	/* Populate per-core energy reporting */
+	data->info[0] = &data->energy_info;
+	amd_create_sensor(dev, data,  hwmon_energy,
+			  HWMON_E_INPUT | HWMON_E_LABEL);
+
+	mutex_init(&data->lock);
+
+	ret = get_energy_units(data);
+	if (ret)
+		return ret;
+
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
+							 data,
+							 &data->chip,
+							 NULL);
+	if (IS_ERR(hwmon_dev))
+		return PTR_ERR(hwmon_dev);
+
+	wrap_accumulate = kthread_run(socket_accumulator, data,
+				      "%s", dev_name(hwmon_dev));
+	if (IS_ERR(wrap_accumulate))
+		return PTR_ERR(wrap_accumulate);
+
+	return 0;
+}
+
+static int amd_energy_remove(struct platform_device *pdev)
+{
+	if (wrap_accumulate)
+		kthread_stop(wrap_accumulate);
+
+	return 0;
+}
+
+static const struct platform_device_id amd_energy_ids[] = {
+	{ .name = DRVNAME, },
+	{}
+};
+MODULE_DEVICE_TABLE(platform, amd_energy_ids);
+
+static struct platform_driver amd_energy_driver = {
+	.probe = amd_energy_probe,
+	.remove	= amd_energy_remove,
+	.id_table = amd_energy_ids,
+	.driver = {
+		.name = DRVNAME,
+	},
+};
+
+static struct platform_device *amd_energy_platdev;
+
+static const struct x86_cpu_id cpu_ids[] __initconst = {
+	X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL),
+	X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, cpu_ids);
+
+static int __init amd_energy_init(void)
+{
+	int ret;
+
+	if (!x86_match_cpu(cpu_ids))
+		return -ENODEV;
+
+	ret = platform_driver_register(&amd_energy_driver);
+	if (ret < 0)
+		return ret;
+
+	amd_energy_platdev = platform_device_alloc(DRVNAME, 0);
+	if (!amd_energy_platdev)
+		return -ENOMEM;
+
+	ret = platform_device_add(amd_energy_platdev);
+	if (ret) {
+		platform_device_unregister(amd_energy_platdev);
+		return ret;
+	}
+
+	return ret;
+}
+
+static void __exit amd_energy_exit(void)
+{
+	platform_device_unregister(amd_energy_platdev);
+	platform_driver_unregister(&amd_energy_driver);
+}
+
+module_init(amd_energy_init);
+module_exit(amd_energy_exit);
+
+MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface");
+MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>");
+MODULE_LICENSE("GPL");
-- 
2.17.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 2/3] hwmon: (amd_energy) Add documentation
  2020-05-01 17:50 [PATCH 1/3] hwmon: Add amd_energy driver to report energy counters Naveen Krishna Chatradhi
@ 2020-05-01 17:50 ` Naveen Krishna Chatradhi
  2020-05-06 16:33   ` Guenter Roeck
  2020-05-01 17:50 ` [PATCH 3/3] MAINTAINERS: add entry for AMD energy driver Naveen Krishna Chatradhi
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Naveen Krishna Chatradhi @ 2020-05-01 17:50 UTC (permalink / raw)
  To: linux-hwmon; +Cc: naveenkrishna.ch, Naveen Krishna Chatradhi, Guenter Roeck

Document amd_energy driver with all chips supported by it.

Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
---
Changes in v5: None

 Documentation/hwmon/amd_energy.rst | 100 +++++++++++++++++++++++++++++
 Documentation/hwmon/index.rst      |   1 +
 2 files changed, 101 insertions(+)
 create mode 100644 Documentation/hwmon/amd_energy.rst

diff --git a/Documentation/hwmon/amd_energy.rst b/Documentation/hwmon/amd_energy.rst
new file mode 100644
index 000000000000..2216c8b13e58
--- /dev/null
+++ b/Documentation/hwmon/amd_energy.rst
@@ -0,0 +1,100 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver amd_energy
+==========================
+
+Supported chips:
+
+* AMD Family 17h Processors
+
+  Prefix: 'amd_energy'
+
+  Addresses used:  RAPL MSRs
+
+  Datasheets:
+
+  - Processor Programming Reference (PPR) for AMD Family 17h Model 01h, Revision B1 Processors
+
+	https://developer.amd.com/wp-content/resources/55570-B1_PUB.zip
+
+  - Preliminary Processor Programming Reference (PPR) for AMD Family 17h Model 31h, Revision B0 Processors
+
+	https://developer.amd.com/wp-content/resources/56176_ppr_Family_17h_Model_71h_B0_pub_Rev_3.06.zip
+
+Author: Naveen Krishna Chatradhi <nchatrad@amd.com>
+
+Description
+-----------
+
+The Energy driver exposes the energy counters that are
+reported via the Running Average Power Limit (RAPL)
+Model-specific Registers (MSRs) via the hardware monitor
+(HWMON) sysfs interface.
+
+1. Power, Energy and Time Units
+   MSR_RAPL_POWER_UNIT/ C001_0299:
+   shared with all cores in the socket
+
+2. Energy consumed by each Core
+   MSR_CORE_ENERGY_STATUS/ C001_029A:
+   32-bitRO, Accumulator, core-level power reporting
+
+3. Energy consumed by Socket
+   MSR_PACKAGE_ENERGY_STATUS/ C001_029B:
+   32-bitRO, Accumulator, socket-level power reporting,
+   shared with all cores in socket
+
+These registers are updated every 1ms and cleared on
+reset of the system.
+
+Energy Caluclation
+------------------
+
+Energy information (in Joules) is based on the multiplier,
+1/2^ESU; where ESU is an unsigned integer read from
+MSR_RAPL_POWER_UNIT register. Default value is 10000b,
+indicating energy status unit is 15.3 micro-Joules increment.
+
+Reported values are scaled as per the formula
+
+scaled value = ((1/2^ESU) * (Raw value) * 1000000UL) in Joules
+
+Users calculate power for a given domain by calculating
+	dEnergy/dTime for that domain.
+
+Socket energy accumulation
+--------------------------
+
+Current Socket energy status register is 32bit, assuming a 240W
+system, the register would wrap around in
+
+	2^32*15.3 e-6/240 = 273.80416512 secs to wrap(~4.5 mins)
+
+To improve the wrap around time, a kernel thread is implemented
+to accumulate the socket energy counter to a 64-bit counter. The
+kernel thread starts running during probe, wakes up at 100secs
+and stops running in remove.
+
+A socket energy read would return the current register value
+added to the respective energy accumulator.
+
+Sysfs attributes
+----------------
+
+=============== ========  =====================================
+Attribute	Label	  Description
+===============	========  =====================================
+
+* For index N between [1] and [nr_cpus]
+
+===============	========  ======================================
+energy[N]_input EcoreX	  Core Energy   X = [0] to [nr_cpus - 1]
+			  Measured input core energy
+===============	========  ======================================
+
+* For N between [nr_cpus] and [nr_cpus + nr_socks]
+
+===============	========  ======================================
+energy[N]_input EsocketX  Socket Energy X = [0] to [nr_socks -1]
+			  Measured input socket energy
+=============== ========  ======================================
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 8ef62fd39787..fc4b89810e67 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -39,6 +39,7 @@ Hardware Monitoring Kernel Drivers
    adt7470
    adt7475
    amc6821
+   amd_energy
    asb100
    asc7621
    aspeed-pwm-tacho
-- 
2.17.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 3/3] MAINTAINERS: add entry for AMD energy driver
  2020-05-01 17:50 [PATCH 1/3] hwmon: Add amd_energy driver to report energy counters Naveen Krishna Chatradhi
  2020-05-01 17:50 ` [PATCH 2/3] hwmon: (amd_energy) Add documentation Naveen Krishna Chatradhi
@ 2020-05-01 17:50 ` Naveen Krishna Chatradhi
  2020-05-06 16:23 ` [PATCH 1/3] hwmon: Add amd_energy driver to report energy counters Guenter Roeck
  2020-05-06 18:32 ` Guenter Roeck
  3 siblings, 0 replies; 11+ messages in thread
From: Naveen Krishna Chatradhi @ 2020-05-01 17:50 UTC (permalink / raw)
  To: linux-hwmon; +Cc: naveenkrishna.ch, Naveen Krishna Chatradhi, Guenter Roeck

The kernel driver is part of HWMON subsystem.

Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
---
Changes in v5: None

 MAINTAINERS | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 26f281d9f32a..48e94d0a9955 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -842,6 +842,13 @@ S:	Supported
 T:	git git://people.freedesktop.org/~agd5f/linux
 F:	drivers/gpu/drm/amd/display/
 
+AMD ENERGY DRIVER
+M:	Naveen Krishna Chatradhi <nchatrad@amd.com>
+L:	linux-hwmon@vger.kernel.org
+S:	Maintained
+F:	Documentation/hwmon/amd_energy.rst
+F:	drivers/hwmon/amd_energy.c
+
 AMD FAM15H PROCESSOR POWER MONITORING DRIVER
 M:	Huang Rui <ray.huang@amd.com>
 L:	linux-hwmon@vger.kernel.org
-- 
2.17.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] hwmon: Add amd_energy driver to report energy counters
  2020-05-01 17:50 [PATCH 1/3] hwmon: Add amd_energy driver to report energy counters Naveen Krishna Chatradhi
  2020-05-01 17:50 ` [PATCH 2/3] hwmon: (amd_energy) Add documentation Naveen Krishna Chatradhi
  2020-05-01 17:50 ` [PATCH 3/3] MAINTAINERS: add entry for AMD energy driver Naveen Krishna Chatradhi
@ 2020-05-06 16:23 ` Guenter Roeck
  2020-05-06 17:06   ` Naveen Krishna Ch
  2020-05-06 18:32 ` Guenter Roeck
  3 siblings, 1 reply; 11+ messages in thread
From: Guenter Roeck @ 2020-05-06 16:23 UTC (permalink / raw)
  To: Naveen Krishna Chatradhi; +Cc: linux-hwmon, naveenkrishna.ch

On Fri, May 01, 2020 at 11:20:01PM +0530, Naveen Krishna Chatradhi wrote:
> This patch adds hwmon based amd_energy driver support for
> family 17h processors from AMD.
> 
> The driver provides following interface to the userspace
> 1. Reports the per core consumption
> 	* file: "energy%d_input", label: "Ecore%03d"
> 2. Reports per socket energy consumption
> 	* file: "energy%d_input", label: "Esocket%d"
> 3. To, increase the wrap around time of the socket energy
>    counters, a 64bit accumultor is implemented.
> 4. Reports scaled energy value in Joules.
> 
> Cc: Guenter Roeck <linux@roeck-us.net>
> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> ---
> Changes in v5:
> 1. To improve wrap around time. A kernel thread is implemented
>    to accumulate the socket energy counters into a 64bit.
> 2. Address other comments from Guenter.
> 
>  drivers/hwmon/Kconfig      |  10 ++
>  drivers/hwmon/Makefile     |   1 +
>  drivers/hwmon/amd_energy.c | 329 +++++++++++++++++++++++++++++++++++++
>  3 files changed, 340 insertions(+)
>  create mode 100644 drivers/hwmon/amd_energy.c
> 
> diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> index 4c62f900bf7e..e165e10c49ef 100644
> --- a/drivers/hwmon/Kconfig
> +++ b/drivers/hwmon/Kconfig
> @@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
>  	  This driver can also be built as a module. If so, the module
>  	  will be called fam15h_power.
>  
> +config SENSORS_AMD_ENERGY
> +	tristate "AMD RAPL MSR based Energy driver"
> +	depends on X86
> +	help
> +	  If you say yes here you get support for core and package energy
> +	  sensors, based on RAPL MSR for AMD family 17h and above CPUs.
> +
> +	  This driver can also be built as a module. If so, the module
> +	  will be called as amd_energy.
> +
>  config SENSORS_APPLESMC
>  	tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
>  	depends on INPUT && X86
> diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> index b0b9c8e57176..318f89dc7133 100644
> --- a/drivers/hwmon/Makefile
> +++ b/drivers/hwmon/Makefile
> @@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)	+= adt7411.o
>  obj-$(CONFIG_SENSORS_ADT7462)	+= adt7462.o
>  obj-$(CONFIG_SENSORS_ADT7470)	+= adt7470.o
>  obj-$(CONFIG_SENSORS_ADT7475)	+= adt7475.o
> +obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
>  obj-$(CONFIG_SENSORS_APPLESMC)	+= applesmc.o
>  obj-$(CONFIG_SENSORS_ARM_SCMI)	+= scmi-hwmon.o
>  obj-$(CONFIG_SENSORS_ARM_SCPI)	+= scpi-hwmon.o
> diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
> new file mode 100644
> index 000000000000..16364576f067
> --- /dev/null
> +++ b/drivers/hwmon/amd_energy.c
> @@ -0,0 +1,329 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +/*
> + * Copyright (C) 2020 Advanced Micro Devices, Inc.
> + */
> +#include <asm/cpu_device_id.h>
> +
> +#include <linux/bits.h>
> +#include <linux/cpu.h>
> +#include <linux/cpumask.h>
> +#include <linux/device.h>
> +#include <linux/hwmon.h>
> +#include <linux/kernel.h>
> +#include <linux/kthread.h>
> +#include <linux/list.h>
> +#include <linux/module.h>
> +#include <linux/mutex.h>
> +#include <linux/processor.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +
> +#define DRVNAME			"amd_energy"
> +
> +#define ENERGY_PWR_UNIT_MSR	0xC0010299
> +#define ENERGY_CORE_MSR		0xC001029A
> +#define ENERGY_PKG_MSR		0xC001029B
> +
> +#define AMD_ENERGY_UNIT_MASK	0x01F00
> +
> +struct amd_energy_data {
> +	struct hwmon_channel_info energy_info;
> +	const struct hwmon_channel_info *info[2];
> +	struct hwmon_chip_info chip;
> +	/* Lock around the accumulator */
> +	struct mutex lock;
> +	/* Energy Status Units */
> +	u64 energy_units;
> +	/* An accumulator for each socket */
> +	u64 *energy_ctrs;
> +	u64 *prev_value;
> +};
> +
> +/* */
> +struct task_struct *wrap_accumulate;

declare as static

> +static int nr_cpus, nr_socks;
> +
> +static int amd_energy_read_labels(struct device *dev,
> +				  enum hwmon_sensor_types type,
> +				 u32 attr, int channel,
> +				 const char **str)

indentation is off here.

> +{
> +	char *buf = devm_kcalloc(dev, 10, sizeof(char), GFP_KERNEL);
> +

This allocates 10 bytes each time the function is called, ie each time
the "sensors" command is executed (or, more specifically, each time
the sysfs attribute is read).

On a side note, using devm_kcalloc() to allocate a string is really
not necessary / useful.

> +	if (channel >= nr_cpus)
> +		scnprintf(buf, 9, "Esocket%u", channel - nr_cpus);
> +	else
> +		scnprintf(buf, 9, "Ecore%03u", channel);

For scnprintf(), the length parameter includes the trailing '\0'.
So it is safe to pass 10 as length here.

> +
> +	*str = buf;
> +
> +	return 0;
> +}
> +
> +static int get_energy_units(struct amd_energy_data *data)
> +{
> +	u64 rapl_units;
> +	int ret;
> +
> +	ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
> +	if (ret)
> +		return ret;
> +
> +	data->energy_units = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
> +	return 0;
> +}
> +
> +static int read_accumulate(struct amd_energy_data *data)
> +{
> +	int ret, cu;
> +	u64 input = 0;
> +
> +	for (cu = 0; cu < nr_socks; cu++) {
> +		int cpu;
> +
> +		cpu = cpumask_first_and(cpu_online_mask,
> +					cpumask_of_node(cu));
> +
> +		ret = rdmsrl_safe_on_cpu(cpu, ENERGY_PKG_MSR, &input);
> +		if (ret)
> +			return ret;
> +
> +		if (input > data->prev_value[cu])
> +			data->energy_ctrs[cu] +=
> +				input - data->prev_value[cu];
> +		else
> +			data->energy_ctrs[cu] +=
> +				UINT_MAX - data->prev_value[cu] + input;
> +
> +		data->prev_value[cu] = input;
> +	}
> +		return 0;
> +}
> +
> +static int amd_energy_read(struct device *dev,
> +			   enum hwmon_sensor_types type,
> +			   u32 attr, int channel, long *val)
> +{
> +	struct amd_energy_data *data = dev_get_drvdata(dev);
> +	int ret, cpu;
> +	u32 reg;
> +	u64 input;
> +
> +	if (channel >= nr_cpus) {
> +		reg = ENERGY_PKG_MSR;
> +		cpu = cpumask_first_and(cpu_online_mask,
> +					cpumask_of_node
> +					(channel - nr_cpus));
> +	} else {
> +		reg = ENERGY_CORE_MSR;
> +		cpu = channel;
> +	}
> +
> +	if (!cpu_online(cpu))
> +		return -ENODEV;
> +
> +	if (data->energy_units == 0 && get_energy_units(data))
> +		return -EAGAIN;
> +
> +	mutex_lock(&data->lock);
> +	ret = rdmsrl_safe_on_cpu(cpu, reg, &input);
> +	if (ret)
> +		return -EAGAIN;
> +
> +	/* Accumulation is for sockets only */
> +	if (channel >= nr_cpus)
> +		input += data->energy_ctrs[channel - nr_cpus];
> +
> +	/* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) Joules */
> +	*val = (long)input * div64_ul(1000000UL,
> +				      BIT(data->energy_units));
> +	mutex_unlock(&data->lock);
> +
> +	return 0;
> +}
> +
> +static umode_t amd_energy_is_visible(const void *_data,
> +				     enum hwmon_sensor_types type,
> +				     u32 attr, int channel)
> +{
> +	return 0444;
> +}
> +
> +static int socket_accumulator(void *p)
> +{
> +	struct amd_energy_data *data = (struct amd_energy_data *)p;
> +
> +	while (!kthread_should_stop()) {
> +		mutex_lock(&data->lock);
> +		read_accumulate(data);
> +		mutex_unlock(&data->lock);
> +
> +		set_current_state(TASK_INTERRUPTIBLE);
> +		if (kthread_should_stop())
> +			break;
> +
> +		/*
> +		 * On a 240W system, the Socket Energy status
> +		 * register may wrap around in
> +		 * 2^32*15.3 e-6/240 = 273.8041 secs (~4.5 mins)
> +		 *
> +		 * let us accumulate for every 100secs
> +		 */
> +		schedule_timeout(msecs_to_jiffies(100000));
> +	}
> +	return 0;
> +}
> +
> +static const struct hwmon_ops amd_energy_ops = {
> +	.is_visible = amd_energy_is_visible,
> +	.read = amd_energy_read,
> +	.read_string = amd_energy_read_labels,
> +};
> +
> +static int amd_create_sensor(struct device *dev,
> +			     struct amd_energy_data *data,
> +			     u8 type, u32 config)
> +{
> +	int i;
> +	u32 *s_config;
> +
> +	struct hwmon_channel_info *info = &data->energy_info;
> +
> +	nr_socks = num_possible_nodes();
> +	nr_cpus = num_present_cpus();
> +
> +	s_config = devm_kcalloc(dev, nr_cpus + nr_socks,
> +				sizeof(u32), GFP_KERNEL);
> +	if (!s_config)
> +		return -ENOMEM;
> +
> +	data->energy_ctrs = devm_kcalloc(dev, nr_socks,
> +					 sizeof(u64), GFP_KERNEL);
> +	if (!data->energy_ctrs)
> +		return -ENOMEM;
> +
> +	data->prev_value = devm_kcalloc(dev, nr_socks,
> +					sizeof(u64), GFP_KERNEL);
> +	if (!data->prev_value)
> +		return -ENOMEM;
> +
> +	info->type = type;
> +	info->config = s_config;
> +
> +	for (i = 0; i < nr_cpus + nr_socks; i++)
> +		s_config[i] = config;
> +
> +	return 0;
> +}
> +
> +static int amd_energy_probe(struct platform_device *pdev)
> +{
> +	struct device *hwmon_dev;
> +	struct amd_energy_data *data;
> +	struct device *dev = &pdev->dev;
> +	int ret;
> +
> +	data = devm_kzalloc(dev,
> +			    sizeof(struct amd_energy_data), GFP_KERNEL);
> +	if (!data)
> +		return -ENOMEM;
> +
> +	data->chip.ops = &amd_energy_ops;
> +	data->chip.info = data->info;
> +
> +	/* Populate per-core energy reporting */
> +	data->info[0] = &data->energy_info;
> +	amd_create_sensor(dev, data,  hwmon_energy,
> +			  HWMON_E_INPUT | HWMON_E_LABEL);
> +
> +	mutex_init(&data->lock);
> +
> +	ret = get_energy_units(data);
> +	if (ret)
> +		return ret;
> +
> +	hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
> +							 data,
> +							 &data->chip,
> +							 NULL);
> +	if (IS_ERR(hwmon_dev))
> +		return PTR_ERR(hwmon_dev);
> +
> +	wrap_accumulate = kthread_run(socket_accumulator, data,
> +				      "%s", dev_name(hwmon_dev));
> +	if (IS_ERR(wrap_accumulate))
> +		return PTR_ERR(wrap_accumulate);
> +
> +	return 0;

	return PTR_ERR_OR_ZERO(wrap_accumulate);

> +}
> +
> +static int amd_energy_remove(struct platform_device *pdev)
> +{
> +	if (wrap_accumulate)
> +		kthread_stop(wrap_accumulate);
> +
> +	return 0;
> +}
> +
> +static const struct platform_device_id amd_energy_ids[] = {
> +	{ .name = DRVNAME, },
> +	{}
> +};
> +MODULE_DEVICE_TABLE(platform, amd_energy_ids);
> +
> +static struct platform_driver amd_energy_driver = {
> +	.probe = amd_energy_probe,
> +	.remove	= amd_energy_remove,
> +	.id_table = amd_energy_ids,
> +	.driver = {
> +		.name = DRVNAME,
> +	},
> +};
> +
> +static struct platform_device *amd_energy_platdev;
> +
> +static const struct x86_cpu_id cpu_ids[] __initconst = {
> +	X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL),
> +	X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL),
> +	{}
> +};
> +MODULE_DEVICE_TABLE(x86cpu, cpu_ids);
> +
> +static int __init amd_energy_init(void)
> +{
> +	int ret;
> +
> +	if (!x86_match_cpu(cpu_ids))
> +		return -ENODEV;
> +
> +	ret = platform_driver_register(&amd_energy_driver);
> +	if (ret < 0)
> +		return ret;
> +
> +	amd_energy_platdev = platform_device_alloc(DRVNAME, 0);
> +	if (!amd_energy_platdev)
> +		return -ENOMEM;
> +
> +	ret = platform_device_add(amd_energy_platdev);
> +	if (ret) {
> +		platform_device_unregister(amd_energy_platdev);
> +		return ret;
> +	}
> +
> +	return ret;
> +}
> +
> +static void __exit amd_energy_exit(void)
> +{
> +	platform_device_unregister(amd_energy_platdev);
> +	platform_driver_unregister(&amd_energy_driver);
> +}
> +
> +module_init(amd_energy_init);
> +module_exit(amd_energy_exit);
> +
> +MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface");
> +MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>");
> +MODULE_LICENSE("GPL");

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] hwmon: (amd_energy) Add documentation
  2020-05-01 17:50 ` [PATCH 2/3] hwmon: (amd_energy) Add documentation Naveen Krishna Chatradhi
@ 2020-05-06 16:33   ` Guenter Roeck
  2020-05-06 17:11     ` Naveen Krishna Ch
  0 siblings, 1 reply; 11+ messages in thread
From: Guenter Roeck @ 2020-05-06 16:33 UTC (permalink / raw)
  To: Naveen Krishna Chatradhi; +Cc: linux-hwmon, naveenkrishna.ch

On Fri, May 01, 2020 at 11:20:02PM +0530, Naveen Krishna Chatradhi wrote:
> Document amd_energy driver with all chips supported by it.
> 
> Cc: Guenter Roeck <linux@roeck-us.net>
> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> ---
> Changes in v5: None
> 
>  Documentation/hwmon/amd_energy.rst | 100 +++++++++++++++++++++++++++++
>  Documentation/hwmon/index.rst      |   1 +
>  2 files changed, 101 insertions(+)
>  create mode 100644 Documentation/hwmon/amd_energy.rst
> 
> diff --git a/Documentation/hwmon/amd_energy.rst b/Documentation/hwmon/amd_energy.rst
> new file mode 100644
> index 000000000000..2216c8b13e58
> --- /dev/null
> +++ b/Documentation/hwmon/amd_energy.rst
> @@ -0,0 +1,100 @@
> +.. SPDX-License-Identifier: GPL-2.0
> +
> +Kernel driver amd_energy
> +==========================
> +
> +Supported chips:
> +
> +* AMD Family 17h Processors
> +
> +  Prefix: 'amd_energy'
> +
> +  Addresses used:  RAPL MSRs
> +
> +  Datasheets:
> +
> +  - Processor Programming Reference (PPR) for AMD Family 17h Model 01h, Revision B1 Processors
> +
> +	https://developer.amd.com/wp-content/resources/55570-B1_PUB.zip
> +
> +  - Preliminary Processor Programming Reference (PPR) for AMD Family 17h Model 31h, Revision B0 Processors
> +
> +	https://developer.amd.com/wp-content/resources/56176_ppr_Family_17h_Model_71h_B0_pub_Rev_3.06.zip
> +
> +Author: Naveen Krishna Chatradhi <nchatrad@amd.com>
> +
> +Description
> +-----------
> +
> +The Energy driver exposes the energy counters that are
> +reported via the Running Average Power Limit (RAPL)
> +Model-specific Registers (MSRs) via the hardware monitor
> +(HWMON) sysfs interface.
> +
> +1. Power, Energy and Time Units
> +   MSR_RAPL_POWER_UNIT/ C001_0299:
> +   shared with all cores in the socket
> +
> +2. Energy consumed by each Core
> +   MSR_CORE_ENERGY_STATUS/ C001_029A:
> +   32-bitRO, Accumulator, core-level power reporting
> +
> +3. Energy consumed by Socket
> +   MSR_PACKAGE_ENERGY_STATUS/ C001_029B:
> +   32-bitRO, Accumulator, socket-level power reporting,
> +   shared with all cores in socket
> +
> +These registers are updated every 1ms and cleared on
> +reset of the system.
> +
> +Energy Caluclation
> +------------------
> +
> +Energy information (in Joules) is based on the multiplier,
> +1/2^ESU; where ESU is an unsigned integer read from
> +MSR_RAPL_POWER_UNIT register. Default value is 10000b,
> +indicating energy status unit is 15.3 micro-Joules increment.
> +
> +Reported values are scaled as per the formula
> +
> +scaled value = ((1/2^ESU) * (Raw value) * 1000000UL) in Joules
> +
> +Users calculate power for a given domain by calculating
> +	dEnergy/dTime for that domain.
> +
> +Socket energy accumulation
> +--------------------------
> +
> +Current Socket energy status register is 32bit, assuming a 240W
> +system, the register would wrap around in
> +
> +	2^32*15.3 e-6/240 = 273.80416512 secs to wrap(~4.5 mins)
> +
> +To improve the wrap around time, a kernel thread is implemented
> +to accumulate the socket energy counter to a 64-bit counter. The
> +kernel thread starts running during probe, wakes up at 100secs

wakes up every 100 seconds

> +and stops running in remove.

stops running when the driver is removed.

All counters need to be be updated by the kernel thread, not just the socket
counter. If the socket counter can wrap in 4.5 minutes, the matching per-core
counters on a 64-core system can wrap every 4.5 * 64 = 288 minutes, which
isn't much better. This might be even worse on a system with fewer cores and
higher per-core power.

> +
> +A socket energy read would return the current register value
> +added to the respective energy accumulator.
> +
> +Sysfs attributes
> +----------------
> +
> +=============== ========  =====================================
> +Attribute	Label	  Description
> +===============	========  =====================================
> +
> +* For index N between [1] and [nr_cpus]
> +
> +===============	========  ======================================
> +energy[N]_input EcoreX	  Core Energy   X = [0] to [nr_cpus - 1]
> +			  Measured input core energy
> +===============	========  ======================================
> +
> +* For N between [nr_cpus] and [nr_cpus + nr_socks]
> +
> +===============	========  ======================================
> +energy[N]_input EsocketX  Socket Energy X = [0] to [nr_socks -1]
> +			  Measured input socket energy
> +=============== ========  ======================================
> diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
> index 8ef62fd39787..fc4b89810e67 100644
> --- a/Documentation/hwmon/index.rst
> +++ b/Documentation/hwmon/index.rst
> @@ -39,6 +39,7 @@ Hardware Monitoring Kernel Drivers
>     adt7470
>     adt7475
>     amc6821
> +   amd_energy
>     asb100
>     asc7621
>     aspeed-pwm-tacho

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] hwmon: Add amd_energy driver to report energy counters
  2020-05-06 16:23 ` [PATCH 1/3] hwmon: Add amd_energy driver to report energy counters Guenter Roeck
@ 2020-05-06 17:06   ` Naveen Krishna Ch
  0 siblings, 0 replies; 11+ messages in thread
From: Naveen Krishna Ch @ 2020-05-06 17:06 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Naveen Krishna Chatradhi, linux-hwmon

Hi Guenter

On Wed, 6 May 2020 at 21:54, Guenter Roeck <linux@roeck-us.net> wrote:
>
> On Fri, May 01, 2020 at 11:20:01PM +0530, Naveen Krishna Chatradhi wrote:
> > This patch adds hwmon based amd_energy driver support for
> > family 17h processors from AMD.
> >
> > The driver provides following interface to the userspace
> > 1. Reports the per core consumption
> >       * file: "energy%d_input", label: "Ecore%03d"
> > 2. Reports per socket energy consumption
> >       * file: "energy%d_input", label: "Esocket%d"
> > 3. To, increase the wrap around time of the socket energy
> >    counters, a 64bit accumultor is implemented.
> > 4. Reports scaled energy value in Joules.
> >
> > Cc: Guenter Roeck <linux@roeck-us.net>
> > Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> > ---
> > Changes in v5:
> > 1. To improve wrap around time. A kernel thread is implemented
> >    to accumulate the socket energy counters into a 64bit.
> > 2. Address other comments from Guenter.
> >
> >  drivers/hwmon/Kconfig      |  10 ++
> >  drivers/hwmon/Makefile     |   1 +
> >  drivers/hwmon/amd_energy.c | 329 +++++++++++++++++++++++++++++++++++++
> >  3 files changed, 340 insertions(+)
> >  create mode 100644 drivers/hwmon/amd_energy.c
> >
> > diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> > index 4c62f900bf7e..e165e10c49ef 100644
> > --- a/drivers/hwmon/Kconfig
> > +++ b/drivers/hwmon/Kconfig
> > @@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
> >         This driver can also be built as a module. If so, the module
> >         will be called fam15h_power.
> >
> > +config SENSORS_AMD_ENERGY
> > +     tristate "AMD RAPL MSR based Energy driver"
> > +     depends on X86
> > +     help
> > +       If you say yes here you get support for core and package energy
> > +       sensors, based on RAPL MSR for AMD family 17h and above CPUs.
> > +
> > +       This driver can also be built as a module. If so, the module
> > +       will be called as amd_energy.
> > +
> >  config SENSORS_APPLESMC
> >       tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
> >       depends on INPUT && X86
> > diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> > index b0b9c8e57176..318f89dc7133 100644
> > --- a/drivers/hwmon/Makefile
> > +++ b/drivers/hwmon/Makefile
> > @@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)       += adt7411.o
> >  obj-$(CONFIG_SENSORS_ADT7462)        += adt7462.o
> >  obj-$(CONFIG_SENSORS_ADT7470)        += adt7470.o
> >  obj-$(CONFIG_SENSORS_ADT7475)        += adt7475.o
> > +obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
> >  obj-$(CONFIG_SENSORS_APPLESMC)       += applesmc.o
> >  obj-$(CONFIG_SENSORS_ARM_SCMI)       += scmi-hwmon.o
> >  obj-$(CONFIG_SENSORS_ARM_SCPI)       += scpi-hwmon.o
> > diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
> > new file mode 100644
> > index 000000000000..16364576f067
> > --- /dev/null
> > +++ b/drivers/hwmon/amd_energy.c
> > @@ -0,0 +1,329 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +
> > +/*
> > + * Copyright (C) 2020 Advanced Micro Devices, Inc.
> > + */
> > +#include <asm/cpu_device_id.h>
> > +
> > +#include <linux/bits.h>
> > +#include <linux/cpu.h>
> > +#include <linux/cpumask.h>
> > +#include <linux/device.h>
> > +#include <linux/hwmon.h>
> > +#include <linux/kernel.h>
> > +#include <linux/kthread.h>
> > +#include <linux/list.h>
> > +#include <linux/module.h>
> > +#include <linux/mutex.h>
> > +#include <linux/processor.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/slab.h>
> > +#include <linux/types.h>
> > +
> > +#define DRVNAME                      "amd_energy"
> > +
> > +#define ENERGY_PWR_UNIT_MSR  0xC0010299
> > +#define ENERGY_CORE_MSR              0xC001029A
> > +#define ENERGY_PKG_MSR               0xC001029B
> > +
> > +#define AMD_ENERGY_UNIT_MASK 0x01F00
> > +
> > +struct amd_energy_data {
> > +     struct hwmon_channel_info energy_info;
> > +     const struct hwmon_channel_info *info[2];
> > +     struct hwmon_chip_info chip;
> > +     /* Lock around the accumulator */
> > +     struct mutex lock;
> > +     /* Energy Status Units */
> > +     u64 energy_units;
> > +     /* An accumulator for each socket */
> > +     u64 *energy_ctrs;
> > +     u64 *prev_value;
> > +};
> > +
> > +/* */
> > +struct task_struct *wrap_accumulate;
>
> declare as static
>
> > +static int nr_cpus, nr_socks;
> > +
> > +static int amd_energy_read_labels(struct device *dev,
> > +                               enum hwmon_sensor_types type,
> > +                              u32 attr, int channel,
> > +                              const char **str)
>
> indentation is off here.
>
> > +{
> > +     char *buf = devm_kcalloc(dev, 10, sizeof(char), GFP_KERNEL);
> > +
>
> This allocates 10 bytes each time the function is called, ie each time
> the "sensors" command is executed (or, more specifically, each time
> the sysfs attribute is read).
>
> On a side note, using devm_kcalloc() to allocate a string is really
> not necessary / useful.
>
> > +     if (channel >= nr_cpus)
> > +             scnprintf(buf, 9, "Esocket%u", channel - nr_cpus);
> > +     else
> > +             scnprintf(buf, 9, "Ecore%03u", channel);
>
> For scnprintf(), the length parameter includes the trailing '\0'.
> So it is safe to pass 10 as length here.
>
> > +
> > +     *str = buf;
> > +
> > +     return 0;
> > +}
> > +
> > +static int get_energy_units(struct amd_energy_data *data)
> > +{
> > +     u64 rapl_units;
> > +     int ret;
> > +
> > +     ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
> > +     if (ret)
> > +             return ret;
> > +
> > +     data->energy_units = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
> > +     return 0;
> > +}
> > +
> > +static int read_accumulate(struct amd_energy_data *data)
> > +{
> > +     int ret, cu;
> > +     u64 input = 0;
> > +
> > +     for (cu = 0; cu < nr_socks; cu++) {
> > +             int cpu;
> > +
> > +             cpu = cpumask_first_and(cpu_online_mask,
> > +                                     cpumask_of_node(cu));
> > +
> > +             ret = rdmsrl_safe_on_cpu(cpu, ENERGY_PKG_MSR, &input);
> > +             if (ret)
> > +                     return ret;
> > +
> > +             if (input > data->prev_value[cu])
> > +                     data->energy_ctrs[cu] +=
> > +                             input - data->prev_value[cu];
> > +             else
> > +                     data->energy_ctrs[cu] +=
> > +                             UINT_MAX - data->prev_value[cu] + input;
> > +
> > +             data->prev_value[cu] = input;
> > +     }
> > +             return 0;
> > +}
> > +
> > +static int amd_energy_read(struct device *dev,
> > +                        enum hwmon_sensor_types type,
> > +                        u32 attr, int channel, long *val)
> > +{
> > +     struct amd_energy_data *data = dev_get_drvdata(dev);
> > +     int ret, cpu;
> > +     u32 reg;
> > +     u64 input;
> > +
> > +     if (channel >= nr_cpus) {
> > +             reg = ENERGY_PKG_MSR;
> > +             cpu = cpumask_first_and(cpu_online_mask,
> > +                                     cpumask_of_node
> > +                                     (channel - nr_cpus));
> > +     } else {
> > +             reg = ENERGY_CORE_MSR;
> > +             cpu = channel;
> > +     }
> > +
> > +     if (!cpu_online(cpu))
> > +             return -ENODEV;
> > +
> > +     if (data->energy_units == 0 && get_energy_units(data))
> > +             return -EAGAIN;
> > +
> > +     mutex_lock(&data->lock);
> > +     ret = rdmsrl_safe_on_cpu(cpu, reg, &input);
> > +     if (ret)
> > +             return -EAGAIN;
> > +
> > +     /* Accumulation is for sockets only */
> > +     if (channel >= nr_cpus)
> > +             input += data->energy_ctrs[channel - nr_cpus];
> > +
> > +     /* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) Joules */
> > +     *val = (long)input * div64_ul(1000000UL,
> > +                                   BIT(data->energy_units));
> > +     mutex_unlock(&data->lock);
> > +
> > +     return 0;
> > +}
> > +
> > +static umode_t amd_energy_is_visible(const void *_data,
> > +                                  enum hwmon_sensor_types type,
> > +                                  u32 attr, int channel)
> > +{
> > +     return 0444;
> > +}
> > +
> > +static int socket_accumulator(void *p)
> > +{
> > +     struct amd_energy_data *data = (struct amd_energy_data *)p;
> > +
> > +     while (!kthread_should_stop()) {
> > +             mutex_lock(&data->lock);
> > +             read_accumulate(data);
> > +             mutex_unlock(&data->lock);
> > +
> > +             set_current_state(TASK_INTERRUPTIBLE);
> > +             if (kthread_should_stop())
> > +                     break;
> > +
> > +             /*
> > +              * On a 240W system, the Socket Energy status
> > +              * register may wrap around in
> > +              * 2^32*15.3 e-6/240 = 273.8041 secs (~4.5 mins)
> > +              *
> > +              * let us accumulate for every 100secs
> > +              */
> > +             schedule_timeout(msecs_to_jiffies(100000));
> > +     }
> > +     return 0;
> > +}
> > +
> > +static const struct hwmon_ops amd_energy_ops = {
> > +     .is_visible = amd_energy_is_visible,
> > +     .read = amd_energy_read,
> > +     .read_string = amd_energy_read_labels,
> > +};
> > +
> > +static int amd_create_sensor(struct device *dev,
> > +                          struct amd_energy_data *data,
> > +                          u8 type, u32 config)
> > +{
> > +     int i;
> > +     u32 *s_config;
> > +
> > +     struct hwmon_channel_info *info = &data->energy_info;
> > +
> > +     nr_socks = num_possible_nodes();
> > +     nr_cpus = num_present_cpus();
> > +
> > +     s_config = devm_kcalloc(dev, nr_cpus + nr_socks,
> > +                             sizeof(u32), GFP_KERNEL);
> > +     if (!s_config)
> > +             return -ENOMEM;
> > +
> > +     data->energy_ctrs = devm_kcalloc(dev, nr_socks,
> > +                                      sizeof(u64), GFP_KERNEL);
> > +     if (!data->energy_ctrs)
> > +             return -ENOMEM;
> > +
> > +     data->prev_value = devm_kcalloc(dev, nr_socks,
> > +                                     sizeof(u64), GFP_KERNEL);
> > +     if (!data->prev_value)
> > +             return -ENOMEM;
> > +
> > +     info->type = type;
> > +     info->config = s_config;
> > +
> > +     for (i = 0; i < nr_cpus + nr_socks; i++)
> > +             s_config[i] = config;
> > +
> > +     return 0;
> > +}
> > +
> > +static int amd_energy_probe(struct platform_device *pdev)
> > +{
> > +     struct device *hwmon_dev;
> > +     struct amd_energy_data *data;
> > +     struct device *dev = &pdev->dev;
> > +     int ret;
> > +
> > +     data = devm_kzalloc(dev,
> > +                         sizeof(struct amd_energy_data), GFP_KERNEL);
> > +     if (!data)
> > +             return -ENOMEM;
> > +
> > +     data->chip.ops = &amd_energy_ops;
> > +     data->chip.info = data->info;
> > +
> > +     /* Populate per-core energy reporting */
> > +     data->info[0] = &data->energy_info;
> > +     amd_create_sensor(dev, data,  hwmon_energy,
> > +                       HWMON_E_INPUT | HWMON_E_LABEL);
> > +
> > +     mutex_init(&data->lock);
> > +
> > +     ret = get_energy_units(data);
> > +     if (ret)
> > +             return ret;
> > +
> > +     hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
> > +                                                      data,
> > +                                                      &data->chip,
> > +                                                      NULL);
> > +     if (IS_ERR(hwmon_dev))
> > +             return PTR_ERR(hwmon_dev);
> > +
> > +     wrap_accumulate = kthread_run(socket_accumulator, data,
> > +                                   "%s", dev_name(hwmon_dev));
> > +     if (IS_ERR(wrap_accumulate))
> > +             return PTR_ERR(wrap_accumulate);
> > +
> > +     return 0;
>
>         return PTR_ERR_OR_ZERO(wrap_accumulate);
>
> > +}
> > +
> > +static int amd_energy_remove(struct platform_device *pdev)
> > +{
> > +     if (wrap_accumulate)
> > +             kthread_stop(wrap_accumulate);
> > +
> > +     return 0;
> > +}
> > +
> > +static const struct platform_device_id amd_energy_ids[] = {
> > +     { .name = DRVNAME, },
> > +     {}
> > +};
> > +MODULE_DEVICE_TABLE(platform, amd_energy_ids);
> > +
> > +static struct platform_driver amd_energy_driver = {
> > +     .probe = amd_energy_probe,
> > +     .remove = amd_energy_remove,
> > +     .id_table = amd_energy_ids,
> > +     .driver = {
> > +             .name = DRVNAME,
> > +     },
> > +};
> > +
> > +static struct platform_device *amd_energy_platdev;
> > +
> > +static const struct x86_cpu_id cpu_ids[] __initconst = {
> > +     X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL),
> > +     X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL),
> > +     {}
> > +};
> > +MODULE_DEVICE_TABLE(x86cpu, cpu_ids);
> > +
> > +static int __init amd_energy_init(void)
> > +{
> > +     int ret;
> > +
> > +     if (!x86_match_cpu(cpu_ids))
> > +             return -ENODEV;
> > +
> > +     ret = platform_driver_register(&amd_energy_driver);
> > +     if (ret < 0)
> > +             return ret;
> > +
> > +     amd_energy_platdev = platform_device_alloc(DRVNAME, 0);
> > +     if (!amd_energy_platdev)
> > +             return -ENOMEM;
> > +
> > +     ret = platform_device_add(amd_energy_platdev);
> > +     if (ret) {
> > +             platform_device_unregister(amd_energy_platdev);
> > +             return ret;
> > +     }
> > +
> > +     return ret;
> > +}
> > +
> > +static void __exit amd_energy_exit(void)
> > +{
> > +     platform_device_unregister(amd_energy_platdev);
> > +     platform_driver_unregister(&amd_energy_driver);
> > +}
> > +
> > +module_init(amd_energy_init);
> > +module_exit(amd_energy_exit);
> > +
> > +MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface");
> > +MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>");
> > +MODULE_LICENSE("GPL");

Thank you for reviewing the code, will address and resubmit.

-- 
Shine bright,
(: Nav :)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] hwmon: (amd_energy) Add documentation
  2020-05-06 16:33   ` Guenter Roeck
@ 2020-05-06 17:11     ` Naveen Krishna Ch
  2020-05-06 19:27       ` Guenter Roeck
  0 siblings, 1 reply; 11+ messages in thread
From: Naveen Krishna Ch @ 2020-05-06 17:11 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Naveen Krishna Chatradhi, linux-hwmon

Hi Guenter

On Wed, 6 May 2020 at 22:03, Guenter Roeck <linux@roeck-us.net> wrote:
>
> On Fri, May 01, 2020 at 11:20:02PM +0530, Naveen Krishna Chatradhi wrote:
> > Document amd_energy driver with all chips supported by it.
> >
> > Cc: Guenter Roeck <linux@roeck-us.net>
> > Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> > ---
> > Changes in v5: None
> >
> >  Documentation/hwmon/amd_energy.rst | 100 +++++++++++++++++++++++++++++
> >  Documentation/hwmon/index.rst      |   1 +
> >  2 files changed, 101 insertions(+)
> >  create mode 100644 Documentation/hwmon/amd_energy.rst
> >
> > diff --git a/Documentation/hwmon/amd_energy.rst b/Documentation/hwmon/amd_energy.rst
> > new file mode 100644
> > index 000000000000..2216c8b13e58
> > --- /dev/null
> > +++ b/Documentation/hwmon/amd_energy.rst
> > @@ -0,0 +1,100 @@
> > +.. SPDX-License-Identifier: GPL-2.0
> > +
> > +Kernel driver amd_energy
> > +==========================
> > +
> > +Supported chips:
> > +
> > +* AMD Family 17h Processors
> > +
> > +  Prefix: 'amd_energy'
> > +
> > +  Addresses used:  RAPL MSRs
> > +
> > +  Datasheets:
> > +
> > +  - Processor Programming Reference (PPR) for AMD Family 17h Model 01h, Revision B1 Processors
> > +
> > +     https://developer.amd.com/wp-content/resources/55570-B1_PUB.zip
> > +
> > +  - Preliminary Processor Programming Reference (PPR) for AMD Family 17h Model 31h, Revision B0 Processors
> > +
> > +     https://developer.amd.com/wp-content/resources/56176_ppr_Family_17h_Model_71h_B0_pub_Rev_3.06.zip
> > +
> > +Author: Naveen Krishna Chatradhi <nchatrad@amd.com>
> > +
> > +Description
> > +-----------
> > +
> > +The Energy driver exposes the energy counters that are
> > +reported via the Running Average Power Limit (RAPL)
> > +Model-specific Registers (MSRs) via the hardware monitor
> > +(HWMON) sysfs interface.
> > +
> > +1. Power, Energy and Time Units
> > +   MSR_RAPL_POWER_UNIT/ C001_0299:
> > +   shared with all cores in the socket
> > +
> > +2. Energy consumed by each Core
> > +   MSR_CORE_ENERGY_STATUS/ C001_029A:
> > +   32-bitRO, Accumulator, core-level power reporting
> > +
> > +3. Energy consumed by Socket
> > +   MSR_PACKAGE_ENERGY_STATUS/ C001_029B:
> > +   32-bitRO, Accumulator, socket-level power reporting,
> > +   shared with all cores in socket
> > +
> > +These registers are updated every 1ms and cleared on
> > +reset of the system.
> > +
> > +Energy Caluclation
> > +------------------
> > +
> > +Energy information (in Joules) is based on the multiplier,
> > +1/2^ESU; where ESU is an unsigned integer read from
> > +MSR_RAPL_POWER_UNIT register. Default value is 10000b,
> > +indicating energy status unit is 15.3 micro-Joules increment.
> > +
> > +Reported values are scaled as per the formula
> > +
> > +scaled value = ((1/2^ESU) * (Raw value) * 1000000UL) in Joules
> > +
> > +Users calculate power for a given domain by calculating
> > +     dEnergy/dTime for that domain.
> > +
> > +Socket energy accumulation
> > +--------------------------
> > +
> > +Current Socket energy status register is 32bit, assuming a 240W
> > +system, the register would wrap around in
> > +
> > +     2^32*15.3 e-6/240 = 273.80416512 secs to wrap(~4.5 mins)
> > +
> > +To improve the wrap around time, a kernel thread is implemented
> > +to accumulate the socket energy counter to a 64-bit counter. The
> > +kernel thread starts running during probe, wakes up at 100secs
>
> wakes up every 100 seconds
>
> > +and stops running in remove.
>
> stops running when the driver is removed.
Will correct them
>
> All counters need to be be updated by the kernel thread, not just the socket
> counter. If the socket counter can wrap in 4.5 minutes, the matching per-core
> counters on a 64-core system can wrap every 4.5 * 64 = 288 minutes, which
> isn't much better. This might be even worse on a system with fewer cores and
> higher per-core power.

Agreed, just need few clarifications though
1. Is it OK to implement another thread for cores alone, as it need not run as
frequently as the socket thread.
2. We have a scenario on servers, a thread accumulating energy for all 128 cores
might compromise the compute. So, i would like to provide a configuration
symbol or sysfs mechanism to enable/disable the core accumulation.

>
> > +
> > +A socket energy read would return the current register value
> > +added to the respective energy accumulator.
> > +
> > +Sysfs attributes
> > +----------------
> > +
> > +=============== ========  =====================================
> > +Attribute    Label     Description
> > +===============      ========  =====================================
> > +
> > +* For index N between [1] and [nr_cpus]
> > +
> > +===============      ========  ======================================
> > +energy[N]_input EcoreX         Core Energy   X = [0] to [nr_cpus - 1]
> > +                       Measured input core energy
> > +===============      ========  ======================================
> > +
> > +* For N between [nr_cpus] and [nr_cpus + nr_socks]
> > +
> > +===============      ========  ======================================
> > +energy[N]_input EsocketX  Socket Energy X = [0] to [nr_socks -1]
> > +                       Measured input socket energy
> > +=============== ========  ======================================
> > diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
> > index 8ef62fd39787..fc4b89810e67 100644
> > --- a/Documentation/hwmon/index.rst
> > +++ b/Documentation/hwmon/index.rst
> > @@ -39,6 +39,7 @@ Hardware Monitoring Kernel Drivers
> >     adt7470
> >     adt7475
> >     amc6821
> > +   amd_energy
> >     asb100
> >     asc7621
> >     aspeed-pwm-tacho



-- 
Shine bright,
(: Nav :)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] hwmon: Add amd_energy driver to report energy counters
  2020-05-01 17:50 [PATCH 1/3] hwmon: Add amd_energy driver to report energy counters Naveen Krishna Chatradhi
                   ` (2 preceding siblings ...)
  2020-05-06 16:23 ` [PATCH 1/3] hwmon: Add amd_energy driver to report energy counters Guenter Roeck
@ 2020-05-06 18:32 ` Guenter Roeck
  2020-05-14 15:56   ` Naveen Krishna Ch
  3 siblings, 1 reply; 11+ messages in thread
From: Guenter Roeck @ 2020-05-06 18:32 UTC (permalink / raw)
  To: Naveen Krishna Chatradhi; +Cc: linux-hwmon, naveenkrishna.ch

On Fri, May 01, 2020 at 11:20:01PM +0530, Naveen Krishna Chatradhi wrote:
> This patch adds hwmon based amd_energy driver support for
> family 17h processors from AMD.
> 
> The driver provides following interface to the userspace
> 1. Reports the per core consumption
> 	* file: "energy%d_input", label: "Ecore%03d"
> 2. Reports per socket energy consumption
> 	* file: "energy%d_input", label: "Esocket%d"
> 3. To, increase the wrap around time of the socket energy
>    counters, a 64bit accumultor is implemented.
> 4. Reports scaled energy value in Joules.
> 
> Cc: Guenter Roeck <linux@roeck-us.net>
> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>

When running the code, I still see that both hyperthreading siblings are
reported. On Ryzen 3900X:

amd_energy-isa-0000
Adapter: ISA adapter
Ecore000:    740.54 J
Ecore001:    618.91 J
Ecore002:    565.79 J
Ecore003:    629.43 J
Ecore004:    602.00 J
Ecore005:    610.42 J
Ecore006:    447.83 J
Ecore007:    443.74 J
Ecore008:    436.55 J
Ecore009:    445.41 J
Ecore010:    442.97 J
Ecore011:    445.35 J
Ecore012:    740.54 J	<--- everything from here is duplicates
Ecore013:    618.91 J
Ecore014:    565.79 J
Ecore015:    629.43 J
Ecore016:    602.00 J
Ecore017:    610.42 J
Ecore018:    447.83 J
Ecore019:    443.74 J
Ecore020:    436.55 J
Ecore021:    445.41 J
Ecore022:    442.97 J
Ecore023:    445.35 J
Esocket0:     74.62 kJ

That does not make sense. Please filter out hyperthreading siblings.

Also, adding up the individual cores from the output above results
in 6.428 kJ, which is far off the 74.62 kJ reported for the socket.
The driver was instantiated during boot, so I'd expect no overflow.
Also, a couple of minutes later I see 6.497 kJ vs. 87.73 kJ.
So the socket accumuulated some 13 kJ while the individual cores
only accumulated a miniscule .069 kJ.

Looking at my 3900X system with an uptime of 30 minutes, I see ~107 kJ
energy for the socket. That translates to an average power consumption
of ~60W, which seems high for an idle system. On the other side,
individual cores show 450 - 750 Joule, which translates to an average
power consumption between ~0.25W and ~0.41W per core.

Letting the system run for a minute under load, I noticed two things:
- The socket counter wrapped, even though the kernel thread is running
  (it reported 122.33 kJ initially and 77.85 kJ after one minute).
- Per-core energy consumption for one minute under load is ~500 Joule,
  which translates to ~8W, or ~96W total with 12 cores. That makes
  sense, but it suggests that the socket energy counter is way off.

Do you have an explanation for the difference and the numbers ?

Thanks,
Guenter

> ---
> Changes in v5:
> 1. To improve wrap around time. A kernel thread is implemented
>    to accumulate the socket energy counters into a 64bit.
> 2. Address other comments from Guenter.
> 
>  drivers/hwmon/Kconfig      |  10 ++
>  drivers/hwmon/Makefile     |   1 +
>  drivers/hwmon/amd_energy.c | 329 +++++++++++++++++++++++++++++++++++++
>  3 files changed, 340 insertions(+)
>  create mode 100644 drivers/hwmon/amd_energy.c
> 
> diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> index 4c62f900bf7e..e165e10c49ef 100644
> --- a/drivers/hwmon/Kconfig
> +++ b/drivers/hwmon/Kconfig
> @@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
>  	  This driver can also be built as a module. If so, the module
>  	  will be called fam15h_power.
>  
> +config SENSORS_AMD_ENERGY
> +	tristate "AMD RAPL MSR based Energy driver"
> +	depends on X86
> +	help
> +	  If you say yes here you get support for core and package energy
> +	  sensors, based on RAPL MSR for AMD family 17h and above CPUs.
> +
> +	  This driver can also be built as a module. If so, the module
> +	  will be called as amd_energy.
> +
>  config SENSORS_APPLESMC
>  	tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
>  	depends on INPUT && X86
> diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> index b0b9c8e57176..318f89dc7133 100644
> --- a/drivers/hwmon/Makefile
> +++ b/drivers/hwmon/Makefile
> @@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)	+= adt7411.o
>  obj-$(CONFIG_SENSORS_ADT7462)	+= adt7462.o
>  obj-$(CONFIG_SENSORS_ADT7470)	+= adt7470.o
>  obj-$(CONFIG_SENSORS_ADT7475)	+= adt7475.o
> +obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
>  obj-$(CONFIG_SENSORS_APPLESMC)	+= applesmc.o
>  obj-$(CONFIG_SENSORS_ARM_SCMI)	+= scmi-hwmon.o
>  obj-$(CONFIG_SENSORS_ARM_SCPI)	+= scpi-hwmon.o
> diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
> new file mode 100644
> index 000000000000..16364576f067
> --- /dev/null
> +++ b/drivers/hwmon/amd_energy.c
> @@ -0,0 +1,329 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +/*
> + * Copyright (C) 2020 Advanced Micro Devices, Inc.
> + */
> +#include <asm/cpu_device_id.h>
> +
> +#include <linux/bits.h>
> +#include <linux/cpu.h>
> +#include <linux/cpumask.h>
> +#include <linux/device.h>
> +#include <linux/hwmon.h>
> +#include <linux/kernel.h>
> +#include <linux/kthread.h>
> +#include <linux/list.h>
> +#include <linux/module.h>
> +#include <linux/mutex.h>
> +#include <linux/processor.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +
> +#define DRVNAME			"amd_energy"
> +
> +#define ENERGY_PWR_UNIT_MSR	0xC0010299
> +#define ENERGY_CORE_MSR		0xC001029A
> +#define ENERGY_PKG_MSR		0xC001029B
> +
> +#define AMD_ENERGY_UNIT_MASK	0x01F00
> +
> +struct amd_energy_data {
> +	struct hwmon_channel_info energy_info;
> +	const struct hwmon_channel_info *info[2];
> +	struct hwmon_chip_info chip;
> +	/* Lock around the accumulator */
> +	struct mutex lock;
> +	/* Energy Status Units */
> +	u64 energy_units;
> +	/* An accumulator for each socket */
> +	u64 *energy_ctrs;
> +	u64 *prev_value;
> +};
> +
> +/* */
> +struct task_struct *wrap_accumulate;
> +static int nr_cpus, nr_socks;
> +
> +static int amd_energy_read_labels(struct device *dev,
> +				  enum hwmon_sensor_types type,
> +				 u32 attr, int channel,
> +				 const char **str)
> +{
> +	char *buf = devm_kcalloc(dev, 10, sizeof(char), GFP_KERNEL);
> +
> +	if (channel >= nr_cpus)
> +		scnprintf(buf, 9, "Esocket%u", channel - nr_cpus);
> +	else
> +		scnprintf(buf, 9, "Ecore%03u", channel);
> +
> +	*str = buf;
> +
> +	return 0;
> +}
> +
> +static int get_energy_units(struct amd_energy_data *data)
> +{
> +	u64 rapl_units;
> +	int ret;
> +
> +	ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
> +	if (ret)
> +		return ret;
> +
> +	data->energy_units = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
> +	return 0;
> +}
> +
> +static int read_accumulate(struct amd_energy_data *data)
> +{
> +	int ret, cu;
> +	u64 input = 0;
> +
> +	for (cu = 0; cu < nr_socks; cu++) {
> +		int cpu;
> +
> +		cpu = cpumask_first_and(cpu_online_mask,
> +					cpumask_of_node(cu));
> +
> +		ret = rdmsrl_safe_on_cpu(cpu, ENERGY_PKG_MSR, &input);
> +		if (ret)
> +			return ret;
> +
> +		if (input > data->prev_value[cu])
> +			data->energy_ctrs[cu] +=
> +				input - data->prev_value[cu];
> +		else
> +			data->energy_ctrs[cu] +=
> +				UINT_MAX - data->prev_value[cu] + input;
> +
> +		data->prev_value[cu] = input;
> +	}
> +		return 0;
> +}
> +
> +static int amd_energy_read(struct device *dev,
> +			   enum hwmon_sensor_types type,
> +			   u32 attr, int channel, long *val)
> +{
> +	struct amd_energy_data *data = dev_get_drvdata(dev);
> +	int ret, cpu;
> +	u32 reg;
> +	u64 input;
> +
> +	if (channel >= nr_cpus) {
> +		reg = ENERGY_PKG_MSR;
> +		cpu = cpumask_first_and(cpu_online_mask,
> +					cpumask_of_node
> +					(channel - nr_cpus));
> +	} else {
> +		reg = ENERGY_CORE_MSR;
> +		cpu = channel;
> +	}
> +
> +	if (!cpu_online(cpu))
> +		return -ENODEV;
> +
> +	if (data->energy_units == 0 && get_energy_units(data))
> +		return -EAGAIN;
> +
> +	mutex_lock(&data->lock);
> +	ret = rdmsrl_safe_on_cpu(cpu, reg, &input);
> +	if (ret)
> +		return -EAGAIN;
> +
> +	/* Accumulation is for sockets only */
> +	if (channel >= nr_cpus)
> +		input += data->energy_ctrs[channel - nr_cpus];
> +
> +	/* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) Joules */
> +	*val = (long)input * div64_ul(1000000UL,
> +				      BIT(data->energy_units));
> +	mutex_unlock(&data->lock);
> +
> +	return 0;
> +}
> +
> +static umode_t amd_energy_is_visible(const void *_data,
> +				     enum hwmon_sensor_types type,
> +				     u32 attr, int channel)
> +{
> +	return 0444;
> +}
> +
> +static int socket_accumulator(void *p)
> +{
> +	struct amd_energy_data *data = (struct amd_energy_data *)p;
> +
> +	while (!kthread_should_stop()) {
> +		mutex_lock(&data->lock);
> +		read_accumulate(data);
> +		mutex_unlock(&data->lock);
> +
> +		set_current_state(TASK_INTERRUPTIBLE);
> +		if (kthread_should_stop())
> +			break;
> +
> +		/*
> +		 * On a 240W system, the Socket Energy status
> +		 * register may wrap around in
> +		 * 2^32*15.3 e-6/240 = 273.8041 secs (~4.5 mins)
> +		 *
> +		 * let us accumulate for every 100secs
> +		 */
> +		schedule_timeout(msecs_to_jiffies(100000));
> +	}
> +	return 0;
> +}
> +
> +static const struct hwmon_ops amd_energy_ops = {
> +	.is_visible = amd_energy_is_visible,
> +	.read = amd_energy_read,
> +	.read_string = amd_energy_read_labels,
> +};
> +
> +static int amd_create_sensor(struct device *dev,
> +			     struct amd_energy_data *data,
> +			     u8 type, u32 config)
> +{
> +	int i;
> +	u32 *s_config;
> +
> +	struct hwmon_channel_info *info = &data->energy_info;
> +
> +	nr_socks = num_possible_nodes();
> +	nr_cpus = num_present_cpus();
> +
> +	s_config = devm_kcalloc(dev, nr_cpus + nr_socks,
> +				sizeof(u32), GFP_KERNEL);
> +	if (!s_config)
> +		return -ENOMEM;
> +
> +	data->energy_ctrs = devm_kcalloc(dev, nr_socks,
> +					 sizeof(u64), GFP_KERNEL);
> +	if (!data->energy_ctrs)
> +		return -ENOMEM;
> +
> +	data->prev_value = devm_kcalloc(dev, nr_socks,
> +					sizeof(u64), GFP_KERNEL);
> +	if (!data->prev_value)
> +		return -ENOMEM;
> +
> +	info->type = type;
> +	info->config = s_config;
> +
> +	for (i = 0; i < nr_cpus + nr_socks; i++)
> +		s_config[i] = config;
> +
> +	return 0;
> +}
> +
> +static int amd_energy_probe(struct platform_device *pdev)
> +{
> +	struct device *hwmon_dev;
> +	struct amd_energy_data *data;
> +	struct device *dev = &pdev->dev;
> +	int ret;
> +
> +	data = devm_kzalloc(dev,
> +			    sizeof(struct amd_energy_data), GFP_KERNEL);
> +	if (!data)
> +		return -ENOMEM;
> +
> +	data->chip.ops = &amd_energy_ops;
> +	data->chip.info = data->info;
> +
> +	/* Populate per-core energy reporting */
> +	data->info[0] = &data->energy_info;
> +	amd_create_sensor(dev, data,  hwmon_energy,
> +			  HWMON_E_INPUT | HWMON_E_LABEL);
> +
> +	mutex_init(&data->lock);
> +
> +	ret = get_energy_units(data);
> +	if (ret)
> +		return ret;
> +
> +	hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
> +							 data,
> +							 &data->chip,
> +							 NULL);
> +	if (IS_ERR(hwmon_dev))
> +		return PTR_ERR(hwmon_dev);
> +
> +	wrap_accumulate = kthread_run(socket_accumulator, data,
> +				      "%s", dev_name(hwmon_dev));
> +	if (IS_ERR(wrap_accumulate))
> +		return PTR_ERR(wrap_accumulate);
> +
> +	return 0;
> +}
> +
> +static int amd_energy_remove(struct platform_device *pdev)
> +{
> +	if (wrap_accumulate)
> +		kthread_stop(wrap_accumulate);
> +
> +	return 0;
> +}
> +
> +static const struct platform_device_id amd_energy_ids[] = {
> +	{ .name = DRVNAME, },
> +	{}
> +};
> +MODULE_DEVICE_TABLE(platform, amd_energy_ids);
> +
> +static struct platform_driver amd_energy_driver = {
> +	.probe = amd_energy_probe,
> +	.remove	= amd_energy_remove,
> +	.id_table = amd_energy_ids,
> +	.driver = {
> +		.name = DRVNAME,
> +	},
> +};
> +
> +static struct platform_device *amd_energy_platdev;
> +
> +static const struct x86_cpu_id cpu_ids[] __initconst = {
> +	X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL),
> +	X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL),
> +	{}
> +};
> +MODULE_DEVICE_TABLE(x86cpu, cpu_ids);
> +
> +static int __init amd_energy_init(void)
> +{
> +	int ret;
> +
> +	if (!x86_match_cpu(cpu_ids))
> +		return -ENODEV;
> +
> +	ret = platform_driver_register(&amd_energy_driver);
> +	if (ret < 0)
> +		return ret;
> +
> +	amd_energy_platdev = platform_device_alloc(DRVNAME, 0);
> +	if (!amd_energy_platdev)
> +		return -ENOMEM;
> +
> +	ret = platform_device_add(amd_energy_platdev);
> +	if (ret) {
> +		platform_device_unregister(amd_energy_platdev);
> +		return ret;
> +	}
> +
> +	return ret;
> +}
> +
> +static void __exit amd_energy_exit(void)
> +{
> +	platform_device_unregister(amd_energy_platdev);
> +	platform_driver_unregister(&amd_energy_driver);
> +}
> +
> +module_init(amd_energy_init);
> +module_exit(amd_energy_exit);
> +
> +MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface");
> +MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>");
> +MODULE_LICENSE("GPL");

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] hwmon: (amd_energy) Add documentation
  2020-05-06 17:11     ` Naveen Krishna Ch
@ 2020-05-06 19:27       ` Guenter Roeck
  2020-05-11 16:49         ` Naveen Krishna Ch
  0 siblings, 1 reply; 11+ messages in thread
From: Guenter Roeck @ 2020-05-06 19:27 UTC (permalink / raw)
  To: Naveen Krishna Ch; +Cc: Naveen Krishna Chatradhi, linux-hwmon

Hi,

On 5/6/20 10:11 AM, Naveen Krishna Ch wrote:
> Hi Guenter
> 
> On Wed, 6 May 2020 at 22:03, Guenter Roeck <linux@roeck-us.net> wrote:
>>
>> On Fri, May 01, 2020 at 11:20:02PM +0530, Naveen Krishna Chatradhi wrote:
>>> Document amd_energy driver with all chips supported by it.
>>>
>>> Cc: Guenter Roeck <linux@roeck-us.net>
>>> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
>>> ---
>>> Changes in v5: None
>>>
>>>  Documentation/hwmon/amd_energy.rst | 100 +++++++++++++++++++++++++++++
>>>  Documentation/hwmon/index.rst      |   1 +
>>>  2 files changed, 101 insertions(+)
>>>  create mode 100644 Documentation/hwmon/amd_energy.rst
>>>
>>> diff --git a/Documentation/hwmon/amd_energy.rst b/Documentation/hwmon/amd_energy.rst
>>> new file mode 100644
>>> index 000000000000..2216c8b13e58
>>> --- /dev/null
>>> +++ b/Documentation/hwmon/amd_energy.rst
>>> @@ -0,0 +1,100 @@
>>> +.. SPDX-License-Identifier: GPL-2.0
>>> +
>>> +Kernel driver amd_energy
>>> +==========================
>>> +
>>> +Supported chips:
>>> +
>>> +* AMD Family 17h Processors
>>> +
>>> +  Prefix: 'amd_energy'
>>> +
>>> +  Addresses used:  RAPL MSRs
>>> +
>>> +  Datasheets:
>>> +
>>> +  - Processor Programming Reference (PPR) for AMD Family 17h Model 01h, Revision B1 Processors
>>> +
>>> +     https://developer.amd.com/wp-content/resources/55570-B1_PUB.zip
>>> +
>>> +  - Preliminary Processor Programming Reference (PPR) for AMD Family 17h Model 31h, Revision B0 Processors
>>> +
>>> +     https://developer.amd.com/wp-content/resources/56176_ppr_Family_17h_Model_71h_B0_pub_Rev_3.06.zip
>>> +
>>> +Author: Naveen Krishna Chatradhi <nchatrad@amd.com>
>>> +
>>> +Description
>>> +-----------
>>> +
>>> +The Energy driver exposes the energy counters that are
>>> +reported via the Running Average Power Limit (RAPL)
>>> +Model-specific Registers (MSRs) via the hardware monitor
>>> +(HWMON) sysfs interface.
>>> +
>>> +1. Power, Energy and Time Units
>>> +   MSR_RAPL_POWER_UNIT/ C001_0299:
>>> +   shared with all cores in the socket
>>> +
>>> +2. Energy consumed by each Core
>>> +   MSR_CORE_ENERGY_STATUS/ C001_029A:
>>> +   32-bitRO, Accumulator, core-level power reporting
>>> +
>>> +3. Energy consumed by Socket
>>> +   MSR_PACKAGE_ENERGY_STATUS/ C001_029B:
>>> +   32-bitRO, Accumulator, socket-level power reporting,
>>> +   shared with all cores in socket
>>> +
>>> +These registers are updated every 1ms and cleared on
>>> +reset of the system.
>>> +
>>> +Energy Caluclation
>>> +------------------
>>> +
>>> +Energy information (in Joules) is based on the multiplier,
>>> +1/2^ESU; where ESU is an unsigned integer read from
>>> +MSR_RAPL_POWER_UNIT register. Default value is 10000b,
>>> +indicating energy status unit is 15.3 micro-Joules increment.
>>> +
>>> +Reported values are scaled as per the formula
>>> +
>>> +scaled value = ((1/2^ESU) * (Raw value) * 1000000UL) in Joules
>>> +
>>> +Users calculate power for a given domain by calculating
>>> +     dEnergy/dTime for that domain.
>>> +
>>> +Socket energy accumulation
>>> +--------------------------
>>> +
>>> +Current Socket energy status register is 32bit, assuming a 240W
>>> +system, the register would wrap around in
>>> +
>>> +     2^32*15.3 e-6/240 = 273.80416512 secs to wrap(~4.5 mins)
>>> +
>>> +To improve the wrap around time, a kernel thread is implemented
>>> +to accumulate the socket energy counter to a 64-bit counter. The
>>> +kernel thread starts running during probe, wakes up at 100secs
>>
>> wakes up every 100 seconds
>>
>>> +and stops running in remove.
>>
>> stops running when the driver is removed.
> Will correct them
>>
>> All counters need to be be updated by the kernel thread, not just the socket
>> counter. If the socket counter can wrap in 4.5 minutes, the matching per-core
>> counters on a 64-core system can wrap every 4.5 * 64 = 288 minutes, which
>> isn't much better. This might be even worse on a system with fewer cores and
>> higher per-core power.
> 
> Agreed, just need few clarifications though
> 1. Is it OK to implement another thread for cores alone, as it need not run as
> frequently as the socket thread.

Your call, but personally I think it is not worth the overhead; see below.

> 2. We have a scenario on servers, a thread accumulating energy for all 128 cores
> might compromise the compute. So, i would like to provide a configuration
> symbol or sysfs mechanism to enable/disable the core accumulation.
> 

Another option would be to use a single thread but only update a single core
per socket at a time. If the socket thread needs to run every N seconds,
one would assume that the core thread only needs to run every N * (number
of cores) seconds (assuming that it uses the same scale). If so, reading
the data for one core (or maybe a couple of cores if the scale is different)
plus the data for the socket should not be that expensive.

If that is not acceptable, it might make more sense to blacklist the driver
entirely in such situations; without accumulation the reported values are
pretty much worthless.

Thanks,
Guenter

>>
>>> +
>>> +A socket energy read would return the current register value
>>> +added to the respective energy accumulator.
>>> +
>>> +Sysfs attributes
>>> +----------------
>>> +
>>> +=============== ========  =====================================
>>> +Attribute    Label     Description
>>> +===============      ========  =====================================
>>> +
>>> +* For index N between [1] and [nr_cpus]
>>> +
>>> +===============      ========  ======================================
>>> +energy[N]_input EcoreX         Core Energy   X = [0] to [nr_cpus - 1]
>>> +                       Measured input core energy
>>> +===============      ========  ======================================
>>> +
>>> +* For N between [nr_cpus] and [nr_cpus + nr_socks]
>>> +
>>> +===============      ========  ======================================
>>> +energy[N]_input EsocketX  Socket Energy X = [0] to [nr_socks -1]
>>> +                       Measured input socket energy
>>> +=============== ========  ======================================
>>> diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
>>> index 8ef62fd39787..fc4b89810e67 100644
>>> --- a/Documentation/hwmon/index.rst
>>> +++ b/Documentation/hwmon/index.rst
>>> @@ -39,6 +39,7 @@ Hardware Monitoring Kernel Drivers
>>>     adt7470
>>>     adt7475
>>>     amc6821
>>> +   amd_energy
>>>     asb100
>>>     asc7621
>>>     aspeed-pwm-tacho
> 
> 
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] hwmon: (amd_energy) Add documentation
  2020-05-06 19:27       ` Guenter Roeck
@ 2020-05-11 16:49         ` Naveen Krishna Ch
  0 siblings, 0 replies; 11+ messages in thread
From: Naveen Krishna Ch @ 2020-05-11 16:49 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Naveen Krishna Chatradhi, linux-hwmon

Hi Guenter

On Thu, 7 May 2020 at 00:57, Guenter Roeck <linux@roeck-us.net> wrote:
>
> Hi,
>
> On 5/6/20 10:11 AM, Naveen Krishna Ch wrote:
> > Hi Guenter
> >
> > On Wed, 6 May 2020 at 22:03, Guenter Roeck <linux@roeck-us.net> wrote:
> >>
> >> On Fri, May 01, 2020 at 11:20:02PM +0530, Naveen Krishna Chatradhi wrote:
> >>> Document amd_energy driver with all chips supported by it.
> >>>
> >>> Cc: Guenter Roeck <linux@roeck-us.net>
> >>> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> >>> ---
> >>> Changes in v5: None
> >>>
> >>>  Documentation/hwmon/amd_energy.rst | 100 +++++++++++++++++++++++++++++
> >>>  Documentation/hwmon/index.rst      |   1 +
> >>>  2 files changed, 101 insertions(+)
> >>>  create mode 100644 Documentation/hwmon/amd_energy.rst
> >>>
> >>> diff --git a/Documentation/hwmon/amd_energy.rst b/Documentation/hwmon/amd_energy.rst
> >>> new file mode 100644
> >>> index 000000000000..2216c8b13e58
> >>> --- /dev/null
> >>> +++ b/Documentation/hwmon/amd_energy.rst
> >>> @@ -0,0 +1,100 @@
> >>> +.. SPDX-License-Identifier: GPL-2.0
> >>> +
> >>> +Kernel driver amd_energy
> >>> +==========================
> >>> +
> >>> +Supported chips:
> >>> +
> >>> +* AMD Family 17h Processors
> >>> +
> >>> +  Prefix: 'amd_energy'
> >>> +
> >>> +  Addresses used:  RAPL MSRs
> >>> +
> >>> +  Datasheets:
> >>> +
> >>> +  - Processor Programming Reference (PPR) for AMD Family 17h Model 01h, Revision B1 Processors
> >>> +
> >>> +     https://developer.amd.com/wp-content/resources/55570-B1_PUB.zip
> >>> +
> >>> +  - Preliminary Processor Programming Reference (PPR) for AMD Family 17h Model 31h, Revision B0 Processors
> >>> +
> >>> +     https://developer.amd.com/wp-content/resources/56176_ppr_Family_17h_Model_71h_B0_pub_Rev_3.06.zip
> >>> +
> >>> +Author: Naveen Krishna Chatradhi <nchatrad@amd.com>
> >>> +
> >>> +Description
> >>> +-----------
> >>> +
> >>> +The Energy driver exposes the energy counters that are
> >>> +reported via the Running Average Power Limit (RAPL)
> >>> +Model-specific Registers (MSRs) via the hardware monitor
> >>> +(HWMON) sysfs interface.
> >>> +
> >>> +1. Power, Energy and Time Units
> >>> +   MSR_RAPL_POWER_UNIT/ C001_0299:
> >>> +   shared with all cores in the socket
> >>> +
> >>> +2. Energy consumed by each Core
> >>> +   MSR_CORE_ENERGY_STATUS/ C001_029A:
> >>> +   32-bitRO, Accumulator, core-level power reporting
> >>> +
> >>> +3. Energy consumed by Socket
> >>> +   MSR_PACKAGE_ENERGY_STATUS/ C001_029B:
> >>> +   32-bitRO, Accumulator, socket-level power reporting,
> >>> +   shared with all cores in socket
> >>> +
> >>> +These registers are updated every 1ms and cleared on
> >>> +reset of the system.
> >>> +
> >>> +Energy Caluclation
> >>> +------------------
> >>> +
> >>> +Energy information (in Joules) is based on the multiplier,
> >>> +1/2^ESU; where ESU is an unsigned integer read from
> >>> +MSR_RAPL_POWER_UNIT register. Default value is 10000b,
> >>> +indicating energy status unit is 15.3 micro-Joules increment.
> >>> +
> >>> +Reported values are scaled as per the formula
> >>> +
> >>> +scaled value = ((1/2^ESU) * (Raw value) * 1000000UL) in Joules
> >>> +
> >>> +Users calculate power for a given domain by calculating
> >>> +     dEnergy/dTime for that domain.
> >>> +
> >>> +Socket energy accumulation
> >>> +--------------------------
> >>> +
> >>> +Current Socket energy status register is 32bit, assuming a 240W
> >>> +system, the register would wrap around in
> >>> +
> >>> +     2^32*15.3 e-6/240 = 273.80416512 secs to wrap(~4.5 mins)
> >>> +
> >>> +To improve the wrap around time, a kernel thread is implemented
> >>> +to accumulate the socket energy counter to a 64-bit counter. The
> >>> +kernel thread starts running during probe, wakes up at 100secs
> >>
> >> wakes up every 100 seconds
> >>
> >>> +and stops running in remove.
> >>
> >> stops running when the driver is removed.
> > Will correct them
> >>
> >> All counters need to be be updated by the kernel thread, not just the socket
> >> counter. If the socket counter can wrap in 4.5 minutes, the matching per-core
> >> counters on a 64-core system can wrap every 4.5 * 64 = 288 minutes, which
> >> isn't much better. This might be even worse on a system with fewer cores and
> >> higher per-core power.
> >
> > Agreed, just need few clarifications though
> > 1. Is it OK to implement another thread for cores alone, as it need not run as
> > frequently as the socket thread.
>
> Your call, but personally I think it is not worth the overhead; see below.
>
> > 2. We have a scenario on servers, a thread accumulating energy for all 128 cores
> > might compromise the compute. So, i would like to provide a configuration
> > symbol or sysfs mechanism to enable/disable the core accumulation.
> >
>
> Another option would be to use a single thread but only update a single core
> per socket at a time. If the socket thread needs to run every N seconds,
> one would assume that the core thread only needs to run every N * (number
> of cores) seconds (assuming that it uses the same scale). If so, reading
> the data for one core (or maybe a couple of cores if the scale is different)
> plus the data for the socket should not be that expensive.
This is good and possible. Thanks
>
> If that is not acceptable, it might make more sense to blacklist the driver
> entirely in such situations; without accumulation the reported values are
> pretty much worthless.
Sure, will implement core accumulation as well.
>
> Thanks,
> Guenter
>
> >>
> >>> +
> >>> +A socket energy read would return the current register value
> >>> +added to the respective energy accumulator.
> >>> +
> >>> +Sysfs attributes
> >>> +----------------
> >>> +
> >>> +=============== ========  =====================================
> >>> +Attribute    Label     Description
> >>> +===============      ========  =====================================
> >>> +
> >>> +* For index N between [1] and [nr_cpus]
> >>> +
> >>> +===============      ========  ======================================
> >>> +energy[N]_input EcoreX         Core Energy   X = [0] to [nr_cpus - 1]
> >>> +                       Measured input core energy
> >>> +===============      ========  ======================================
> >>> +
> >>> +* For N between [nr_cpus] and [nr_cpus + nr_socks]
> >>> +
> >>> +===============      ========  ======================================
> >>> +energy[N]_input EsocketX  Socket Energy X = [0] to [nr_socks -1]
> >>> +                       Measured input socket energy
> >>> +=============== ========  ======================================
> >>> diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
> >>> index 8ef62fd39787..fc4b89810e67 100644
> >>> --- a/Documentation/hwmon/index.rst
> >>> +++ b/Documentation/hwmon/index.rst
> >>> @@ -39,6 +39,7 @@ Hardware Monitoring Kernel Drivers
> >>>     adt7470
> >>>     adt7475
> >>>     amc6821
> >>> +   amd_energy
> >>>     asb100
> >>>     asc7621
> >>>     aspeed-pwm-tacho
> >
> >
> >
>


-- 
Shine bright,
(: Nav :)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] hwmon: Add amd_energy driver to report energy counters
  2020-05-06 18:32 ` Guenter Roeck
@ 2020-05-14 15:56   ` Naveen Krishna Ch
  0 siblings, 0 replies; 11+ messages in thread
From: Naveen Krishna Ch @ 2020-05-14 15:56 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Naveen Krishna Chatradhi, linux-hwmon

HI Guenter

On Thu, 7 May 2020 at 00:02, Guenter Roeck <linux@roeck-us.net> wrote:
>
> On Fri, May 01, 2020 at 11:20:01PM +0530, Naveen Krishna Chatradhi wrote:
> > This patch adds hwmon based amd_energy driver support for
> > family 17h processors from AMD.
> >
> > The driver provides following interface to the userspace
> > 1. Reports the per core consumption
> >       * file: "energy%d_input", label: "Ecore%03d"
> > 2. Reports per socket energy consumption
> >       * file: "energy%d_input", label: "Esocket%d"
> > 3. To, increase the wrap around time of the socket energy
> >    counters, a 64bit accumultor is implemented.
> > 4. Reports scaled energy value in Joules.
> >
> > Cc: Guenter Roeck <linux@roeck-us.net>
> > Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
>
> When running the code, I still see that both hyperthreading siblings are
> reported. On Ryzen 3900X:
>
> amd_energy-isa-0000
> Adapter: ISA adapter
> Ecore000:    740.54 J
> Ecore001:    618.91 J
> Ecore002:    565.79 J
> Ecore003:    629.43 J
> Ecore004:    602.00 J
> Ecore005:    610.42 J
> Ecore006:    447.83 J
> Ecore007:    443.74 J
> Ecore008:    436.55 J
> Ecore009:    445.41 J
> Ecore010:    442.97 J
> Ecore011:    445.35 J
> Ecore012:    740.54 J   <--- everything from here is duplicates
> Ecore013:    618.91 J
> Ecore014:    565.79 J
> Ecore015:    629.43 J
> Ecore016:    602.00 J
> Ecore017:    610.42 J
> Ecore018:    447.83 J
> Ecore019:    443.74 J
> Ecore020:    436.55 J
> Ecore021:    445.41 J
> Ecore022:    442.97 J
> Ecore023:    445.35 J
> Esocket0:     74.62 kJ
>
> That does not make sense. Please filter out hyperthreading siblings.
As Linux enumerates the core and thread siblings as cpus, My idea is
to let the user request energy consumption for any of the listed cpus by
its index.
I was trying to address the use case where, user wants to allocate work
on specific cpus, may want to gather energy of those cpus before and
after the application is run. But, yes since the registers are core level. if
the user tries to aggregate all the core values, there would duplicates.
I will remove the siblings.

>
> Also, adding up the individual cores from the output above results
> in 6.428 kJ, which is far off the 74.62 kJ reported for the socket.
> The driver was instantiated during boot, so I'd expect no overflow.
> Also, a couple of minutes later I see 6.497 kJ vs. 87.73 kJ.
> So the socket accumuulated some 13 kJ while the individual cores
> only accumulated a miniscule .069 kJ.
>
> Looking at my 3900X system with an uptime of 30 minutes, I see ~107 kJ
> energy for the socket. That translates to an average power consumption
> of ~60W, which seems high for an idle system. On the other side,
> individual cores show 450 - 750 Joule, which translates to an average
> power consumption between ~0.25W and ~0.41W per core.
>
> Letting the system run for a minute under load, I noticed two things:
> - The socket counter wrapped, even though the kernel thread is running
>   (it reported 122.33 kJ initially and 77.85 kJ after one minute).
Thanks for pointing out. I've identified an issue in the accumulation code,
which fixes this wrap around.

> - Per-core energy consumption for one minute under load is ~500 Joule,
>   which translates to ~8W, or ~96W total with 12 cores. That makes
>   sense, but it suggests that the socket energy counter is way off.
>
> Do you have an explanation for the difference and the numbers ?

At low work loads the socket energy counters is far higher than the
aggregated values of core energy counters. This is due to the energy
consumed by the other components. At higher loads i noticed
the gap between the socket value eases up to 30 times aggregated
values of all the cores in the socket.

Yes Guenter, i noticed and discussed with the hardware team. This is
all the information that is available at present.

The energy unit scale is the same for all the core and socket register.
I will submit a patch accumulate the core energy counters aswell.
>
> Thanks,
> Guenter
>
> > ---
> > Changes in v5:
> > 1. To improve wrap around time. A kernel thread is implemented
> >    to accumulate the socket energy counters into a 64bit.
> > 2. Address other comments from Guenter.
> >
> >  drivers/hwmon/Kconfig      |  10 ++
> >  drivers/hwmon/Makefile     |   1 +
> >  drivers/hwmon/amd_energy.c | 329 +++++++++++++++++++++++++++++++++++++
> >  3 files changed, 340 insertions(+)
> >  create mode 100644 drivers/hwmon/amd_energy.c
> >
> > diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> > index 4c62f900bf7e..e165e10c49ef 100644
> > --- a/drivers/hwmon/Kconfig
> > +++ b/drivers/hwmon/Kconfig
> > @@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
> >         This driver can also be built as a module. If so, the module
> >         will be called fam15h_power.
> >
> > +config SENSORS_AMD_ENERGY
> > +     tristate "AMD RAPL MSR based Energy driver"
> > +     depends on X86
> > +     help
> > +       If you say yes here you get support for core and package energy
> > +       sensors, based on RAPL MSR for AMD family 17h and above CPUs.
> > +
> > +       This driver can also be built as a module. If so, the module
> > +       will be called as amd_energy.
> > +
> >  config SENSORS_APPLESMC
> >       tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
> >       depends on INPUT && X86
> > diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> > index b0b9c8e57176..318f89dc7133 100644
> > --- a/drivers/hwmon/Makefile
> > +++ b/drivers/hwmon/Makefile
> > @@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)       += adt7411.o
> >  obj-$(CONFIG_SENSORS_ADT7462)        += adt7462.o
> >  obj-$(CONFIG_SENSORS_ADT7470)        += adt7470.o
> >  obj-$(CONFIG_SENSORS_ADT7475)        += adt7475.o
> > +obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
> >  obj-$(CONFIG_SENSORS_APPLESMC)       += applesmc.o
> >  obj-$(CONFIG_SENSORS_ARM_SCMI)       += scmi-hwmon.o
> >  obj-$(CONFIG_SENSORS_ARM_SCPI)       += scpi-hwmon.o
> > diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
> > new file mode 100644
> > index 000000000000..16364576f067
> > --- /dev/null
> > +++ b/drivers/hwmon/amd_energy.c
> > @@ -0,0 +1,329 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +
> > +/*
> > + * Copyright (C) 2020 Advanced Micro Devices, Inc.
> > + */
> > +#include <asm/cpu_device_id.h>
> > +
> > +#include <linux/bits.h>
> > +#include <linux/cpu.h>
> > +#include <linux/cpumask.h>
> > +#include <linux/device.h>
> > +#include <linux/hwmon.h>
> > +#include <linux/kernel.h>
> > +#include <linux/kthread.h>
> > +#include <linux/list.h>
> > +#include <linux/module.h>
> > +#include <linux/mutex.h>
> > +#include <linux/processor.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/slab.h>
> > +#include <linux/types.h>
> > +
> > +#define DRVNAME                      "amd_energy"
> > +
> > +#define ENERGY_PWR_UNIT_MSR  0xC0010299
> > +#define ENERGY_CORE_MSR              0xC001029A
> > +#define ENERGY_PKG_MSR               0xC001029B
> > +
> > +#define AMD_ENERGY_UNIT_MASK 0x01F00
> > +
> > +struct amd_energy_data {
> > +     struct hwmon_channel_info energy_info;
> > +     const struct hwmon_channel_info *info[2];
> > +     struct hwmon_chip_info chip;
> > +     /* Lock around the accumulator */
> > +     struct mutex lock;
> > +     /* Energy Status Units */
> > +     u64 energy_units;
> > +     /* An accumulator for each socket */
> > +     u64 *energy_ctrs;
> > +     u64 *prev_value;
> > +};
> > +
> > +/* */
> > +struct task_struct *wrap_accumulate;
> > +static int nr_cpus, nr_socks;
> > +
> > +static int amd_energy_read_labels(struct device *dev,
> > +                               enum hwmon_sensor_types type,
> > +                              u32 attr, int channel,
> > +                              const char **str)
> > +{
> > +     char *buf = devm_kcalloc(dev, 10, sizeof(char), GFP_KERNEL);
> > +
> > +     if (channel >= nr_cpus)
> > +             scnprintf(buf, 9, "Esocket%u", channel - nr_cpus);
> > +     else
> > +             scnprintf(buf, 9, "Ecore%03u", channel);
> > +
> > +     *str = buf;
> > +
> > +     return 0;
> > +}
> > +
> > +static int get_energy_units(struct amd_energy_data *data)
> > +{
> > +     u64 rapl_units;
> > +     int ret;
> > +
> > +     ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
> > +     if (ret)
> > +             return ret;
> > +
> > +     data->energy_units = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
> > +     return 0;
> > +}
> > +
> > +static int read_accumulate(struct amd_energy_data *data)
> > +{
> > +     int ret, cu;
> > +     u64 input = 0;
> > +
> > +     for (cu = 0; cu < nr_socks; cu++) {
> > +             int cpu;
> > +
> > +             cpu = cpumask_first_and(cpu_online_mask,
> > +                                     cpumask_of_node(cu));
> > +
> > +             ret = rdmsrl_safe_on_cpu(cpu, ENERGY_PKG_MSR, &input);
> > +             if (ret)
> > +                     return ret;
> > +
> > +             if (input > data->prev_value[cu])
> > +                     data->energy_ctrs[cu] +=
> > +                             input - data->prev_value[cu];
> > +             else
> > +                     data->energy_ctrs[cu] +=
> > +                             UINT_MAX - data->prev_value[cu] + input;
> > +
> > +             data->prev_value[cu] = input;
> > +     }
> > +             return 0;
> > +}
> > +
> > +static int amd_energy_read(struct device *dev,
> > +                        enum hwmon_sensor_types type,
> > +                        u32 attr, int channel, long *val)
> > +{
> > +     struct amd_energy_data *data = dev_get_drvdata(dev);
> > +     int ret, cpu;
> > +     u32 reg;
> > +     u64 input;
> > +
> > +     if (channel >= nr_cpus) {
> > +             reg = ENERGY_PKG_MSR;
> > +             cpu = cpumask_first_and(cpu_online_mask,
> > +                                     cpumask_of_node
> > +                                     (channel - nr_cpus));
> > +     } else {
> > +             reg = ENERGY_CORE_MSR;
> > +             cpu = channel;
> > +     }
> > +
> > +     if (!cpu_online(cpu))
> > +             return -ENODEV;
> > +
> > +     if (data->energy_units == 0 && get_energy_units(data))
> > +             return -EAGAIN;
> > +
> > +     mutex_lock(&data->lock);
> > +     ret = rdmsrl_safe_on_cpu(cpu, reg, &input);
> > +     if (ret)
> > +             return -EAGAIN;
> > +
> > +     /* Accumulation is for sockets only */
> > +     if (channel >= nr_cpus)
> > +             input += data->energy_ctrs[channel - nr_cpus];
> > +
> > +     /* Energy consumed = (1/(2^ESU) * RAW * 1000000UL) Joules */
> > +     *val = (long)input * div64_ul(1000000UL,
> > +                                   BIT(data->energy_units));
> > +     mutex_unlock(&data->lock);
> > +
> > +     return 0;
> > +}
> > +
> > +static umode_t amd_energy_is_visible(const void *_data,
> > +                                  enum hwmon_sensor_types type,
> > +                                  u32 attr, int channel)
> > +{
> > +     return 0444;
> > +}
> > +
> > +static int socket_accumulator(void *p)
> > +{
> > +     struct amd_energy_data *data = (struct amd_energy_data *)p;
> > +
> > +     while (!kthread_should_stop()) {
> > +             mutex_lock(&data->lock);
> > +             read_accumulate(data);
> > +             mutex_unlock(&data->lock);
> > +
> > +             set_current_state(TASK_INTERRUPTIBLE);
> > +             if (kthread_should_stop())
> > +                     break;
> > +
> > +             /*
> > +              * On a 240W system, the Socket Energy status
> > +              * register may wrap around in
> > +              * 2^32*15.3 e-6/240 = 273.8041 secs (~4.5 mins)
> > +              *
> > +              * let us accumulate for every 100secs
> > +              */
> > +             schedule_timeout(msecs_to_jiffies(100000));
> > +     }
> > +     return 0;
> > +}
> > +
> > +static const struct hwmon_ops amd_energy_ops = {
> > +     .is_visible = amd_energy_is_visible,
> > +     .read = amd_energy_read,
> > +     .read_string = amd_energy_read_labels,
> > +};
> > +
> > +static int amd_create_sensor(struct device *dev,
> > +                          struct amd_energy_data *data,
> > +                          u8 type, u32 config)
> > +{
> > +     int i;
> > +     u32 *s_config;
> > +
> > +     struct hwmon_channel_info *info = &data->energy_info;
> > +
> > +     nr_socks = num_possible_nodes();
> > +     nr_cpus = num_present_cpus();
> > +
> > +     s_config = devm_kcalloc(dev, nr_cpus + nr_socks,
> > +                             sizeof(u32), GFP_KERNEL);
> > +     if (!s_config)
> > +             return -ENOMEM;
> > +
> > +     data->energy_ctrs = devm_kcalloc(dev, nr_socks,
> > +                                      sizeof(u64), GFP_KERNEL);
> > +     if (!data->energy_ctrs)
> > +             return -ENOMEM;
> > +
> > +     data->prev_value = devm_kcalloc(dev, nr_socks,
> > +                                     sizeof(u64), GFP_KERNEL);
> > +     if (!data->prev_value)
> > +             return -ENOMEM;
> > +
> > +     info->type = type;
> > +     info->config = s_config;
> > +
> > +     for (i = 0; i < nr_cpus + nr_socks; i++)
> > +             s_config[i] = config;
> > +
> > +     return 0;
> > +}
> > +
> > +static int amd_energy_probe(struct platform_device *pdev)
> > +{
> > +     struct device *hwmon_dev;
> > +     struct amd_energy_data *data;
> > +     struct device *dev = &pdev->dev;
> > +     int ret;
> > +
> > +     data = devm_kzalloc(dev,
> > +                         sizeof(struct amd_energy_data), GFP_KERNEL);
> > +     if (!data)
> > +             return -ENOMEM;
> > +
> > +     data->chip.ops = &amd_energy_ops;
> > +     data->chip.info = data->info;
> > +
> > +     /* Populate per-core energy reporting */
> > +     data->info[0] = &data->energy_info;
> > +     amd_create_sensor(dev, data,  hwmon_energy,
> > +                       HWMON_E_INPUT | HWMON_E_LABEL);
> > +
> > +     mutex_init(&data->lock);
> > +
> > +     ret = get_energy_units(data);
> > +     if (ret)
> > +             return ret;
> > +
> > +     hwmon_dev = devm_hwmon_device_register_with_info(dev, DRVNAME,
> > +                                                      data,
> > +                                                      &data->chip,
> > +                                                      NULL);
> > +     if (IS_ERR(hwmon_dev))
> > +             return PTR_ERR(hwmon_dev);
> > +
> > +     wrap_accumulate = kthread_run(socket_accumulator, data,
> > +                                   "%s", dev_name(hwmon_dev));
> > +     if (IS_ERR(wrap_accumulate))
> > +             return PTR_ERR(wrap_accumulate);
> > +
> > +     return 0;
> > +}
> > +
> > +static int amd_energy_remove(struct platform_device *pdev)
> > +{
> > +     if (wrap_accumulate)
> > +             kthread_stop(wrap_accumulate);
> > +
> > +     return 0;
> > +}
> > +
> > +static const struct platform_device_id amd_energy_ids[] = {
> > +     { .name = DRVNAME, },
> > +     {}
> > +};
> > +MODULE_DEVICE_TABLE(platform, amd_energy_ids);
> > +
> > +static struct platform_driver amd_energy_driver = {
> > +     .probe = amd_energy_probe,
> > +     .remove = amd_energy_remove,
> > +     .id_table = amd_energy_ids,
> > +     .driver = {
> > +             .name = DRVNAME,
> > +     },
> > +};
> > +
> > +static struct platform_device *amd_energy_platdev;
> > +
> > +static const struct x86_cpu_id cpu_ids[] __initconst = {
> > +     X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL),
> > +     X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL),
> > +     {}
> > +};
> > +MODULE_DEVICE_TABLE(x86cpu, cpu_ids);
> > +
> > +static int __init amd_energy_init(void)
> > +{
> > +     int ret;
> > +
> > +     if (!x86_match_cpu(cpu_ids))
> > +             return -ENODEV;
> > +
> > +     ret = platform_driver_register(&amd_energy_driver);
> > +     if (ret < 0)
> > +             return ret;
> > +
> > +     amd_energy_platdev = platform_device_alloc(DRVNAME, 0);
> > +     if (!amd_energy_platdev)
> > +             return -ENOMEM;
> > +
> > +     ret = platform_device_add(amd_energy_platdev);
> > +     if (ret) {
> > +             platform_device_unregister(amd_energy_platdev);
> > +             return ret;
> > +     }
> > +
> > +     return ret;
> > +}
> > +
> > +static void __exit amd_energy_exit(void)
> > +{
> > +     platform_device_unregister(amd_energy_platdev);
> > +     platform_driver_unregister(&amd_energy_driver);
> > +}
> > +
> > +module_init(amd_energy_init);
> > +module_exit(amd_energy_exit);
> > +
> > +MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface");
> > +MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>");
> > +MODULE_LICENSE("GPL");



--
Shine bright,
(: Nav :)

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, back to index

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-01 17:50 [PATCH 1/3] hwmon: Add amd_energy driver to report energy counters Naveen Krishna Chatradhi
2020-05-01 17:50 ` [PATCH 2/3] hwmon: (amd_energy) Add documentation Naveen Krishna Chatradhi
2020-05-06 16:33   ` Guenter Roeck
2020-05-06 17:11     ` Naveen Krishna Ch
2020-05-06 19:27       ` Guenter Roeck
2020-05-11 16:49         ` Naveen Krishna Ch
2020-05-01 17:50 ` [PATCH 3/3] MAINTAINERS: add entry for AMD energy driver Naveen Krishna Chatradhi
2020-05-06 16:23 ` [PATCH 1/3] hwmon: Add amd_energy driver to report energy counters Guenter Roeck
2020-05-06 17:06   ` Naveen Krishna Ch
2020-05-06 18:32 ` Guenter Roeck
2020-05-14 15:56   ` Naveen Krishna Ch

Linux-Hwmon Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-hwmon/0 linux-hwmon/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-hwmon linux-hwmon/ https://lore.kernel.org/linux-hwmon \
		linux-hwmon@vger.kernel.org
	public-inbox-index linux-hwmon

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-hwmon


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git