linux-hwmon.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters
@ 2020-04-09 15:39 Naveen Krishna Chatradhi
  2020-04-09 15:39 ` [PATCH 2/2] hwmon: (amd_energy) Add documentation Naveen Krishna Chatradhi
  2020-04-10 15:57 ` [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters Guenter Roeck
  0 siblings, 2 replies; 12+ messages in thread
From: Naveen Krishna Chatradhi @ 2020-04-09 15:39 UTC (permalink / raw)
  To: linux-hwmon; +Cc: naveenkrishna.ch, Naveen Krishna Chatradhi, Guenter Roeck

This patch adds hwmon based amd_energy driver support for
family 17h processors from AMD.

The driver provides following interface to the userspace
1. Reports the per core and per socket energy consumption
   via sysfs entries created per core and per socket respectively.
    * per core energy consumed via "core_energy%d_input"
    * package/socket energy consumed via "sock_energy%d_input".
2. Uses topology_max_packages() to get number of sockets.
3. Uses num_present_cpus() to get the number of CPUs.
4. Reports the energy units via energy_unit sysfs entry

Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
---
 drivers/hwmon/Kconfig      |  10 ++
 drivers/hwmon/Makefile     |   1 +
 drivers/hwmon/amd_energy.c | 273 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 284 insertions(+)
 create mode 100644 drivers/hwmon/amd_energy.c

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 05a30832c6ba..d83f1403b429 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
 	  This driver can also be built as a module. If so, the module
 	  will be called fam15h_power.
 
+config SENSORS_AMD_ENERGY
+	tristate "AMD RAPL MSR based Energy driver"
+	depends on X86
+	help
+	  If you say yes here you get support for core and package energy
+	  sensors, based on RAPL MSR for AMD family 17h and above CPUs.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called as amd_energy.
+
 config SENSORS_APPLESMC
 	tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
 	depends on INPUT && X86
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index b0b9c8e57176..318f89dc7133 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)	+= adt7411.o
 obj-$(CONFIG_SENSORS_ADT7462)	+= adt7462.o
 obj-$(CONFIG_SENSORS_ADT7470)	+= adt7470.o
 obj-$(CONFIG_SENSORS_ADT7475)	+= adt7475.o
+obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
 obj-$(CONFIG_SENSORS_APPLESMC)	+= applesmc.o
 obj-$(CONFIG_SENSORS_ARM_SCMI)	+= scmi-hwmon.o
 obj-$(CONFIG_SENSORS_ARM_SCPI)	+= scpi-hwmon.o
diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
new file mode 100644
index 000000000000..ddb7073ae39b
--- /dev/null
+++ b/drivers/hwmon/amd_energy.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/cpu.h>
+#include <linux/list.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/processor.h>
+#include <linux/platform_device.h>
+#include <linux/cpumask.h>
+
+#include <asm/cpu_device_id.h>
+
+#define DRVNAME	"amd_energy"
+
+#define ENERGY_PWR_UNIT_MSR	0xC0010299
+#define ENERGY_CORE_MSR	0xC001029A
+#define ENERGY_PCK_MSR		0xC001029B
+
+#define AMD_TIME_UNIT_MASK	0xF0000
+#define AMD_ENERGY_UNIT_MASK	0x01F00
+#define AMD_POWER_UNIT_MASK	0x0000F
+
+#define ENERGY_STATUS_MASK	0xffffffff
+
+#define AMD_FAM_17		0x17 /* ZP, SSP */
+
+/* Useful macros */
+#define AMD_CPU_FAM_ANY(_family, _model)	\
+{						\
+	.vendor		= X86_VENDOR_AMD,	\
+	.family		= _family,		\
+	.model		= _model,		\
+	.feature	= X86_FEATURE_ANY,	\
+}
+
+#define AMD_CPU_FAM(_model)			\
+	AMD_CPU_FAM_ANY(AMD_FAM_##_model, X86_MODEL_ANY)
+
+static struct device_attribute units_attr;
+
+struct sensor_data {
+	unsigned int scale;
+	union {
+		unsigned int cpu_nr;
+		unsigned int sock_nr;
+	};
+	struct device_attribute dev_attr_input;
+	char input[32];
+};
+
+struct amd_energy_sensors {
+	struct sensor_data *data;
+	struct attribute **attrs;
+	struct attribute_group group;
+	const struct attribute_group *groups[1];
+};
+
+static ssize_t amd_units_u64_show(struct device *dev,
+	struct device_attribute *dev_attr, char *buffer)
+{
+	uint64_t rapl_units;
+	uint64_t energy_unit;
+	int ret = 0;
+
+	ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
+	if (ret)
+		return -EAGAIN;
+
+	energy_unit = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
+
+	return snprintf(buffer, 24, "%llu\n", energy_unit);
+}
+
+static ssize_t amd_core_u64_show(struct device *dev,
+		struct device_attribute *dev_attr, char *buffer)
+{
+	unsigned long long value;
+	struct sensor_data *sensor;
+	int ret = 0;
+
+	sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
+	ret = rdmsrl_safe_on_cpu(sensor->cpu_nr, ENERGY_CORE_MSR, &value);
+	if (ret)
+		return -EAGAIN;
+
+	return snprintf(buffer, 24, "%llu\n", value);
+}
+
+static ssize_t amd_sock_u64_show(struct device *dev,
+		struct device_attribute *dev_attr, char *buffer)
+{
+	unsigned long long value;
+	struct sensor_data *sensor;
+	int ret = 0;
+	int cpu, cpu_nr;
+
+	sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
+
+	for_each_possible_cpu(cpu) {
+		struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+		if (c->initialized && c->logical_die_id == sensor->sock_nr) {
+			cpu_nr = cpu;
+			break;
+		}
+		cpu_nr = 0;
+	}
+
+	ret = rdmsrl_safe_on_cpu(cpu_nr, ENERGY_PCK_MSR, &value);
+	if (ret)
+		return -EAGAIN;
+
+	return snprintf(buffer, 24, "%llu\n", value);
+}
+
+static int amd_energy_probe(struct platform_device *pdev)
+{
+	struct amd_energy_sensors *amd_sensors;
+	struct device *hwdev, *dev = &pdev->dev;
+	int nr_cpus, nr_socks, idx = 0;
+
+	nr_cpus = num_present_cpus();
+	nr_socks = topology_max_packages();
+
+	amd_sensors = devm_kzalloc(dev, sizeof(*amd_sensors), GFP_KERNEL);
+	if (!amd_sensors)
+		return -ENOMEM;
+
+	amd_sensors->data = devm_kcalloc(dev, nr_cpus + nr_socks,
+				sizeof(*amd_sensors->data), GFP_KERNEL);
+	if (!amd_sensors->data)
+		return -ENOMEM;
+
+	amd_sensors->attrs = devm_kcalloc(dev, nr_cpus + nr_socks,
+				sizeof(*amd_sensors->attrs), GFP_KERNEL);
+	if (!amd_sensors->attrs)
+		return -ENOMEM;
+
+	/* populate attributes for number of cpus. */
+	for (idx = 0; idx < ; idx++) {
+		struct sensor_data *sensor = &amd_sensors->data[idx];
+
+		snprintf(sensor->input, sizeof(sensor->input),
+				"core_energy%d_input", idx);
+
+		sensor->dev_attr_input.attr.mode = 0444;
+		sensor->dev_attr_input.attr.name = sensor->input;
+		sensor->dev_attr_input.show = amd_core_u64_show;
+
+		sensor->cpu_nr = idx;
+		amd_sensors->attrs[idx] = &sensor->dev_attr_input.attr;
+
+		sysfs_attr_init(amd_sensors->attrs[idx]);
+	}
+
+	/* populate attributes for number of sockets. */
+	for (idx = 0; idx < nr_socks; idx++) {
+		struct sensor_data *sensor =
+			&amd_sensors->data[nr_cpus + idx];
+
+		snprintf(sensor->input,
+			sizeof(sensor->input), "sock_energy%d_input", idx);
+
+		sensor->dev_attr_input.attr.mode = 0444;
+		sensor->dev_attr_input.attr.name = sensor->input;
+		sensor->dev_attr_input.show = amd_sock_u64_show;
+
+		sensor->sock_nr = idx;
+		amd_sensors->attrs[nr_cpus + idx] =
+			&sensor->dev_attr_input.attr;
+
+		sysfs_attr_init(amd_sensors->attrs[nr_cpus + idx]);
+	}
+
+	amd_sensors->group.attrs = amd_sensors->attrs;
+	amd_sensors->groups[0] = &amd_sensors->group;
+
+	platform_set_drvdata(pdev, amd_sensors);
+
+	hwdev = devm_hwmon_device_register_with_groups(dev,
+			"amd_energy", amd_sensors, amd_sensors->groups);
+	if (!hwdev)
+		return PTR_ERR_OR_ZERO(hwdev);
+
+	/* populate attributes for energy units */
+	units_attr.attr.name = "energy_units";
+	units_attr.attr.mode = 0444;
+	units_attr.show = amd_units_u64_show;
+
+	return sysfs_create_file(&hwdev->kobj, &units_attr.attr);
+}
+
+static int amd_energy_remove(struct platform_device *pdev)
+{
+	sysfs_remove_file(&pdev->dev.kobj, &units_attr.attr);
+	return 0;
+}
+
+static const struct platform_device_id amd_energy_ids[] = {
+	{ .name = DRVNAME, },
+	{}
+};
+MODULE_DEVICE_TABLE(platform, amd_energy_ids);
+
+static struct platform_driver amd_energy_driver = {
+	.probe = amd_energy_probe,
+	.remove	= amd_energy_remove,
+	.id_table = amd_energy_ids,
+	.driver = {
+		.name = DRVNAME,
+	},
+};
+
+static struct platform_device *amd_energy_platdev;
+
+static const struct x86_cpu_id cpu_ids[] __initconst = {
+	AMD_CPU_FAM(17),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, cpu_ids);
+
+static int __init amd_energy_init(void)
+{
+	const struct x86_cpu_id *id;
+	int ret;
+
+	id = x86_match_cpu(cpu_ids);
+	if (!id) {
+		pr_err("driver does not support CPU family %d model %d\n",
+			boot_cpu_data.x86, boot_cpu_data.x86_model);
+
+		return -ENODEV;
+	}
+
+	ret = platform_driver_register(&amd_energy_driver);
+	if (ret < 0)
+		return ret;
+
+	amd_energy_platdev = platform_device_alloc(DRVNAME, 0);
+	if (!amd_energy_platdev)
+		return -ENOMEM;
+
+	ret = platform_device_add(amd_energy_platdev);
+	if (ret) {
+		platform_device_unregister(amd_energy_platdev);
+		return ret;
+	}
+
+	return ret;
+}
+
+static void __exit amd_energy_exit(void)
+{
+	platform_device_unregister(amd_energy_platdev);
+	platform_driver_unregister(&amd_energy_driver);
+}
+
+module_init(amd_energy_init);
+module_exit(amd_energy_exit);
+
+MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface");
+MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>");
+MODULE_LICENSE("GPL");
-- 
2.16.4


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/2] hwmon: (amd_energy) Add documentation
  2020-04-09 15:39 [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters Naveen Krishna Chatradhi
@ 2020-04-09 15:39 ` Naveen Krishna Chatradhi
  2020-04-10 15:57 ` [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters Guenter Roeck
  1 sibling, 0 replies; 12+ messages in thread
From: Naveen Krishna Chatradhi @ 2020-04-09 15:39 UTC (permalink / raw)
  To: linux-hwmon; +Cc: naveenkrishna.ch, Naveen Krishna Chatradhi, Guenter Roeck

Document amd_energy driver with all chips supported by it.

Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
---
 Documentation/hwmon/amd_energy.rst | 78 ++++++++++++++++++++++++++++++++++++++
 Documentation/hwmon/index.rst      |  1 +
 2 files changed, 79 insertions(+)
 create mode 100644 Documentation/hwmon/amd_energy.rst

diff --git a/Documentation/hwmon/amd_energy.rst b/Documentation/hwmon/amd_energy.rst
new file mode 100644
index 000000000000..516694d3c1e0
--- /dev/null
+++ b/Documentation/hwmon/amd_energy.rst
@@ -0,0 +1,78 @@
+Kernel driver amd_energy
+==========================
+
+Supported chips:
+
+* AMD Family 17h Processors
+
+  Prefix: 'amd_energy'
+
+  Addresses used:  RAPL MSRs
+
+  Datasheets:
+
+  - Processor Programming Reference (PPR) for
+        AMD Family 17h Model 01h, Revision B1 Processors
+  - Preliminary Processor Programming Reference (PPR) for
+        AMD Family 17h Model 31h, Revision B0 Processors
+
+Author: Naveen Krishna Chatradhi <nchatrad@amd.com>
+
+Description
+-----------
+
+The Energy driver exposes the energy counters that are
+reported via the Running Average Power Limit (RAPL)
+Model-specific Registers (MSRs) via the hardware monitor
+(HWMON) sysfs interface.
+
+1. Power, Energy and Time Units
+  * MSR_RAPL_POWER_UNIT/ C001_0299:
+    -> 32-bit(RO), Configured in BIOS
+    -> shared with all cores in the socket
+
+  * A sysfs entry "energy_units" is created to show the
+    [ESU] energy units in joules
+
+2. Energy consumed by each Core
+  * MSR_CORE_ENERGY_STATUS/ C001_029A:
+    -> 32-bitRO, Accumulator, core-level power reporting
+
+  * A sysfs entry "core_energy%d_input" for each core is
+    created to show the core energy counter value.
+
+3. Energy consumed by Socket
+  * MSR_PACKAGE_ENERGY_STATUS/ C001_029B:
+    -> 32-bitRO, Accumulator, socket-level power reporting,
+    -> shared with all cores in socket
+
+  * A sysfs entry "socket_energy%d_input" for each socket
+    is created to show the socket energy counter value.
+
+Energy Caluclation:
+
+Energy information (in Joules) is based on the multiplier,
+1/2^ESU; where ESU is an unsigned integer read from
+MSR_RAPL_POWER_UNIT register. Default value is 10000b,
+indicating energy status unit is 15.3 micro-Joules increment.
+
+These registers are updated every 1ms and cleared on
+reset of the system.
+
+Users calculate power for a given domain by calculating
+	dEnergy/dTime for that domain.
+
+Sysfs attributes
+----------------
+
+======================= =================================
+
+core_energy[N]_input	Measured input core energy
+			[one sysfs entry for each core]
+
+socket_energy[N]_input	Measured input socket energy
+			[one sysfs entry for each socket]
+
+energy_units		ESU Energy Status Units.
+
+======================= ==================================
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 8ef62fd39787..fc4b89810e67 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -39,6 +39,7 @@ Hardware Monitoring Kernel Drivers
    adt7470
    adt7475
    amc6821
+   amd_energy
    asb100
    asc7621
    aspeed-pwm-tacho
-- 
2.16.4


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters
  2020-04-09 15:39 [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters Naveen Krishna Chatradhi
  2020-04-09 15:39 ` [PATCH 2/2] hwmon: (amd_energy) Add documentation Naveen Krishna Chatradhi
@ 2020-04-10 15:57 ` Guenter Roeck
  2020-04-11  4:49   ` Naveen Krishna Ch
  2020-04-11 12:26   ` Naveen Krishna Ch
  1 sibling, 2 replies; 12+ messages in thread
From: Guenter Roeck @ 2020-04-10 15:57 UTC (permalink / raw)
  To: Naveen Krishna Chatradhi, linux-hwmon; +Cc: naveenkrishna.ch

On 4/9/20 8:39 AM, Naveen Krishna Chatradhi wrote:
> This patch adds hwmon based amd_energy driver support for
> family 17h processors from AMD.
> 
> The driver provides following interface to the userspace
> 1. Reports the per core and per socket energy consumption
>    via sysfs entries created per core and per socket respectively.
>     * per core energy consumed via "core_energy%d_input"
>     * package/socket energy consumed via "sock_energy%d_input".
> 2. Uses topology_max_packages() to get number of sockets.
> 3. Uses num_present_cpus() to get the number of CPUs.
> 4. Reports the energy units via energy_unit sysfs entry
> 
> Cc: Guenter Roeck <linux@roeck-us.net>
> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> ---
>  drivers/hwmon/Kconfig      |  10 ++
>  drivers/hwmon/Makefile     |   1 +
>  drivers/hwmon/amd_energy.c | 273 +++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 284 insertions(+)
>  create mode 100644 drivers/hwmon/amd_energy.c
> 
> diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> index 05a30832c6ba..d83f1403b429 100644
> --- a/drivers/hwmon/Kconfig
> +++ b/drivers/hwmon/Kconfig
> @@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
>  	  This driver can also be built as a module. If so, the module
>  	  will be called fam15h_power.
>  
> +config SENSORS_AMD_ENERGY
> +	tristate "AMD RAPL MSR based Energy driver"
> +	depends on X86
> +	help
> +	  If you say yes here you get support for core and package energy
> +	  sensors, based on RAPL MSR for AMD family 17h and above CPUs.
> +
> +	  This driver can also be built as a module. If so, the module
> +	  will be called as amd_energy.
> +
>  config SENSORS_APPLESMC
>  	tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
>  	depends on INPUT && X86
> diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> index b0b9c8e57176..318f89dc7133 100644
> --- a/drivers/hwmon/Makefile
> +++ b/drivers/hwmon/Makefile
> @@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)	+= adt7411.o
>  obj-$(CONFIG_SENSORS_ADT7462)	+= adt7462.o
>  obj-$(CONFIG_SENSORS_ADT7470)	+= adt7470.o
>  obj-$(CONFIG_SENSORS_ADT7475)	+= adt7475.o
> +obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
>  obj-$(CONFIG_SENSORS_APPLESMC)	+= applesmc.o
>  obj-$(CONFIG_SENSORS_ARM_SCMI)	+= scmi-hwmon.o
>  obj-$(CONFIG_SENSORS_ARM_SCPI)	+= scpi-hwmon.o
> diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
> new file mode 100644
> index 000000000000..ddb7073ae39b
> --- /dev/null
> +++ b/drivers/hwmon/amd_energy.c
> @@ -0,0 +1,273 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +/*
> + * Copyright (C) 2019 Advanced Micro Devices, Inc.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +#include <linux/device.h>
> +#include <linux/sysfs.h>
> +#include <linux/cpu.h>
> +#include <linux/list.h>
> +#include <linux/hwmon.h>
> +#include <linux/hwmon-sysfs.h>
> +#include <linux/processor.h>
> +#include <linux/platform_device.h>
> +#include <linux/cpumask.h>

Alphabetic include file order, please.

> +
> +#include <asm/cpu_device_id.h>
> +
> +#define DRVNAME	"amd_energy"
> +
> +#define ENERGY_PWR_UNIT_MSR	0xC0010299
> +#define ENERGY_CORE_MSR	0xC001029A
> +#define ENERGY_PCK_MSR		0xC001029B
> +
> +#define AMD_TIME_UNIT_MASK	0xF0000
> +#define AMD_ENERGY_UNIT_MASK	0x01F00
> +#define AMD_POWER_UNIT_MASK	0x0000F
> +
> +#define ENERGY_STATUS_MASK	0xffffffff
> +
> +#define AMD_FAM_17		0x17 /* ZP, SSP */
> +
> +/* Useful macros */
> +#define AMD_CPU_FAM_ANY(_family, _model)	\
> +{						\
> +	.vendor		= X86_VENDOR_AMD,	\
> +	.family		= _family,		\
> +	.model		= _model,		\
> +	.feature	= X86_FEATURE_ANY,	\
> +}
> +
> +#define AMD_CPU_FAM(_model)			\
> +	AMD_CPU_FAM_ANY(AMD_FAM_##_model, X86_MODEL_ANY)
> +
> +static struct device_attribute units_attr;
> +
> +struct sensor_data {
> +	unsigned int scale;
> +	union {
> +		unsigned int cpu_nr;
> +		unsigned int sock_nr;
> +	};
> +	struct device_attribute dev_attr_input;
> +	char input[32];
> +};
> +
> +struct amd_energy_sensors {
> +	struct sensor_data *data;
> +	struct attribute **attrs;
> +	struct attribute_group group;
> +	const struct attribute_group *groups[1];

Even if acceptable, this would be wrong. The list of groups
ends with an empty entry, meaning this array would have to have
at least two entries (one for the terminator).

> +};
> +
> +static ssize_t amd_units_u64_show(struct device *dev,
> +	struct device_attribute *dev_attr, char *buffer)
> +{
> +	uint64_t rapl_units;
> +	uint64_t energy_unit;
> +	int ret = 0;
> +
> +	ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
> +	if (ret)
> +		return -EAGAIN;
> +
> +	energy_unit = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
> +
> +	return snprintf(buffer, 24, "%llu\n", energy_unit);
> +}
> +
> +static ssize_t amd_core_u64_show(struct device *dev,
> +		struct device_attribute *dev_attr, char *buffer)
> +{
> +	unsigned long long value;
> +	struct sensor_data *sensor;
> +	int ret = 0;
> +
> +	sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
> +	ret = rdmsrl_safe_on_cpu(sensor->cpu_nr, ENERGY_CORE_MSR, &value);
> +	if (ret)
> +		return -EAGAIN;
> +
> +	return snprintf(buffer, 24, "%llu\n", value);

It seems to me this reports raw values. This is unacceptable. Values need
to be scaled to match the ABI. Energy is reported in microJoule.

> +}
> +
> +static ssize_t amd_sock_u64_show(struct device *dev,
> +		struct device_attribute *dev_attr, char *buffer)
> +{
> +	unsigned long long value;
> +	struct sensor_data *sensor;
> +	int ret = 0;
> +	int cpu, cpu_nr;
> +
> +	sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
> +
> +	for_each_possible_cpu(cpu) {
> +		struct cpuinfo_x86 *c = &cpu_data(cpu);
> +
> +		if (c->initialized && c->logical_die_id == sensor->sock_nr) {
> +			cpu_nr = cpu;
> +			break;
> +		}
> +		cpu_nr = 0;
> +	}
> +
> +	ret = rdmsrl_safe_on_cpu(cpu_nr, ENERGY_PCK_MSR, &value);
> +	if (ret)
> +		return -EAGAIN;
> +
> +	return snprintf(buffer, 24, "%llu\n", value);
> +}
> +
> +static int amd_energy_probe(struct platform_device *pdev)
> +{
> +	struct amd_energy_sensors *amd_sensors;
> +	struct device *hwdev, *dev = &pdev->dev;
> +	int nr_cpus, nr_socks, idx = 0;
> +
> +	nr_cpus = num_present_cpus();
> +	nr_socks = topology_max_packages();
> +
> +	amd_sensors = devm_kzalloc(dev, sizeof(*amd_sensors), GFP_KERNEL);
> +	if (!amd_sensors)
> +		return -ENOMEM;
> +
> +	amd_sensors->data = devm_kcalloc(dev, nr_cpus + nr_socks,
> +				sizeof(*amd_sensors->data), GFP_KERNEL);
> +	if (!amd_sensors->data)
> +		return -ENOMEM;
> +
> +	amd_sensors->attrs = devm_kcalloc(dev, nr_cpus + nr_socks,
> +				sizeof(*amd_sensors->attrs), GFP_KERNEL);
> +	if (!amd_sensors->attrs)
> +		return -ENOMEM;
> +
> +	/* populate attributes for number of cpus. */
> +	for (idx = 0; idx < ; idx++) {
> +		struct sensor_data *sensor = &amd_sensors->data[idx];
> +
> +		snprintf(sensor->input, sizeof(sensor->input),
> +				"core_energy%d_input", idx);
> +

This is unacceptable. Please use standard attributes (energyX_input).
If you want to report what the sensor is for, use labels.

> +		sensor->dev_attr_input.attr.mode = 0444;
> +		sensor->dev_attr_input.attr.name = sensor->input;
> +		sensor->dev_attr_input.show = amd_core_u64_show;
> +
> +		sensor->cpu_nr = idx;
> +		amd_sensors->attrs[idx] = &sensor->dev_attr_input.attr;
> +
> +		sysfs_attr_init(amd_sensors->attrs[idx]);
> +	}
> +
> +	/* populate attributes for number of sockets. */
> +	for (idx = 0; idx < nr_socks; idx++) {
> +		struct sensor_data *sensor =
> +			&amd_sensors->data[nr_cpus + idx];
> +
> +		snprintf(sensor->input,
> +			sizeof(sensor->input), "sock_energy%d_input", idx);
> +
> +		sensor->dev_attr_input.attr.mode = 0444;
> +		sensor->dev_attr_input.attr.name = sensor->input;
> +		sensor->dev_attr_input.show = amd_sock_u64_show;
> +
> +		sensor->sock_nr = idx;
> +		amd_sensors->attrs[nr_cpus + idx] =
> +			&sensor->dev_attr_input.attr;
> +
> +		sysfs_attr_init(amd_sensors->attrs[nr_cpus + idx]);
> +	}

This all makes me wonder what is reported for inactive/disabled CPUs.

> +
> +	amd_sensors->group.attrs = amd_sensors->attrs;
> +	amd_sensors->groups[0] = &amd_sensors->group;
> +
> +	platform_set_drvdata(pdev, amd_sensors);
> +
> +	hwdev = devm_hwmon_device_register_with_groups(dev,
> +			"amd_energy", amd_sensors, amd_sensors->groups);

Please rework the driver to use the devm_hwmon_device_register_with_info() API.

> +	if (!hwdev)
> +		return PTR_ERR_OR_ZERO(hwdev);
> +
> +	/* populate attributes for energy units */
> +	units_attr.attr.name = "energy_units";
> +	units_attr.attr.mode = 0444;
> +	units_attr.show = amd_units_u64_show;
> +
> +	return sysfs_create_file(&hwdev->kobj, &units_attr.attr);

This is irrelevant for this driver. Units are fixed. Besides, registering
sysfs attributes after driver registration is racy and won't be accepted.
Non-standard attributes can be added with the groups argument of
devm_hwmon_device_register_with_info() (not that I see any here).

> +}
> +
> +static int amd_energy_remove(struct platform_device *pdev)
> +{
> +	sysfs_remove_file(&pdev->dev.kobj, &units_attr.attr);
> +	return 0;
> +}
> +
> +static const struct platform_device_id amd_energy_ids[] = {
> +	{ .name = DRVNAME, },
> +	{}
> +};
> +MODULE_DEVICE_TABLE(platform, amd_energy_ids);
> +
> +static struct platform_driver amd_energy_driver = {
> +	.probe = amd_energy_probe,
> +	.remove	= amd_energy_remove,
> +	.id_table = amd_energy_ids,
> +	.driver = {
> +		.name = DRVNAME,
> +	},
> +};
> +
> +static struct platform_device *amd_energy_platdev;
> +
> +static const struct x86_cpu_id cpu_ids[] __initconst = {
> +	AMD_CPU_FAM(17),
> +	{}
> +};
> +MODULE_DEVICE_TABLE(x86cpu, cpu_ids);
> +
> +static int __init amd_energy_init(void)
> +{
> +	const struct x86_cpu_id *id;
> +	int ret;
> +
> +	id = x86_match_cpu(cpu_ids);
> +	if (!id) {
> +		pr_err("driver does not support CPU family %d model %d\n",
> +			boot_cpu_data.x86, boot_cpu_data.x86_model);
> +

This is not an error and thus does not warrant an error message.

> +		return -ENODEV;
> +	}
> +
> +	ret = platform_driver_register(&amd_energy_driver);
> +	if (ret < 0)
> +		return ret;
> +
> +	amd_energy_platdev = platform_device_alloc(DRVNAME, 0);
> +	if (!amd_energy_platdev)
> +		return -ENOMEM;
> +
> +	ret = platform_device_add(amd_energy_platdev);
> +	if (ret) {
> +		platform_device_unregister(amd_energy_platdev);
> +		return ret;
> +	}
> +
> +	return ret;
> +}
> +
> +static void __exit amd_energy_exit(void)
> +{
> +	platform_device_unregister(amd_energy_platdev);
> +	platform_driver_unregister(&amd_energy_driver);
> +}
> +
> +module_init(amd_energy_init);
> +module_exit(amd_energy_exit);
> +
> +MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface");
> +MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>");
> +MODULE_LICENSE("GPL");
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters
  2020-04-10 15:57 ` [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters Guenter Roeck
@ 2020-04-11  4:49   ` Naveen Krishna Ch
  2020-04-11  5:16     ` Guenter Roeck
  2020-04-11 12:26   ` Naveen Krishna Ch
  1 sibling, 1 reply; 12+ messages in thread
From: Naveen Krishna Ch @ 2020-04-11  4:49 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Naveen Krishna Chatradhi, linux-hwmon

Hi,

Thank you for taking time to review and provide feedback.
If you can kindly address a few questions, i would start addressing
your comments and submit another version.

On Fri, 10 Apr 2020 at 21:27, Guenter Roeck <linux@roeck-us.net> wrote:
>
> On 4/9/20 8:39 AM, Naveen Krishna Chatradhi wrote:
> > This patch adds hwmon based amd_energy driver support for
> > family 17h processors from AMD.
> >
> > The driver provides following interface to the userspace
> > 1. Reports the per core and per socket energy consumption
> >    via sysfs entries created per core and per socket respectively.
> >     * per core energy consumed via "core_energy%d_input"
> >     * package/socket energy consumed via "sock_energy%d_input".
> > 2. Uses topology_max_packages() to get number of sockets.
> > 3. Uses num_present_cpus() to get the number of CPUs.
> > 4. Reports the energy units via energy_unit sysfs entry
> >
> > Cc: Guenter Roeck <linux@roeck-us.net>
> > Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> > ---
> >  drivers/hwmon/Kconfig      |  10 ++
> >  drivers/hwmon/Makefile     |   1 +
> >  drivers/hwmon/amd_energy.c | 273 +++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 284 insertions(+)
> >  create mode 100644 drivers/hwmon/amd_energy.c
> >
> > diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> > index 05a30832c6ba..d83f1403b429 100644
> > --- a/drivers/hwmon/Kconfig
> > +++ b/drivers/hwmon/Kconfig
> > @@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
> >         This driver can also be built as a module. If so, the module
> >         will be called fam15h_power.
> >
> > +config SENSORS_AMD_ENERGY
> > +     tristate "AMD RAPL MSR based Energy driver"
> > +     depends on X86
> > +     help
> > +       If you say yes here you get support for core and package energy
> > +       sensors, based on RAPL MSR for AMD family 17h and above CPUs.
> > +
> > +       This driver can also be built as a module. If so, the module
> > +       will be called as amd_energy.
> > +
> >  config SENSORS_APPLESMC
> >       tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
> >       depends on INPUT && X86
> > diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> > index b0b9c8e57176..318f89dc7133 100644
> > --- a/drivers/hwmon/Makefile
> > +++ b/drivers/hwmon/Makefile
> > @@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)       += adt7411.o
> >  obj-$(CONFIG_SENSORS_ADT7462)        += adt7462.o
> >  obj-$(CONFIG_SENSORS_ADT7470)        += adt7470.o
> >  obj-$(CONFIG_SENSORS_ADT7475)        += adt7475.o
> > +obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
> >  obj-$(CONFIG_SENSORS_APPLESMC)       += applesmc.o
> >  obj-$(CONFIG_SENSORS_ARM_SCMI)       += scmi-hwmon.o
> >  obj-$(CONFIG_SENSORS_ARM_SCPI)       += scpi-hwmon.o
> > diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
> > new file mode 100644
> > index 000000000000..ddb7073ae39b
> > --- /dev/null
> > +++ b/drivers/hwmon/amd_energy.c
> > @@ -0,0 +1,273 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +
> > +/*
> > + * Copyright (C) 2019 Advanced Micro Devices, Inc.
> > + */
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/slab.h>
> > +#include <linux/types.h>
> > +#include <linux/device.h>
> > +#include <linux/sysfs.h>
> > +#include <linux/cpu.h>
> > +#include <linux/list.h>
> > +#include <linux/hwmon.h>
> > +#include <linux/hwmon-sysfs.h>
> > +#include <linux/processor.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/cpumask.h>
>
> Alphabetic include file order, please.
I missed it, will correct it.
>
> > +
> > +#include <asm/cpu_device_id.h>
> > +
> > +#define DRVNAME      "amd_energy"
> > +
> > +#define ENERGY_PWR_UNIT_MSR  0xC0010299
> > +#define ENERGY_CORE_MSR      0xC001029A
> > +#define ENERGY_PCK_MSR               0xC001029B
> > +
> > +#define AMD_TIME_UNIT_MASK   0xF0000
> > +#define AMD_ENERGY_UNIT_MASK 0x01F00
> > +#define AMD_POWER_UNIT_MASK  0x0000F
> > +
> > +#define ENERGY_STATUS_MASK   0xffffffff
> > +
> > +#define AMD_FAM_17           0x17 /* ZP, SSP */
> > +
> > +/* Useful macros */
> > +#define AMD_CPU_FAM_ANY(_family, _model)     \
> > +{                                            \
> > +     .vendor         = X86_VENDOR_AMD,       \
> > +     .family         = _family,              \
> > +     .model          = _model,               \
> > +     .feature        = X86_FEATURE_ANY,      \
> > +}
> > +
> > +#define AMD_CPU_FAM(_model)                  \
> > +     AMD_CPU_FAM_ANY(AMD_FAM_##_model, X86_MODEL_ANY)
> > +
> > +static struct device_attribute units_attr;
> > +
> > +struct sensor_data {
> > +     unsigned int scale;
> > +     union {
> > +             unsigned int cpu_nr;
> > +             unsigned int sock_nr;
> > +     };
> > +     struct device_attribute dev_attr_input;
> > +     char input[32];
> > +};
> > +
> > +struct amd_energy_sensors {
> > +     struct sensor_data *data;
> > +     struct attribute **attrs;
> > +     struct attribute_group group;
> > +     const struct attribute_group *groups[1];
>
> Even if acceptable, this would be wrong. The list of groups
> ends with an empty entry, meaning this array would have to have
> at least two entries (one for the terminator).
Will correct it.
>
> > +};
> > +
> > +static ssize_t amd_units_u64_show(struct device *dev,
> > +     struct device_attribute *dev_attr, char *buffer)
> > +{
> > +     uint64_t rapl_units;
> > +     uint64_t energy_unit;
> > +     int ret = 0;
> > +
> > +     ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
> > +     if (ret)
> > +             return -EAGAIN;
> > +
> > +     energy_unit = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
> > +
> > +     return snprintf(buffer, 24, "%llu\n", energy_unit);
> > +}
> > +
> > +static ssize_t amd_core_u64_show(struct device *dev,
> > +             struct device_attribute *dev_attr, char *buffer)
> > +{
> > +     unsigned long long value;
> > +     struct sensor_data *sensor;
> > +     int ret = 0;
> > +
> > +     sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
> > +     ret = rdmsrl_safe_on_cpu(sensor->cpu_nr, ENERGY_CORE_MSR, &value);
> > +     if (ret)
> > +             return -EAGAIN;
> > +
> > +     return snprintf(buffer, 24, "%llu\n", value);
>
> It seems to me this reports raw values. This is unacceptable. Values need
> to be scaled to match the ABI. Energy is reported in microJoule.
I was of the opinion that driver provides the raw values and user-space (can use
float/double)  will be able to scale the value, which would involve
calculation of
"1/2^ESU * RAW value"

>
> > +}
> > +
> > +static ssize_t amd_sock_u64_show(struct device *dev,
> > +             struct device_attribute *dev_attr, char *buffer)
> > +{
> > +     unsigned long long value;
> > +     struct sensor_data *sensor;
> > +     int ret = 0;
> > +     int cpu, cpu_nr;
> > +
> > +     sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
> > +
> > +     for_each_possible_cpu(cpu) {
> > +             struct cpuinfo_x86 *c = &cpu_data(cpu);
> > +
> > +             if (c->initialized && c->logical_die_id == sensor->sock_nr) {
> > +                     cpu_nr = cpu;
> > +                     break;
> > +             }
> > +             cpu_nr = 0;
> > +     }
> > +
> > +     ret = rdmsrl_safe_on_cpu(cpu_nr, ENERGY_PCK_MSR, &value);
> > +     if (ret)
> > +             return -EAGAIN;
> > +
> > +     return snprintf(buffer, 24, "%llu\n", value);
> > +}
> > +
> > +static int amd_energy_probe(struct platform_device *pdev)
> > +{
> > +     struct amd_energy_sensors *amd_sensors;
> > +     struct device *hwdev, *dev = &pdev->dev;
> > +     int nr_cpus, nr_socks, idx = 0;
> > +
> > +     nr_cpus = num_present_cpus();
> > +     nr_socks = topology_max_packages();
> > +
> > +     amd_sensors = devm_kzalloc(dev, sizeof(*amd_sensors), GFP_KERNEL);
> > +     if (!amd_sensors)
> > +             return -ENOMEM;
> > +
> > +     amd_sensors->data = devm_kcalloc(dev, nr_cpus + nr_socks,
> > +                             sizeof(*amd_sensors->data), GFP_KERNEL);
> > +     if (!amd_sensors->data)
> > +             return -ENOMEM;
> > +
> > +     amd_sensors->attrs = devm_kcalloc(dev, nr_cpus + nr_socks,
> > +                             sizeof(*amd_sensors->attrs), GFP_KERNEL);
> > +     if (!amd_sensors->attrs)
> > +             return -ENOMEM;
> > +
> > +     /* populate attributes for number of cpus. */
> > +     for (idx = 0; idx < ; idx++) {
> > +             struct sensor_data *sensor = &amd_sensors->data[idx];
> > +
> > +             snprintf(sensor->input, sizeof(sensor->input),
> > +                             "core_energy%d_input", idx);
> > +
>
> This is unacceptable. Please use standard attributes (energyX_input).
> If you want to report what the sensor is for, use labels.
Got it.
>
> > +             sensor->dev_attr_input.attr.mode = 0444;
> > +             sensor->dev_attr_input.attr.name = sensor->input;
> > +             sensor->dev_attr_input.show = amd_core_u64_show;
> > +
> > +             sensor->cpu_nr = idx;
> > +             amd_sensors->attrs[idx] = &sensor->dev_attr_input.attr;
> > +
> > +             sysfs_attr_init(amd_sensors->attrs[idx]);
> > +     }
> > +
> > +     /* populate attributes for number of sockets. */
> > +     for (idx = 0; idx < nr_socks; idx++) {
> > +             struct sensor_data *sensor =
> > +                     &amd_sensors->data[nr_cpus + idx];
> > +
> > +             snprintf(sensor->input,
> > +                     sizeof(sensor->input), "sock_energy%d_input", idx);
> > +
> > +             sensor->dev_attr_input.attr.mode = 0444;
> > +             sensor->dev_attr_input.attr.name = sensor->input;
> > +             sensor->dev_attr_input.show = amd_sock_u64_show;
> > +
> > +             sensor->sock_nr = idx;
> > +             amd_sensors->attrs[nr_cpus + idx] =
> > +                     &sensor->dev_attr_input.attr;
> > +
> > +             sysfs_attr_init(amd_sensors->attrs[nr_cpus + idx]);
> > +     }
>
> This all makes me wonder what is reported for inactive/disabled CPUs.
>
> > +
> > +     amd_sensors->group.attrs = amd_sensors->attrs;
> > +     amd_sensors->groups[0] = &amd_sensors->group;
> > +
> > +     platform_set_drvdata(pdev, amd_sensors);
> > +
> > +     hwdev = devm_hwmon_device_register_with_groups(dev,
> > +                     "amd_energy", amd_sensors, amd_sensors->groups);
>
> Please rework the driver to use the devm_hwmon_device_register_with_info() API.
>
> > +     if (!hwdev)
> > +             return PTR_ERR_OR_ZERO(hwdev);
> > +
> > +     /* populate attributes for energy units */
> > +     units_attr.attr.name = "energy_units";
> > +     units_attr.attr.mode = 0444;
> > +     units_attr.show = amd_units_u64_show;
> > +
> > +     return sysfs_create_file(&hwdev->kobj, &units_attr.attr);
>
> This is irrelevant for this driver. Units are fixed. Besides, registering
> sysfs attributes after driver registration is racy and won't be accepted.
> Non-standard attributes can be added with the groups argument of
> devm_hwmon_device_register_with_info() (not that I see any here).
I will try to change the driver around using
devm_hwmon_device_register_with_info()

>
> > +}
> > +
> > +static int amd_energy_remove(struct platform_device *pdev)
> > +{
> > +     sysfs_remove_file(&pdev->dev.kobj, &units_attr.attr);
> > +     return 0;
> > +}
> > +
> > +static const struct platform_device_id amd_energy_ids[] = {
> > +     { .name = DRVNAME, },
> > +     {}
> > +};
> > +MODULE_DEVICE_TABLE(platform, amd_energy_ids);
> > +
> > +static struct platform_driver amd_energy_driver = {
> > +     .probe = amd_energy_probe,
> > +     .remove = amd_energy_remove,
> > +     .id_table = amd_energy_ids,
> > +     .driver = {
> > +             .name = DRVNAME,
> > +     },
> > +};
> > +
> > +static struct platform_device *amd_energy_platdev;
> > +
> > +static const struct x86_cpu_id cpu_ids[] __initconst = {
> > +     AMD_CPU_FAM(17),
> > +     {}
> > +};
> > +MODULE_DEVICE_TABLE(x86cpu, cpu_ids);
> > +
> > +static int __init amd_energy_init(void)
> > +{
> > +     const struct x86_cpu_id *id;
> > +     int ret;
> > +
> > +     id = x86_match_cpu(cpu_ids);
> > +     if (!id) {
> > +             pr_err("driver does not support CPU family %d model %d\n",
> > +                     boot_cpu_data.x86, boot_cpu_data.x86_model);
> > +
>
> This is not an error and thus does not warrant an error message.
Ok
>
> > +             return -ENODEV;
> > +     }
> > +
> > +     ret = platform_driver_register(&amd_energy_driver);
> > +     if (ret < 0)
> > +             return ret;
> > +
> > +     amd_energy_platdev = platform_device_alloc(DRVNAME, 0);
> > +     if (!amd_energy_platdev)
> > +             return -ENOMEM;
> > +
> > +     ret = platform_device_add(amd_energy_platdev);
> > +     if (ret) {
> > +             platform_device_unregister(amd_energy_platdev);
> > +             return ret;
> > +     }
> > +
> > +     return ret;
> > +}
> > +
> > +static void __exit amd_energy_exit(void)
> > +{
> > +     platform_device_unregister(amd_energy_platdev);
> > +     platform_driver_unregister(&amd_energy_driver);
> > +}
> > +
> > +module_init(amd_energy_init);
> > +module_exit(amd_energy_exit);
> > +
> > +MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface");
> > +MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>");
> > +MODULE_LICENSE("GPL");
> >
>


-- 
Shine bright,
Naveen

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters
  2020-04-11  4:49   ` Naveen Krishna Ch
@ 2020-04-11  5:16     ` Guenter Roeck
  2020-04-11  5:43       ` Naveen Krishna Ch
  0 siblings, 1 reply; 12+ messages in thread
From: Guenter Roeck @ 2020-04-11  5:16 UTC (permalink / raw)
  To: Naveen Krishna Ch; +Cc: Naveen Krishna Chatradhi, linux-hwmon

On 4/10/20 9:49 PM, Naveen Krishna Ch wrote:
> Hi,
> 
> Thank you for taking time to review and provide feedback.
> If you can kindly address a few questions, i would start addressing
> your comments and submit another version.
> 

[ ... ]

>>> +
>>> +static ssize_t amd_core_u64_show(struct device *dev,
>>> +             struct device_attribute *dev_attr, char *buffer)
>>> +{
>>> +     unsigned long long value;
>>> +     struct sensor_data *sensor;
>>> +     int ret = 0;
>>> +
>>> +     sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
>>> +     ret = rdmsrl_safe_on_cpu(sensor->cpu_nr, ENERGY_CORE_MSR, &value);
>>> +     if (ret)
>>> +             return -EAGAIN;
>>> +
>>> +     return snprintf(buffer, 24, "%llu\n", value);
>>
>> It seems to me this reports raw values. This is unacceptable. Values need
>> to be scaled to match the ABI. Energy is reported in microJoule.
> I was of the opinion that driver provides the raw values and user-space (can use
> float/double)  will be able to scale the value, which would involve
> calculation of
> "1/2^ESU * RAW value"
> 

Please see Documentation/hwmon/sysfs-interface.rst. Sure, there is
/etc/sensors3.conf, but that only applies if the kernel driver can not
provide scaled values. This is not the case here.

FWIW, I implemented prototype code for this some time ago as
extension of the k10temp driver, and the arithmetic wasn't that
difficult.

Guenter

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters
  2020-04-11  5:16     ` Guenter Roeck
@ 2020-04-11  5:43       ` Naveen Krishna Ch
  0 siblings, 0 replies; 12+ messages in thread
From: Naveen Krishna Ch @ 2020-04-11  5:43 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Naveen Krishna Chatradhi, linux-hwmon

Hi Guenter,

On Sat, 11 Apr 2020 at 10:46, Guenter Roeck <linux@roeck-us.net> wrote:
>
> On 4/10/20 9:49 PM, Naveen Krishna Ch wrote:
> > Hi,
> >
> > Thank you for taking time to review and provide feedback.
> > If you can kindly address a few questions, i would start addressing
> > your comments and submit another version.
> >
>
> [ ... ]
>
> >>> +
> >>> +static ssize_t amd_core_u64_show(struct device *dev,
> >>> +             struct device_attribute *dev_attr, char *buffer)
> >>> +{
> >>> +     unsigned long long value;
> >>> +     struct sensor_data *sensor;
> >>> +     int ret = 0;
> >>> +
> >>> +     sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
> >>> +     ret = rdmsrl_safe_on_cpu(sensor->cpu_nr, ENERGY_CORE_MSR, &value);
> >>> +     if (ret)
> >>> +             return -EAGAIN;
> >>> +
> >>> +     return snprintf(buffer, 24, "%llu\n", value);
> >>
> >> It seems to me this reports raw values. This is unacceptable. Values need
> >> to be scaled to match the ABI. Energy is reported in microJoule.
> > I was of the opinion that driver provides the raw values and user-space (can use
> > float/double)  will be able to scale the value, which would involve
> > calculation of
> > "1/2^ESU * RAW value"
> >
>
> Please see Documentation/hwmon/sysfs-interface.rst. Sure, there is
> /etc/sensors3.conf, but that only applies if the kernel driver can not
> provide scaled values. This is not the case here.
I did not refer this page.
>
> FWIW, I implemented prototype code for this some time ago as
> extension of the k10temp driver, and the arithmetic wasn't that
> difficult.
Will take clues and implement.

>
> Guenter
Thank you.


-- 
Shine bright,
(: Nav :)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters
  2020-04-10 15:57 ` [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters Guenter Roeck
  2020-04-11  4:49   ` Naveen Krishna Ch
@ 2020-04-11 12:26   ` Naveen Krishna Ch
  2020-04-11 15:48     ` Guenter Roeck
  1 sibling, 1 reply; 12+ messages in thread
From: Naveen Krishna Ch @ 2020-04-11 12:26 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Naveen Krishna Chatradhi, linux-hwmon

Hi Guenter

I would be glad, If you could help me with the following query.
On Fri, 10 Apr 2020 at 21:27, Guenter Roeck <linux@roeck-us.net> wrote:
>
> On 4/9/20 8:39 AM, Naveen Krishna Chatradhi wrote:
> > This patch adds hwmon based amd_energy driver support for
> > family 17h processors from AMD.
> >
> > The driver provides following interface to the userspace
> > 1. Reports the per core and per socket energy consumption
> >    via sysfs entries created per core and per socket respectively.
> >     * per core energy consumed via "core_energy%d_input"
> >     * package/socket energy consumed via "sock_energy%d_input".
> > 2. Uses topology_max_packages() to get number of sockets.
> > 3. Uses num_present_cpus() to get the number of CPUs.
> > 4. Reports the energy units via energy_unit sysfs entry
> >
> > Cc: Guenter Roeck <linux@roeck-us.net>
> > Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> > ---
> >  drivers/hwmon/Kconfig      |  10 ++
> >  drivers/hwmon/Makefile     |   1 +
> >  drivers/hwmon/amd_energy.c | 273 +++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 284 insertions(+)
> >  create mode 100644 drivers/hwmon/amd_energy.c
> >
> > diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> > index 05a30832c6ba..d83f1403b429 100644
> > --- a/drivers/hwmon/Kconfig
> > +++ b/drivers/hwmon/Kconfig
> > @@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
> >         This driver can also be built as a module. If so, the module
> >         will be called fam15h_power.
> >
> > +config SENSORS_AMD_ENERGY
> > +     tristate "AMD RAPL MSR based Energy driver"
> > +     depends on X86
> > +     help
> > +       If you say yes here you get support for core and package energy
> > +       sensors, based on RAPL MSR for AMD family 17h and above CPUs.
> > +
> > +       This driver can also be built as a module. If so, the module
> > +       will be called as amd_energy.
> > +
> >  config SENSORS_APPLESMC
> >       tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
> >       depends on INPUT && X86
> > diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> > index b0b9c8e57176..318f89dc7133 100644
> > --- a/drivers/hwmon/Makefile
> > +++ b/drivers/hwmon/Makefile
> > @@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)       += adt7411.o
> >  obj-$(CONFIG_SENSORS_ADT7462)        += adt7462.o
> >  obj-$(CONFIG_SENSORS_ADT7470)        += adt7470.o
> >  obj-$(CONFIG_SENSORS_ADT7475)        += adt7475.o
> > +obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
> >  obj-$(CONFIG_SENSORS_APPLESMC)       += applesmc.o
> >  obj-$(CONFIG_SENSORS_ARM_SCMI)       += scmi-hwmon.o
> >  obj-$(CONFIG_SENSORS_ARM_SCPI)       += scpi-hwmon.o
> > diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
> > new file mode 100644
> > index 000000000000..ddb7073ae39b
> > --- /dev/null
> > +++ b/drivers/hwmon/amd_energy.c
> > @@ -0,0 +1,273 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +
> > +/*
> > + * Copyright (C) 2019 Advanced Micro Devices, Inc.
> > + */
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/slab.h>
> > +#include <linux/types.h>
> > +#include <linux/device.h>
> > +#include <linux/sysfs.h>
> > +#include <linux/cpu.h>
> > +#include <linux/list.h>
> > +#include <linux/hwmon.h>
> > +#include <linux/hwmon-sysfs.h>
> > +#include <linux/processor.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/cpumask.h>
>
> Alphabetic include file order, please.
>
> > +
> > +#include <asm/cpu_device_id.h>
> > +
> > +#define DRVNAME      "amd_energy"
> > +
> > +#define ENERGY_PWR_UNIT_MSR  0xC0010299
> > +#define ENERGY_CORE_MSR      0xC001029A
> > +#define ENERGY_PCK_MSR               0xC001029B
> > +
> > +#define AMD_TIME_UNIT_MASK   0xF0000
> > +#define AMD_ENERGY_UNIT_MASK 0x01F00
> > +#define AMD_POWER_UNIT_MASK  0x0000F
> > +
> > +#define ENERGY_STATUS_MASK   0xffffffff
> > +
> > +#define AMD_FAM_17           0x17 /* ZP, SSP */
> > +
> > +/* Useful macros */
> > +#define AMD_CPU_FAM_ANY(_family, _model)     \
> > +{                                            \
> > +     .vendor         = X86_VENDOR_AMD,       \
> > +     .family         = _family,              \
> > +     .model          = _model,               \
> > +     .feature        = X86_FEATURE_ANY,      \
> > +}
> > +
> > +#define AMD_CPU_FAM(_model)                  \
> > +     AMD_CPU_FAM_ANY(AMD_FAM_##_model, X86_MODEL_ANY)
> > +
> > +static struct device_attribute units_attr;
> > +
> > +struct sensor_data {
> > +     unsigned int scale;
> > +     union {
> > +             unsigned int cpu_nr;
> > +             unsigned int sock_nr;
> > +     };
> > +     struct device_attribute dev_attr_input;
> > +     char input[32];
> > +};
> > +
> > +struct amd_energy_sensors {
> > +     struct sensor_data *data;
> > +     struct attribute **attrs;
> > +     struct attribute_group group;
> > +     const struct attribute_group *groups[1];
>
> Even if acceptable, this would be wrong. The list of groups
> ends with an empty entry, meaning this array would have to have
> at least two entries (one for the terminator).
>
> > +};
> > +
> > +static ssize_t amd_units_u64_show(struct device *dev,
> > +     struct device_attribute *dev_attr, char *buffer)
> > +{
> > +     uint64_t rapl_units;
> > +     uint64_t energy_unit;
> > +     int ret = 0;
> > +
> > +     ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
> > +     if (ret)
> > +             return -EAGAIN;
> > +
> > +     energy_unit = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
> > +
> > +     return snprintf(buffer, 24, "%llu\n", energy_unit);
> > +}
> > +
> > +static ssize_t amd_core_u64_show(struct device *dev,
> > +             struct device_attribute *dev_attr, char *buffer)
> > +{
> > +     unsigned long long value;
> > +     struct sensor_data *sensor;
> > +     int ret = 0;
> > +
> > +     sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
> > +     ret = rdmsrl_safe_on_cpu(sensor->cpu_nr, ENERGY_CORE_MSR, &value);
> > +     if (ret)
> > +             return -EAGAIN;
> > +
> > +     return snprintf(buffer, 24, "%llu\n", value);
>
> It seems to me this reports raw values. This is unacceptable. Values need
> to be scaled to match the ABI. Energy is reported in microJoule.
>
> > +}
> > +
> > +static ssize_t amd_sock_u64_show(struct device *dev,
> > +             struct device_attribute *dev_attr, char *buffer)
> > +{
> > +     unsigned long long value;
> > +     struct sensor_data *sensor;
> > +     int ret = 0;
> > +     int cpu, cpu_nr;
> > +
> > +     sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
> > +
> > +     for_each_possible_cpu(cpu) {
> > +             struct cpuinfo_x86 *c = &cpu_data(cpu);
> > +
> > +             if (c->initialized && c->logical_die_id == sensor->sock_nr) {
> > +                     cpu_nr = cpu;
> > +                     break;
> > +             }
> > +             cpu_nr = 0;
> > +     }
> > +
> > +     ret = rdmsrl_safe_on_cpu(cpu_nr, ENERGY_PCK_MSR, &value);
> > +     if (ret)
> > +             return -EAGAIN;
> > +
> > +     return snprintf(buffer, 24, "%llu\n", value);
> > +}
> > +
> > +static int amd_energy_probe(struct platform_device *pdev)
> > +{
> > +     struct amd_energy_sensors *amd_sensors;
> > +     struct device *hwdev, *dev = &pdev->dev;
> > +     int nr_cpus, nr_socks, idx = 0;
> > +
> > +     nr_cpus = num_present_cpus();
> > +     nr_socks = topology_max_packages();
> > +
> > +     amd_sensors = devm_kzalloc(dev, sizeof(*amd_sensors), GFP_KERNEL);
> > +     if (!amd_sensors)
> > +             return -ENOMEM;
> > +
> > +     amd_sensors->data = devm_kcalloc(dev, nr_cpus + nr_socks,
> > +                             sizeof(*amd_sensors->data), GFP_KERNEL);
> > +     if (!amd_sensors->data)
> > +             return -ENOMEM;
> > +
> > +     amd_sensors->attrs = devm_kcalloc(dev, nr_cpus + nr_socks,
> > +                             sizeof(*amd_sensors->attrs), GFP_KERNEL);
> > +     if (!amd_sensors->attrs)
> > +             return -ENOMEM;
> > +
> > +     /* populate attributes for number of cpus. */
> > +     for (idx = 0; idx < ; idx++) {
> > +             struct sensor_data *sensor = &amd_sensors->data[idx];
> > +
> > +             snprintf(sensor->input, sizeof(sensor->input),
> > +                             "core_energy%d_input", idx);
> > +
>
> This is unacceptable. Please use standard attributes (energyX_input).
> If you want to report what the sensor is for, use labels.
>
> > +             sensor->dev_attr_input.attr.mode = 0444;
> > +             sensor->dev_attr_input.attr.name = sensor->input;
> > +             sensor->dev_attr_input.show = amd_core_u64_show;
> > +
> > +             sensor->cpu_nr = idx;
> > +             amd_sensors->attrs[idx] = &sensor->dev_attr_input.attr;
> > +
> > +             sysfs_attr_init(amd_sensors->attrs[idx]);
> > +     }
> > +
> > +     /* populate attributes for number of sockets. */
> > +     for (idx = 0; idx < nr_socks; idx++) {
> > +             struct sensor_data *sensor =
> > +                     &amd_sensors->data[nr_cpus + idx];
> > +
> > +             snprintf(sensor->input,
> > +                     sizeof(sensor->input), "sock_energy%d_input", idx);
> > +
> > +             sensor->dev_attr_input.attr.mode = 0444;
> > +             sensor->dev_attr_input.attr.name = sensor->input;
> > +             sensor->dev_attr_input.show = amd_sock_u64_show;
> > +
> > +             sensor->sock_nr = idx;
> > +             amd_sensors->attrs[nr_cpus + idx] =
> > +                     &sensor->dev_attr_input.attr;
> > +
> > +             sysfs_attr_init(amd_sensors->attrs[nr_cpus + idx]);
> > +     }
>
> This all makes me wonder what is reported for inactive/disabled CPUs.
>
> > +
> > +     amd_sensors->group.attrs = amd_sensors->attrs;
> > +     amd_sensors->groups[0] = &amd_sensors->group;
> > +
> > +     platform_set_drvdata(pdev, amd_sensors);
> > +
> > +     hwdev = devm_hwmon_device_register_with_groups(dev,
> > +                     "amd_energy", amd_sensors, amd_sensors->groups);
>
> Please rework the driver to use the devm_hwmon_device_register_with_info() API.
Our current, platform has 64 cores per socket and 2 such sockets on a
board. There is a core energy counter register for each core and
Package counter for each socket.  The topology varies from platform to
platform.


To keep this driver reusable for all platforms. I think, we need to
define the hwmon_chip_info and channel_info structures dynamically
(hwmon_chip_info has const variables). Is there a way to define a
pre-processor statement which can create an array for a given
platform.

Could you suggest me a way to defining these hwmon_chip_info and
channel array dynamically ?  This reason a reason for me to use groups
instead of the chip_info.
>
> > +     if (!hwdev)
> > +             return PTR_ERR_OR_ZERO(hwdev);
> > +
> > +     /* populate attributes for energy units */
> > +     units_attr.attr.name = "energy_units";
> > +     units_attr.attr.mode = 0444;
> > +     units_attr.show = amd_units_u64_show;
> > +
> > +     return sysfs_create_file(&hwdev->kobj, &units_attr.attr);
>
> This is irrelevant for this driver. Units are fixed. Besides, registering
> sysfs attributes after driver registration is racy and won't be accepted.
> Non-standard attributes can be added with the groups argument of
> devm_hwmon_device_register_with_info() (not that I see any here).
>
> > +}
> > +
> > +static int amd_energy_remove(struct platform_device *pdev)
> > +{
> > +     sysfs_remove_file(&pdev->dev.kobj, &units_attr.attr);
> > +     return 0;
> > +}
> > +
> > +static const struct platform_device_id amd_energy_ids[] = {
> > +     { .name = DRVNAME, },
> > +     {}
> > +};
> > +MODULE_DEVICE_TABLE(platform, amd_energy_ids);
> > +
> > +static struct platform_driver amd_energy_driver = {
> > +     .probe = amd_energy_probe,
> > +     .remove = amd_energy_remove,
> > +     .id_table = amd_energy_ids,
> > +     .driver = {
> > +             .name = DRVNAME,
> > +     },
> > +};
> > +
> > +static struct platform_device *amd_energy_platdev;
> > +
> > +static const struct x86_cpu_id cpu_ids[] __initconst = {
> > +     AMD_CPU_FAM(17),
> > +     {}
> > +};
> > +MODULE_DEVICE_TABLE(x86cpu, cpu_ids);
> > +
> > +static int __init amd_energy_init(void)
> > +{
> > +     const struct x86_cpu_id *id;
> > +     int ret;
> > +
> > +     id = x86_match_cpu(cpu_ids);
> > +     if (!id) {
> > +             pr_err("driver does not support CPU family %d model %d\n",
> > +                     boot_cpu_data.x86, boot_cpu_data.x86_model);
> > +
>
> This is not an error and thus does not warrant an error message.
>
> > +             return -ENODEV;
> > +     }
> > +
> > +     ret = platform_driver_register(&amd_energy_driver);
> > +     if (ret < 0)
> > +             return ret;
> > +
> > +     amd_energy_platdev = platform_device_alloc(DRVNAME, 0);
> > +     if (!amd_energy_platdev)
> > +             return -ENOMEM;
> > +
> > +     ret = platform_device_add(amd_energy_platdev);
> > +     if (ret) {
> > +             platform_device_unregister(amd_energy_platdev);
> > +             return ret;
> > +     }
> > +
> > +     return ret;
> > +}
> > +
> > +static void __exit amd_energy_exit(void)
> > +{
> > +     platform_device_unregister(amd_energy_platdev);
> > +     platform_driver_unregister(&amd_energy_driver);
> > +}
> > +
> > +module_init(amd_energy_init);
> > +module_exit(amd_energy_exit);
> > +
> > +MODULE_DESCRIPTION("Driver for AMD Energy reporting from RAPL MSR via HWMON interface");
> > +MODULE_AUTHOR("Naveen Krishna Chatradhi <nchatrad@amd.com>");
> > +MODULE_LICENSE("GPL");
> >
>


-- 
Shine bright,
(: Nav :)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters
  2020-04-11 12:26   ` Naveen Krishna Ch
@ 2020-04-11 15:48     ` Guenter Roeck
  2020-04-11 17:07       ` Naveen Krishna Ch
  0 siblings, 1 reply; 12+ messages in thread
From: Guenter Roeck @ 2020-04-11 15:48 UTC (permalink / raw)
  To: Naveen Krishna Ch; +Cc: Naveen Krishna Chatradhi, linux-hwmon

On 4/11/20 5:26 AM, Naveen Krishna Ch wrote:
> Hi Guenter
> 
> I would be glad, If you could help me with the following query.
> On Fri, 10 Apr 2020 at 21:27, Guenter Roeck <linux@roeck-us.net> wrote:
>>
>> On 4/9/20 8:39 AM, Naveen Krishna Chatradhi wrote:
>>> This patch adds hwmon based amd_energy driver support for
>>> family 17h processors from AMD.
>>>
>>> The driver provides following interface to the userspace
>>> 1. Reports the per core and per socket energy consumption
>>>    via sysfs entries created per core and per socket respectively.
>>>     * per core energy consumed via "core_energy%d_input"
>>>     * package/socket energy consumed via "sock_energy%d_input".
>>> 2. Uses topology_max_packages() to get number of sockets.
>>> 3. Uses num_present_cpus() to get the number of CPUs.
>>> 4. Reports the energy units via energy_unit sysfs entry
>>>
>>> Cc: Guenter Roeck <linux@roeck-us.net>
>>> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
>>> ---
>>>  drivers/hwmon/Kconfig      |  10 ++
>>>  drivers/hwmon/Makefile     |   1 +
>>>  drivers/hwmon/amd_energy.c | 273 +++++++++++++++++++++++++++++++++++++++++++++
>>>  3 files changed, 284 insertions(+)
>>>  create mode 100644 drivers/hwmon/amd_energy.c
>>>
>>> diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
>>> index 05a30832c6ba..d83f1403b429 100644
>>> --- a/drivers/hwmon/Kconfig
>>> +++ b/drivers/hwmon/Kconfig
>>> @@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
>>>         This driver can also be built as a module. If so, the module
>>>         will be called fam15h_power.
>>>
>>> +config SENSORS_AMD_ENERGY
>>> +     tristate "AMD RAPL MSR based Energy driver"
>>> +     depends on X86
>>> +     help
>>> +       If you say yes here you get support for core and package energy
>>> +       sensors, based on RAPL MSR for AMD family 17h and above CPUs.
>>> +
>>> +       This driver can also be built as a module. If so, the module
>>> +       will be called as amd_energy.
>>> +
>>>  config SENSORS_APPLESMC
>>>       tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
>>>       depends on INPUT && X86
>>> diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
>>> index b0b9c8e57176..318f89dc7133 100644
>>> --- a/drivers/hwmon/Makefile
>>> +++ b/drivers/hwmon/Makefile
>>> @@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)       += adt7411.o
>>>  obj-$(CONFIG_SENSORS_ADT7462)        += adt7462.o
>>>  obj-$(CONFIG_SENSORS_ADT7470)        += adt7470.o
>>>  obj-$(CONFIG_SENSORS_ADT7475)        += adt7475.o
>>> +obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
>>>  obj-$(CONFIG_SENSORS_APPLESMC)       += applesmc.o
>>>  obj-$(CONFIG_SENSORS_ARM_SCMI)       += scmi-hwmon.o
>>>  obj-$(CONFIG_SENSORS_ARM_SCPI)       += scpi-hwmon.o
>>> diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
>>> new file mode 100644
>>> index 000000000000..ddb7073ae39b
>>> --- /dev/null
>>> +++ b/drivers/hwmon/amd_energy.c
>>> @@ -0,0 +1,273 @@
>>> +// SPDX-License-Identifier: GPL-2.0-only
>>> +
>>> +/*
>>> + * Copyright (C) 2019 Advanced Micro Devices, Inc.
>>> + */
>>> +
>>> +#include <linux/kernel.h>
>>> +#include <linux/module.h>
>>> +#include <linux/slab.h>
>>> +#include <linux/types.h>
>>> +#include <linux/device.h>
>>> +#include <linux/sysfs.h>
>>> +#include <linux/cpu.h>
>>> +#include <linux/list.h>
>>> +#include <linux/hwmon.h>
>>> +#include <linux/hwmon-sysfs.h>
>>> +#include <linux/processor.h>
>>> +#include <linux/platform_device.h>
>>> +#include <linux/cpumask.h>
>>
>> Alphabetic include file order, please.
>>
>>> +
>>> +#include <asm/cpu_device_id.h>
>>> +
>>> +#define DRVNAME      "amd_energy"
>>> +
>>> +#define ENERGY_PWR_UNIT_MSR  0xC0010299
>>> +#define ENERGY_CORE_MSR      0xC001029A
>>> +#define ENERGY_PCK_MSR               0xC001029B
>>> +
>>> +#define AMD_TIME_UNIT_MASK   0xF0000
>>> +#define AMD_ENERGY_UNIT_MASK 0x01F00
>>> +#define AMD_POWER_UNIT_MASK  0x0000F
>>> +
>>> +#define ENERGY_STATUS_MASK   0xffffffff
>>> +
>>> +#define AMD_FAM_17           0x17 /* ZP, SSP */
>>> +
>>> +/* Useful macros */
>>> +#define AMD_CPU_FAM_ANY(_family, _model)     \
>>> +{                                            \
>>> +     .vendor         = X86_VENDOR_AMD,       \
>>> +     .family         = _family,              \
>>> +     .model          = _model,               \
>>> +     .feature        = X86_FEATURE_ANY,      \
>>> +}
>>> +
>>> +#define AMD_CPU_FAM(_model)                  \
>>> +     AMD_CPU_FAM_ANY(AMD_FAM_##_model, X86_MODEL_ANY)
>>> +
>>> +static struct device_attribute units_attr;
>>> +
>>> +struct sensor_data {
>>> +     unsigned int scale;
>>> +     union {
>>> +             unsigned int cpu_nr;
>>> +             unsigned int sock_nr;
>>> +     };
>>> +     struct device_attribute dev_attr_input;
>>> +     char input[32];
>>> +};
>>> +
>>> +struct amd_energy_sensors {
>>> +     struct sensor_data *data;
>>> +     struct attribute **attrs;
>>> +     struct attribute_group group;
>>> +     const struct attribute_group *groups[1];
>>
>> Even if acceptable, this would be wrong. The list of groups
>> ends with an empty entry, meaning this array would have to have
>> at least two entries (one for the terminator).
>>
>>> +};
>>> +
>>> +static ssize_t amd_units_u64_show(struct device *dev,
>>> +     struct device_attribute *dev_attr, char *buffer)
>>> +{
>>> +     uint64_t rapl_units;
>>> +     uint64_t energy_unit;
>>> +     int ret = 0;
>>> +
>>> +     ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
>>> +     if (ret)
>>> +             return -EAGAIN;
>>> +
>>> +     energy_unit = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
>>> +
>>> +     return snprintf(buffer, 24, "%llu\n", energy_unit);
>>> +}
>>> +
>>> +static ssize_t amd_core_u64_show(struct device *dev,
>>> +             struct device_attribute *dev_attr, char *buffer)
>>> +{
>>> +     unsigned long long value;
>>> +     struct sensor_data *sensor;
>>> +     int ret = 0;
>>> +
>>> +     sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
>>> +     ret = rdmsrl_safe_on_cpu(sensor->cpu_nr, ENERGY_CORE_MSR, &value);
>>> +     if (ret)
>>> +             return -EAGAIN;
>>> +
>>> +     return snprintf(buffer, 24, "%llu\n", value);
>>
>> It seems to me this reports raw values. This is unacceptable. Values need
>> to be scaled to match the ABI. Energy is reported in microJoule.
>>
>>> +}
>>> +
>>> +static ssize_t amd_sock_u64_show(struct device *dev,
>>> +             struct device_attribute *dev_attr, char *buffer)
>>> +{
>>> +     unsigned long long value;
>>> +     struct sensor_data *sensor;
>>> +     int ret = 0;
>>> +     int cpu, cpu_nr;
>>> +
>>> +     sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
>>> +
>>> +     for_each_possible_cpu(cpu) {
>>> +             struct cpuinfo_x86 *c = &cpu_data(cpu);
>>> +
>>> +             if (c->initialized && c->logical_die_id == sensor->sock_nr) {
>>> +                     cpu_nr = cpu;
>>> +                     break;
>>> +             }
>>> +             cpu_nr = 0;
>>> +     }
>>> +
>>> +     ret = rdmsrl_safe_on_cpu(cpu_nr, ENERGY_PCK_MSR, &value);
>>> +     if (ret)
>>> +             return -EAGAIN;
>>> +
>>> +     return snprintf(buffer, 24, "%llu\n", value);
>>> +}
>>> +
>>> +static int amd_energy_probe(struct platform_device *pdev)
>>> +{
>>> +     struct amd_energy_sensors *amd_sensors;
>>> +     struct device *hwdev, *dev = &pdev->dev;
>>> +     int nr_cpus, nr_socks, idx = 0;
>>> +
>>> +     nr_cpus = num_present_cpus();
>>> +     nr_socks = topology_max_packages();
>>> +
>>> +     amd_sensors = devm_kzalloc(dev, sizeof(*amd_sensors), GFP_KERNEL);
>>> +     if (!amd_sensors)
>>> +             return -ENOMEM;
>>> +
>>> +     amd_sensors->data = devm_kcalloc(dev, nr_cpus + nr_socks,
>>> +                             sizeof(*amd_sensors->data), GFP_KERNEL);
>>> +     if (!amd_sensors->data)
>>> +             return -ENOMEM;
>>> +
>>> +     amd_sensors->attrs = devm_kcalloc(dev, nr_cpus + nr_socks,
>>> +                             sizeof(*amd_sensors->attrs), GFP_KERNEL);
>>> +     if (!amd_sensors->attrs)
>>> +             return -ENOMEM;
>>> +
>>> +     /* populate attributes for number of cpus. */
>>> +     for (idx = 0; idx < ; idx++) {
>>> +             struct sensor_data *sensor = &amd_sensors->data[idx];
>>> +
>>> +             snprintf(sensor->input, sizeof(sensor->input),
>>> +                             "core_energy%d_input", idx);
>>> +
>>
>> This is unacceptable. Please use standard attributes (energyX_input).
>> If you want to report what the sensor is for, use labels.
>>
>>> +             sensor->dev_attr_input.attr.mode = 0444;
>>> +             sensor->dev_attr_input.attr.name = sensor->input;
>>> +             sensor->dev_attr_input.show = amd_core_u64_show;
>>> +
>>> +             sensor->cpu_nr = idx;
>>> +             amd_sensors->attrs[idx] = &sensor->dev_attr_input.attr;
>>> +
>>> +             sysfs_attr_init(amd_sensors->attrs[idx]);
>>> +     }
>>> +
>>> +     /* populate attributes for number of sockets. */
>>> +     for (idx = 0; idx < nr_socks; idx++) {
>>> +             struct sensor_data *sensor =
>>> +                     &amd_sensors->data[nr_cpus + idx];
>>> +
>>> +             snprintf(sensor->input,
>>> +                     sizeof(sensor->input), "sock_energy%d_input", idx);
>>> +
>>> +             sensor->dev_attr_input.attr.mode = 0444;
>>> +             sensor->dev_attr_input.attr.name = sensor->input;
>>> +             sensor->dev_attr_input.show = amd_sock_u64_show;
>>> +
>>> +             sensor->sock_nr = idx;
>>> +             amd_sensors->attrs[nr_cpus + idx] =
>>> +                     &sensor->dev_attr_input.attr;
>>> +
>>> +             sysfs_attr_init(amd_sensors->attrs[nr_cpus + idx]);
>>> +     }
>>
>> This all makes me wonder what is reported for inactive/disabled CPUs.
>>
>>> +
>>> +     amd_sensors->group.attrs = amd_sensors->attrs;
>>> +     amd_sensors->groups[0] = &amd_sensors->group;
>>> +
>>> +     platform_set_drvdata(pdev, amd_sensors);
>>> +
>>> +     hwdev = devm_hwmon_device_register_with_groups(dev,
>>> +                     "amd_energy", amd_sensors, amd_sensors->groups);
>>
>> Please rework the driver to use the devm_hwmon_device_register_with_info() API.
> Our current, platform has 64 cores per socket and 2 such sockets on a
> board. There is a core energy counter register for each core and
> Package counter for each socket.  The topology varies from platform to
> platform.
> 
> 
> To keep this driver reusable for all platforms. I think, we need to
> define the hwmon_chip_info and channel_info structures dynamically
> (hwmon_chip_info has const variables). Is there a way to define a
> pre-processor statement which can create an array for a given
> platform.
> 
> Could you suggest me a way to defining these hwmon_chip_info and
> channel array dynamically ?  This reason a reason for me to use groups
> instead of the chip_info.

The approach used by the tmp421 driver should work. 'const'
doesn't apply to the pointers, but to the data they contain.
But that doesn't mean the data itself has to be const. You
can still assign the pointers dynamically. The  habanalabs
driver does it fully dynamically (I did not review that code,
so it may be a bit messy).

Guenter

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters
  2020-04-11 15:48     ` Guenter Roeck
@ 2020-04-11 17:07       ` Naveen Krishna Ch
  2020-04-12 20:04         ` Naveen Krishna Ch
  0 siblings, 1 reply; 12+ messages in thread
From: Naveen Krishna Ch @ 2020-04-11 17:07 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Naveen Krishna Chatradhi, linux-hwmon

Hi Guenter

On Sat, 11 Apr 2020 at 21:18, Guenter Roeck <linux@roeck-us.net> wrote:
>
> On 4/11/20 5:26 AM, Naveen Krishna Ch wrote:
> > Hi Guenter
> >
> > I would be glad, If you could help me with the following query.
> > On Fri, 10 Apr 2020 at 21:27, Guenter Roeck <linux@roeck-us.net> wrote:
> >>
> >> On 4/9/20 8:39 AM, Naveen Krishna Chatradhi wrote:
> >>> This patch adds hwmon based amd_energy driver support for
> >>> family 17h processors from AMD.
> >>>
> >>> The driver provides following interface to the userspace
> >>> 1. Reports the per core and per socket energy consumption
> >>>    via sysfs entries created per core and per socket respectively.
> >>>     * per core energy consumed via "core_energy%d_input"
> >>>     * package/socket energy consumed via "sock_energy%d_input".
> >>> 2. Uses topology_max_packages() to get number of sockets.
> >>> 3. Uses num_present_cpus() to get the number of CPUs.
> >>> 4. Reports the energy units via energy_unit sysfs entry
> >>>
> >>> Cc: Guenter Roeck <linux@roeck-us.net>
> >>> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> >>> ---
> >>>  drivers/hwmon/Kconfig      |  10 ++
> >>>  drivers/hwmon/Makefile     |   1 +
> >>>  drivers/hwmon/amd_energy.c | 273 +++++++++++++++++++++++++++++++++++++++++++++
> >>>  3 files changed, 284 insertions(+)
> >>>  create mode 100644 drivers/hwmon/amd_energy.c
> >>>
> >>> diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> >>> index 05a30832c6ba..d83f1403b429 100644
> >>> --- a/drivers/hwmon/Kconfig
> >>> +++ b/drivers/hwmon/Kconfig
> >>> @@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
> >>>         This driver can also be built as a module. If so, the module
> >>>         will be called fam15h_power.
> >>>
> >>> +config SENSORS_AMD_ENERGY
> >>> +     tristate "AMD RAPL MSR based Energy driver"
> >>> +     depends on X86
> >>> +     help
> >>> +       If you say yes here you get support for core and package energy
> >>> +       sensors, based on RAPL MSR for AMD family 17h and above CPUs.
> >>> +
> >>> +       This driver can also be built as a module. If so, the module
> >>> +       will be called as amd_energy.
> >>> +
> >>>  config SENSORS_APPLESMC
> >>>       tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
> >>>       depends on INPUT && X86
> >>> diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> >>> index b0b9c8e57176..318f89dc7133 100644
> >>> --- a/drivers/hwmon/Makefile
> >>> +++ b/drivers/hwmon/Makefile
> >>> @@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)       += adt7411.o
> >>>  obj-$(CONFIG_SENSORS_ADT7462)        += adt7462.o
> >>>  obj-$(CONFIG_SENSORS_ADT7470)        += adt7470.o
> >>>  obj-$(CONFIG_SENSORS_ADT7475)        += adt7475.o
> >>> +obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
> >>>  obj-$(CONFIG_SENSORS_APPLESMC)       += applesmc.o
> >>>  obj-$(CONFIG_SENSORS_ARM_SCMI)       += scmi-hwmon.o
> >>>  obj-$(CONFIG_SENSORS_ARM_SCPI)       += scpi-hwmon.o
> >>> diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
> >>> new file mode 100644
> >>> index 000000000000..ddb7073ae39b
> >>> --- /dev/null
> >>> +++ b/drivers/hwmon/amd_energy.c
> >>> @@ -0,0 +1,273 @@
> >>> +// SPDX-License-Identifier: GPL-2.0-only
> >>> +
> >>> +/*
> >>> + * Copyright (C) 2019 Advanced Micro Devices, Inc.
> >>> + */
> >>> +
> >>> +#include <linux/kernel.h>
> >>> +#include <linux/module.h>
> >>> +#include <linux/slab.h>
> >>> +#include <linux/types.h>
> >>> +#include <linux/device.h>
> >>> +#include <linux/sysfs.h>
> >>> +#include <linux/cpu.h>
> >>> +#include <linux/list.h>
> >>> +#include <linux/hwmon.h>
> >>> +#include <linux/hwmon-sysfs.h>
> >>> +#include <linux/processor.h>
> >>> +#include <linux/platform_device.h>
> >>> +#include <linux/cpumask.h>
> >>
> >> Alphabetic include file order, please.
> >>
> >>> +
> >>> +#include <asm/cpu_device_id.h>
> >>> +
> >>> +#define DRVNAME      "amd_energy"
> >>> +
> >>> +#define ENERGY_PWR_UNIT_MSR  0xC0010299
> >>> +#define ENERGY_CORE_MSR      0xC001029A
> >>> +#define ENERGY_PCK_MSR               0xC001029B
> >>> +
> >>> +#define AMD_TIME_UNIT_MASK   0xF0000
> >>> +#define AMD_ENERGY_UNIT_MASK 0x01F00
> >>> +#define AMD_POWER_UNIT_MASK  0x0000F
> >>> +
> >>> +#define ENERGY_STATUS_MASK   0xffffffff
> >>> +
> >>> +#define AMD_FAM_17           0x17 /* ZP, SSP */
> >>> +
> >>> +/* Useful macros */
> >>> +#define AMD_CPU_FAM_ANY(_family, _model)     \
> >>> +{                                            \
> >>> +     .vendor         = X86_VENDOR_AMD,       \
> >>> +     .family         = _family,              \
> >>> +     .model          = _model,               \
> >>> +     .feature        = X86_FEATURE_ANY,      \
> >>> +}
> >>> +
> >>> +#define AMD_CPU_FAM(_model)                  \
> >>> +     AMD_CPU_FAM_ANY(AMD_FAM_##_model, X86_MODEL_ANY)
> >>> +
> >>> +static struct device_attribute units_attr;
> >>> +
> >>> +struct sensor_data {
> >>> +     unsigned int scale;
> >>> +     union {
> >>> +             unsigned int cpu_nr;
> >>> +             unsigned int sock_nr;
> >>> +     };
> >>> +     struct device_attribute dev_attr_input;
> >>> +     char input[32];
> >>> +};
> >>> +
> >>> +struct amd_energy_sensors {
> >>> +     struct sensor_data *data;
> >>> +     struct attribute **attrs;
> >>> +     struct attribute_group group;
> >>> +     const struct attribute_group *groups[1];
> >>
> >> Even if acceptable, this would be wrong. The list of groups
> >> ends with an empty entry, meaning this array would have to have
> >> at least two entries (one for the terminator).
> >>
> >>> +};
> >>> +
> >>> +static ssize_t amd_units_u64_show(struct device *dev,
> >>> +     struct device_attribute *dev_attr, char *buffer)
> >>> +{
> >>> +     uint64_t rapl_units;
> >>> +     uint64_t energy_unit;
> >>> +     int ret = 0;
> >>> +
> >>> +     ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
> >>> +     if (ret)
> >>> +             return -EAGAIN;
> >>> +
> >>> +     energy_unit = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
> >>> +
> >>> +     return snprintf(buffer, 24, "%llu\n", energy_unit);
> >>> +}
> >>> +
> >>> +static ssize_t amd_core_u64_show(struct device *dev,
> >>> +             struct device_attribute *dev_attr, char *buffer)
> >>> +{
> >>> +     unsigned long long value;
> >>> +     struct sensor_data *sensor;
> >>> +     int ret = 0;
> >>> +
> >>> +     sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
> >>> +     ret = rdmsrl_safe_on_cpu(sensor->cpu_nr, ENERGY_CORE_MSR, &value);
> >>> +     if (ret)
> >>> +             return -EAGAIN;
> >>> +
> >>> +     return snprintf(buffer, 24, "%llu\n", value);
> >>
> >> It seems to me this reports raw values. This is unacceptable. Values need
> >> to be scaled to match the ABI. Energy is reported in microJoule.
> >>
> >>> +}
> >>> +
> >>> +static ssize_t amd_sock_u64_show(struct device *dev,
> >>> +             struct device_attribute *dev_attr, char *buffer)
> >>> +{
> >>> +     unsigned long long value;
> >>> +     struct sensor_data *sensor;
> >>> +     int ret = 0;
> >>> +     int cpu, cpu_nr;
> >>> +
> >>> +     sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
> >>> +
> >>> +     for_each_possible_cpu(cpu) {
> >>> +             struct cpuinfo_x86 *c = &cpu_data(cpu);
> >>> +
> >>> +             if (c->initialized && c->logical_die_id == sensor->sock_nr) {
> >>> +                     cpu_nr = cpu;
> >>> +                     break;
> >>> +             }
> >>> +             cpu_nr = 0;
> >>> +     }
> >>> +
> >>> +     ret = rdmsrl_safe_on_cpu(cpu_nr, ENERGY_PCK_MSR, &value);
> >>> +     if (ret)
> >>> +             return -EAGAIN;
> >>> +
> >>> +     return snprintf(buffer, 24, "%llu\n", value);
> >>> +}
> >>> +
> >>> +static int amd_energy_probe(struct platform_device *pdev)
> >>> +{
> >>> +     struct amd_energy_sensors *amd_sensors;
> >>> +     struct device *hwdev, *dev = &pdev->dev;
> >>> +     int nr_cpus, nr_socks, idx = 0;
> >>> +
> >>> +     nr_cpus = num_present_cpus();
> >>> +     nr_socks = topology_max_packages();
> >>> +
> >>> +     amd_sensors = devm_kzalloc(dev, sizeof(*amd_sensors), GFP_KERNEL);
> >>> +     if (!amd_sensors)
> >>> +             return -ENOMEM;
> >>> +
> >>> +     amd_sensors->data = devm_kcalloc(dev, nr_cpus + nr_socks,
> >>> +                             sizeof(*amd_sensors->data), GFP_KERNEL);
> >>> +     if (!amd_sensors->data)
> >>> +             return -ENOMEM;
> >>> +
> >>> +     amd_sensors->attrs = devm_kcalloc(dev, nr_cpus + nr_socks,
> >>> +                             sizeof(*amd_sensors->attrs), GFP_KERNEL);
> >>> +     if (!amd_sensors->attrs)
> >>> +             return -ENOMEM;
> >>> +
> >>> +     /* populate attributes for number of cpus. */
> >>> +     for (idx = 0; idx < ; idx++) {
> >>> +             struct sensor_data *sensor = &amd_sensors->data[idx];
> >>> +
> >>> +             snprintf(sensor->input, sizeof(sensor->input),
> >>> +                             "core_energy%d_input", idx);
> >>> +
> >>
> >> This is unacceptable. Please use standard attributes (energyX_input).
> >> If you want to report what the sensor is for, use labels.
> >>
> >>> +             sensor->dev_attr_input.attr.mode = 0444;
> >>> +             sensor->dev_attr_input.attr.name = sensor->input;
> >>> +             sensor->dev_attr_input.show = amd_core_u64_show;
> >>> +
> >>> +             sensor->cpu_nr = idx;
> >>> +             amd_sensors->attrs[idx] = &sensor->dev_attr_input.attr;
> >>> +
> >>> +             sysfs_attr_init(amd_sensors->attrs[idx]);
> >>> +     }
> >>> +
> >>> +     /* populate attributes for number of sockets. */
> >>> +     for (idx = 0; idx < nr_socks; idx++) {
> >>> +             struct sensor_data *sensor =
> >>> +                     &amd_sensors->data[nr_cpus + idx];
> >>> +
> >>> +             snprintf(sensor->input,
> >>> +                     sizeof(sensor->input), "sock_energy%d_input", idx);
> >>> +
> >>> +             sensor->dev_attr_input.attr.mode = 0444;
> >>> +             sensor->dev_attr_input.attr.name = sensor->input;
> >>> +             sensor->dev_attr_input.show = amd_sock_u64_show;
> >>> +
> >>> +             sensor->sock_nr = idx;
> >>> +             amd_sensors->attrs[nr_cpus + idx] =
> >>> +                     &sensor->dev_attr_input.attr;
> >>> +
> >>> +             sysfs_attr_init(amd_sensors->attrs[nr_cpus + idx]);
> >>> +     }
> >>
> >> This all makes me wonder what is reported for inactive/disabled CPUs.
> >>
> >>> +
> >>> +     amd_sensors->group.attrs = amd_sensors->attrs;
> >>> +     amd_sensors->groups[0] = &amd_sensors->group;
> >>> +
> >>> +     platform_set_drvdata(pdev, amd_sensors);
> >>> +
> >>> +     hwdev = devm_hwmon_device_register_with_groups(dev,
> >>> +                     "amd_energy", amd_sensors, amd_sensors->groups);
> >>
> >> Please rework the driver to use the devm_hwmon_device_register_with_info() API.
> > Our current, platform has 64 cores per socket and 2 such sockets on a
> > board. There is a core energy counter register for each core and
> > Package counter for each socket.  The topology varies from platform to
> > platform.
> >
> >
> > To keep this driver reusable for all platforms. I think, we need to
> > define the hwmon_chip_info and channel_info structures dynamically
> > (hwmon_chip_info has const variables). Is there a way to define a
> > pre-processor statement which can create an array for a given
> > platform.
> >
> > Could you suggest me a way to defining these hwmon_chip_info and
> > channel array dynamically ?  This reason a reason for me to use groups
> > instead of the chip_info.
>
> The approach used by the tmp421 driver should work. 'const'
> doesn't apply to the pointers, but to the data they contain.
> But that doesn't mean the data itself has to be const. You
> can still assign the pointers dynamically. The  habanalabs
> driver does it fully dynamically (I did not review that code,
> so it may be a bit messy).
Thank you very much for the pointer. I will go through the code.
>
> Guenter



-- 
Shine bright,
(: Nav :)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters
  2020-04-11 17:07       ` Naveen Krishna Ch
@ 2020-04-12 20:04         ` Naveen Krishna Ch
  2020-04-12 22:27           ` Guenter Roeck
  0 siblings, 1 reply; 12+ messages in thread
From: Naveen Krishna Ch @ 2020-04-12 20:04 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Naveen Krishna Chatradhi, linux-hwmon

Hi Guenter

On Sat, 11 Apr 2020 at 22:37, Naveen Krishna Ch
<naveenkrishna.ch@gmail.com> wrote:
>
> Hi Guenter
>
> On Sat, 11 Apr 2020 at 21:18, Guenter Roeck <linux@roeck-us.net> wrote:
> >
> > On 4/11/20 5:26 AM, Naveen Krishna Ch wrote:
> > > Hi Guenter
> > >
> > > I would be glad, If you could help me with the following query.
> > > On Fri, 10 Apr 2020 at 21:27, Guenter Roeck <linux@roeck-us.net> wrote:
> > >>
> > >> On 4/9/20 8:39 AM, Naveen Krishna Chatradhi wrote:
> > >>> This patch adds hwmon based amd_energy driver support for
> > >>> family 17h processors from AMD.
> > >>>
> > >>> The driver provides following interface to the userspace
> > >>> 1. Reports the per core and per socket energy consumption
> > >>>    via sysfs entries created per core and per socket respectively.
> > >>>     * per core energy consumed via "core_energy%d_input"
> > >>>     * package/socket energy consumed via "sock_energy%d_input".
> > >>> 2. Uses topology_max_packages() to get number of sockets.
> > >>> 3. Uses num_present_cpus() to get the number of CPUs.
> > >>> 4. Reports the energy units via energy_unit sysfs entry
> > >>>
> > >>> Cc: Guenter Roeck <linux@roeck-us.net>
> > >>> Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
> > >>> ---
> > >>>  drivers/hwmon/Kconfig      |  10 ++
> > >>>  drivers/hwmon/Makefile     |   1 +
> > >>>  drivers/hwmon/amd_energy.c | 273 +++++++++++++++++++++++++++++++++++++++++++++
> > >>>  3 files changed, 284 insertions(+)
> > >>>  create mode 100644 drivers/hwmon/amd_energy.c
> > >>>
> > >>> diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> > >>> index 05a30832c6ba..d83f1403b429 100644
> > >>> --- a/drivers/hwmon/Kconfig
> > >>> +++ b/drivers/hwmon/Kconfig
> > >>> @@ -324,6 +324,16 @@ config SENSORS_FAM15H_POWER
> > >>>         This driver can also be built as a module. If so, the module
> > >>>         will be called fam15h_power.
> > >>>
> > >>> +config SENSORS_AMD_ENERGY
> > >>> +     tristate "AMD RAPL MSR based Energy driver"
> > >>> +     depends on X86
> > >>> +     help
> > >>> +       If you say yes here you get support for core and package energy
> > >>> +       sensors, based on RAPL MSR for AMD family 17h and above CPUs.
> > >>> +
> > >>> +       This driver can also be built as a module. If so, the module
> > >>> +       will be called as amd_energy.
> > >>> +
> > >>>  config SENSORS_APPLESMC
> > >>>       tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
> > >>>       depends on INPUT && X86
> > >>> diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> > >>> index b0b9c8e57176..318f89dc7133 100644
> > >>> --- a/drivers/hwmon/Makefile
> > >>> +++ b/drivers/hwmon/Makefile
> > >>> @@ -45,6 +45,7 @@ obj-$(CONFIG_SENSORS_ADT7411)       += adt7411.o
> > >>>  obj-$(CONFIG_SENSORS_ADT7462)        += adt7462.o
> > >>>  obj-$(CONFIG_SENSORS_ADT7470)        += adt7470.o
> > >>>  obj-$(CONFIG_SENSORS_ADT7475)        += adt7475.o
> > >>> +obj-$(CONFIG_SENSORS_AMD_ENERGY) += amd_energy.o
> > >>>  obj-$(CONFIG_SENSORS_APPLESMC)       += applesmc.o
> > >>>  obj-$(CONFIG_SENSORS_ARM_SCMI)       += scmi-hwmon.o
> > >>>  obj-$(CONFIG_SENSORS_ARM_SCPI)       += scpi-hwmon.o
> > >>> diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
> > >>> new file mode 100644
> > >>> index 000000000000..ddb7073ae39b
> > >>> --- /dev/null
> > >>> +++ b/drivers/hwmon/amd_energy.c
> > >>> @@ -0,0 +1,273 @@
> > >>> +// SPDX-License-Identifier: GPL-2.0-only
> > >>> +
> > >>> +/*
> > >>> + * Copyright (C) 2019 Advanced Micro Devices, Inc.
> > >>> + */
> > >>> +
> > >>> +#include <linux/kernel.h>
> > >>> +#include <linux/module.h>
> > >>> +#include <linux/slab.h>
> > >>> +#include <linux/types.h>
> > >>> +#include <linux/device.h>
> > >>> +#include <linux/sysfs.h>
> > >>> +#include <linux/cpu.h>
> > >>> +#include <linux/list.h>
> > >>> +#include <linux/hwmon.h>
> > >>> +#include <linux/hwmon-sysfs.h>
> > >>> +#include <linux/processor.h>
> > >>> +#include <linux/platform_device.h>
> > >>> +#include <linux/cpumask.h>
> > >>
> > >> Alphabetic include file order, please.
> > >>
> > >>> +
> > >>> +#include <asm/cpu_device_id.h>
> > >>> +
> > >>> +#define DRVNAME      "amd_energy"
> > >>> +
> > >>> +#define ENERGY_PWR_UNIT_MSR  0xC0010299
> > >>> +#define ENERGY_CORE_MSR      0xC001029A
> > >>> +#define ENERGY_PCK_MSR               0xC001029B
> > >>> +
> > >>> +#define AMD_TIME_UNIT_MASK   0xF0000
> > >>> +#define AMD_ENERGY_UNIT_MASK 0x01F00
> > >>> +#define AMD_POWER_UNIT_MASK  0x0000F
> > >>> +
> > >>> +#define ENERGY_STATUS_MASK   0xffffffff
> > >>> +
> > >>> +#define AMD_FAM_17           0x17 /* ZP, SSP */
> > >>> +
> > >>> +/* Useful macros */
> > >>> +#define AMD_CPU_FAM_ANY(_family, _model)     \
> > >>> +{                                            \
> > >>> +     .vendor         = X86_VENDOR_AMD,       \
> > >>> +     .family         = _family,              \
> > >>> +     .model          = _model,               \
> > >>> +     .feature        = X86_FEATURE_ANY,      \
> > >>> +}
> > >>> +
> > >>> +#define AMD_CPU_FAM(_model)                  \
> > >>> +     AMD_CPU_FAM_ANY(AMD_FAM_##_model, X86_MODEL_ANY)
> > >>> +
> > >>> +static struct device_attribute units_attr;
> > >>> +
> > >>> +struct sensor_data {
> > >>> +     unsigned int scale;
> > >>> +     union {
> > >>> +             unsigned int cpu_nr;
> > >>> +             unsigned int sock_nr;
> > >>> +     };
> > >>> +     struct device_attribute dev_attr_input;
> > >>> +     char input[32];
> > >>> +};
> > >>> +
> > >>> +struct amd_energy_sensors {
> > >>> +     struct sensor_data *data;
> > >>> +     struct attribute **attrs;
> > >>> +     struct attribute_group group;
> > >>> +     const struct attribute_group *groups[1];
> > >>
> > >> Even if acceptable, this would be wrong. The list of groups
> > >> ends with an empty entry, meaning this array would have to have
> > >> at least two entries (one for the terminator).
> > >>
> > >>> +};
> > >>> +
> > >>> +static ssize_t amd_units_u64_show(struct device *dev,
> > >>> +     struct device_attribute *dev_attr, char *buffer)
> > >>> +{
> > >>> +     uint64_t rapl_units;
> > >>> +     uint64_t energy_unit;
> > >>> +     int ret = 0;
> > >>> +
> > >>> +     ret = rdmsrl_safe(ENERGY_PWR_UNIT_MSR, &rapl_units);
> > >>> +     if (ret)
> > >>> +             return -EAGAIN;
> > >>> +
> > >>> +     energy_unit = (rapl_units & AMD_ENERGY_UNIT_MASK) >> 8;
> > >>> +
> > >>> +     return snprintf(buffer, 24, "%llu\n", energy_unit);
> > >>> +}
> > >>> +
> > >>> +static ssize_t amd_core_u64_show(struct device *dev,
> > >>> +             struct device_attribute *dev_attr, char *buffer)
> > >>> +{
> > >>> +     unsigned long long value;
> > >>> +     struct sensor_data *sensor;
> > >>> +     int ret = 0;
> > >>> +
> > >>> +     sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
> > >>> +     ret = rdmsrl_safe_on_cpu(sensor->cpu_nr, ENERGY_CORE_MSR, &value);
> > >>> +     if (ret)
> > >>> +             return -EAGAIN;
> > >>> +
> > >>> +     return snprintf(buffer, 24, "%llu\n", value);
> > >>
> > >> It seems to me this reports raw values. This is unacceptable. Values need
> > >> to be scaled to match the ABI. Energy is reported in microJoule.
> > >>
> > >>> +}
> > >>> +
> > >>> +static ssize_t amd_sock_u64_show(struct device *dev,
> > >>> +             struct device_attribute *dev_attr, char *buffer)
> > >>> +{
> > >>> +     unsigned long long value;
> > >>> +     struct sensor_data *sensor;
> > >>> +     int ret = 0;
> > >>> +     int cpu, cpu_nr;
> > >>> +
> > >>> +     sensor = container_of(dev_attr, struct sensor_data, dev_attr_input);
> > >>> +
> > >>> +     for_each_possible_cpu(cpu) {
> > >>> +             struct cpuinfo_x86 *c = &cpu_data(cpu);
> > >>> +
> > >>> +             if (c->initialized && c->logical_die_id == sensor->sock_nr) {
> > >>> +                     cpu_nr = cpu;
> > >>> +                     break;
> > >>> +             }
> > >>> +             cpu_nr = 0;
> > >>> +     }
> > >>> +
> > >>> +     ret = rdmsrl_safe_on_cpu(cpu_nr, ENERGY_PCK_MSR, &value);
> > >>> +     if (ret)
> > >>> +             return -EAGAIN;
> > >>> +
> > >>> +     return snprintf(buffer, 24, "%llu\n", value);
> > >>> +}
> > >>> +
> > >>> +static int amd_energy_probe(struct platform_device *pdev)
> > >>> +{
> > >>> +     struct amd_energy_sensors *amd_sensors;
> > >>> +     struct device *hwdev, *dev = &pdev->dev;
> > >>> +     int nr_cpus, nr_socks, idx = 0;
> > >>> +
> > >>> +     nr_cpus = num_present_cpus();
> > >>> +     nr_socks = topology_max_packages();
> > >>> +
> > >>> +     amd_sensors = devm_kzalloc(dev, sizeof(*amd_sensors), GFP_KERNEL);
> > >>> +     if (!amd_sensors)
> > >>> +             return -ENOMEM;
> > >>> +
> > >>> +     amd_sensors->data = devm_kcalloc(dev, nr_cpus + nr_socks,
> > >>> +                             sizeof(*amd_sensors->data), GFP_KERNEL);
> > >>> +     if (!amd_sensors->data)
> > >>> +             return -ENOMEM;
> > >>> +
> > >>> +     amd_sensors->attrs = devm_kcalloc(dev, nr_cpus + nr_socks,
> > >>> +                             sizeof(*amd_sensors->attrs), GFP_KERNEL);
> > >>> +     if (!amd_sensors->attrs)
> > >>> +             return -ENOMEM;
> > >>> +
> > >>> +     /* populate attributes for number of cpus. */
> > >>> +     for (idx = 0; idx < ; idx++) {
> > >>> +             struct sensor_data *sensor = &amd_sensors->data[idx];
> > >>> +
> > >>> +             snprintf(sensor->input, sizeof(sensor->input),
> > >>> +                             "core_energy%d_input", idx);
> > >>> +
> > >>
> > >> This is unacceptable. Please use standard attributes (energyX_input).
> > >> If you want to report what the sensor is for, use labels.
> > >>
> > >>> +             sensor->dev_attr_input.attr.mode = 0444;
> > >>> +             sensor->dev_attr_input.attr.name = sensor->input;
> > >>> +             sensor->dev_attr_input.show = amd_core_u64_show;
> > >>> +
> > >>> +             sensor->cpu_nr = idx;
> > >>> +             amd_sensors->attrs[idx] = &sensor->dev_attr_input.attr;
> > >>> +
> > >>> +             sysfs_attr_init(amd_sensors->attrs[idx]);
> > >>> +     }
> > >>> +
> > >>> +     /* populate attributes for number of sockets. */
> > >>> +     for (idx = 0; idx < nr_socks; idx++) {
> > >>> +             struct sensor_data *sensor =
> > >>> +                     &amd_sensors->data[nr_cpus + idx];
> > >>> +
> > >>> +             snprintf(sensor->input,
> > >>> +                     sizeof(sensor->input), "sock_energy%d_input", idx);
> > >>> +
> > >>> +             sensor->dev_attr_input.attr.mode = 0444;
> > >>> +             sensor->dev_attr_input.attr.name = sensor->input;
> > >>> +             sensor->dev_attr_input.show = amd_sock_u64_show;
> > >>> +
> > >>> +             sensor->sock_nr = idx;
> > >>> +             amd_sensors->attrs[nr_cpus + idx] =
> > >>> +                     &sensor->dev_attr_input.attr;
> > >>> +
> > >>> +             sysfs_attr_init(amd_sensors->attrs[nr_cpus + idx]);
> > >>> +     }
> > >>
> > >> This all makes me wonder what is reported for inactive/disabled CPUs.
> > >>
> > >>> +
> > >>> +     amd_sensors->group.attrs = amd_sensors->attrs;
> > >>> +     amd_sensors->groups[0] = &amd_sensors->group;
> > >>> +
> > >>> +     platform_set_drvdata(pdev, amd_sensors);
> > >>> +
> > >>> +     hwdev = devm_hwmon_device_register_with_groups(dev,
> > >>> +                     "amd_energy", amd_sensors, amd_sensors->groups);
> > >>
> > >> Please rework the driver to use the devm_hwmon_device_register_with_info() API.
> > > Our current, platform has 64 cores per socket and 2 such sockets on a
> > > board. There is a core energy counter register for each core and
> > > Package counter for each socket.  The topology varies from platform to
> > > platform.
> > >
> > >
> > > To keep this driver reusable for all platforms. I think, we need to
> > > define the hwmon_chip_info and channel_info structures dynamically
> > > (hwmon_chip_info has const variables). Is there a way to define a
> > > pre-processor statement which can create an array for a given
> > > platform.
> > >
> > > Could you suggest me a way to defining these hwmon_chip_info and
> > > channel array dynamically ?  This reason a reason for me to use groups
> > > instead of the chip_info.
> >
> > The approach used by the tmp421 driver should work. 'const'
> > doesn't apply to the pointers, but to the data they contain.
> > But that doesn't mean the data itself has to be const. You
> > can still assign the pointers dynamically. The  habanalabs
> > driver does it fully dynamically (I did not review that code,
> > so it may be a bit messy).
> Thank you very much for the pointer. I will go through the code.
I've the few more questions

Constraint:
The platform has 2 sets of energy sensors, one at core level, one at
socket level.
If i populate the chip_info type as "hwmon_energy" for both sensors.
The naming of the sysfs entries are going to be continuous and the
user application
should read the label to identify which entry belongs to which sensor set.

Possible solutions :
I could think of following ways to avoid this
1. Create 2 different hwmon devices
2. Use "hwmon_in" as type for one of the sensor sets and
"hwmon_energy" for other.
3. Use "groups" for one of the sensor sets and populate the other via chip.
What do you suggest ?

Also, I noticed that the sysfs filename index for the hwmon_energy
type is starting with 1,
while hwmon_in starts with 0. Was this a design choice ?
Ex: energy1_input

Your suggestions would be helpful.


> >
> > Guenter
>
>
>
> --
> Shine bright,
> (: Nav :)



--
Shine bright,
(: Nav :)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters
  2020-04-12 20:04         ` Naveen Krishna Ch
@ 2020-04-12 22:27           ` Guenter Roeck
  2020-04-20 10:51             ` Naveen Krishna Ch
  0 siblings, 1 reply; 12+ messages in thread
From: Guenter Roeck @ 2020-04-12 22:27 UTC (permalink / raw)
  To: Naveen Krishna Ch; +Cc: Naveen Krishna Chatradhi, linux-hwmon

On Mon, Apr 13, 2020 at 01:34:19AM +0530, Naveen Krishna Ch wrote:
> Hi Guenter
> 
[ ... ]
> 
> Constraint:
> The platform has 2 sets of energy sensors, one at core level, one at
> socket level.
> If i populate the chip_info type as "hwmon_energy" for both sensors.
> The naming of the sysfs entries are going to be continuous and the
> user application
> should read the label to identify which entry belongs to which sensor set.
> 

I am a bit at loss. what is the problem with having multiple _energy attributes
(energy1_input, energy2_input, ..., energy100_input) ?

> Possible solutions :
> I could think of following ways to avoid this
> 1. Create 2 different hwmon devices

You could do that, following the approach used by the k10temp driver. This is
really your call. The question in that case, though, would be why you would want
have a separate driver to start with and not just enhance the k10temp driver to
also report energy (and power consumption, though I understand that the registers
reporting it are not published).

> 2. Use "hwmon_in" as type for one of the sensor sets and
> "hwmon_energy" for other.

_in is for voltages. Please don't tell me you plan to report voltages with this
driver. If so, please extend the k10temp driver instead.

> 3. Use "groups" for one of the sensor sets and populate the other via chip.

This is a no-go.

> What do you suggest ?
> 
> Also, I noticed that the sysfs filename index for the hwmon_energy
> type is starting with 1,
> while hwmon_in starts with 0. Was this a design choice ?

I think this was mostly historic, but it preceeds my involvement with the
hwmon subsystem, so I don't really know.

Guenter

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters
  2020-04-12 22:27           ` Guenter Roeck
@ 2020-04-20 10:51             ` Naveen Krishna Ch
  0 siblings, 0 replies; 12+ messages in thread
From: Naveen Krishna Ch @ 2020-04-20 10:51 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Naveen Krishna Chatradhi, linux-hwmon

Hi Guenter,

On Mon, 13 Apr 2020 at 03:57, Guenter Roeck <linux@roeck-us.net> wrote:
>
> On Mon, Apr 13, 2020 at 01:34:19AM +0530, Naveen Krishna Ch wrote:
> > Hi Guenter
> >
> [ ... ]
> >
> > Constraint:
> > The platform has 2 sets of energy sensors, one at core level, one at
> > socket level.
> > If i populate the chip_info type as "hwmon_energy" for both sensors.
> > The naming of the sysfs entries are going to be continuous and the
> > user application
> > should read the label to identify which entry belongs to which sensor set.
> >
>
> I am a bit at loss. what is the problem with having multiple _energy attributes
> (energy1_input, energy2_input, ..., energy100_input) ?
>
> > Possible solutions :
> > I could think of following ways to avoid this
> > 1. Create 2 different hwmon devices
>
> You could do that, following the approach used by the k10temp driver. This is
> really your call. The question in that case, though, would be why you would want
> have a separate driver to start with and not just enhance the k10temp driver to
> also report energy (and power consumption, though I understand that the registers
> reporting it are not published).
>
> > 2. Use "hwmon_in" as type for one of the sensor sets and
> > "hwmon_energy" for other.
>
> _in is for voltages. Please don't tell me you plan to report voltages with this
> driver. If so, please extend the k10temp driver instead.
>
> > 3. Use "groups" for one of the sensor sets and populate the other via chip.
>
> This is a no-go.
>
> > What do you suggest ?
> >
> > Also, I noticed that the sysfs filename index for the hwmon_energy
> > type is starting with 1,
> > while hwmon_in starts with 0. Was this a design choice ?
>
> I think this was mostly historic, but it preceeds my involvement with the
> hwmon subsystem, so I don't really know.

Thanks for your inputs, i've submitted V2 version of the patch with your
comments addressed. Could you please review them and let us know your opinion.

https://lore.kernel.org/linux-hwmon/20200417190459.233179-1-nchatrad@amd.com/T/#t

>
> Guenter



-- 
Shine bright,
(: Nav :)

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-04-20 10:51 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-09 15:39 [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters Naveen Krishna Chatradhi
2020-04-09 15:39 ` [PATCH 2/2] hwmon: (amd_energy) Add documentation Naveen Krishna Chatradhi
2020-04-10 15:57 ` [PATCH 1/2] hwmon: Add amd_energy driver to report energy counters Guenter Roeck
2020-04-11  4:49   ` Naveen Krishna Ch
2020-04-11  5:16     ` Guenter Roeck
2020-04-11  5:43       ` Naveen Krishna Ch
2020-04-11 12:26   ` Naveen Krishna Ch
2020-04-11 15:48     ` Guenter Roeck
2020-04-11 17:07       ` Naveen Krishna Ch
2020-04-12 20:04         ` Naveen Krishna Ch
2020-04-12 22:27           ` Guenter Roeck
2020-04-20 10:51             ` Naveen Krishna Ch

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).