Linux-NVME Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v2] nvme: Add hardware monitoring support
@ 2019-10-29 22:32 Guenter Roeck
  2019-10-30  0:53 ` Keith Busch
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Guenter Roeck @ 2019-10-29 22:32 UTC (permalink / raw)
  To: Keith Busch
  Cc: Sagi Grimberg, linux-pm, linux-kernel, linux-nvme, Akinobu Mita,
	Jens Axboe, Guenter Roeck, Christoph Hellwig, Chris Healy

nvme devices report temperature information in the controller information
(for limits) and in the smart log. Currently, the only means to retrieve
this information is the nvme command line interface, which requires
super-user privileges.

At the same time, it would be desirable to use NVME temperature information
for thermal control.

This patch adds support to read NVME temperatures from the kernel using the
hwmon API and adds temperature zones for NVME drives. The thermal subsystem
can use this information to set thermal policies, and userspace can access
it using libsensors and/or the "sensors" command.

Example output from the "sensors" command:

nvme0-pci-0100
Adapter: PCI adapter
Composite:    +39.0°C  (high = +85.0°C, crit = +85.0°C)
Sensor 1:     +39.0°C
Sensor 2:     +41.0°C

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
v2: Use devm_kfree() to release memory in error path

 drivers/nvme/host/Kconfig      |  10 ++
 drivers/nvme/host/Makefile     |   1 +
 drivers/nvme/host/core.c       |   5 +
 drivers/nvme/host/nvme-hwmon.c | 163 +++++++++++++++++++++++++++++++++
 drivers/nvme/host/nvme.h       |   8 ++
 5 files changed, 187 insertions(+)
 create mode 100644 drivers/nvme/host/nvme-hwmon.c

diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 2b36f052bfb9..aeb49e16e386 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -23,6 +23,16 @@ config NVME_MULTIPATH
 	   /dev/nvmeXnY device will show up for each NVMe namespaces,
 	   even if it is accessible through multiple controllers.
 
+config NVME_HWMON
+	bool "NVME hardware monitoring"
+	depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON)
+	help
+	  This provides support for NVME hardware monitoring. If enabled,
+	  a hardware monitoring device will be created for each NVME drive
+	  in the system.
+
+	  If unsure, say N.
+
 config NVME_FABRICS
 	tristate
 
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 8a4b671c5f0c..03de4797a877 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -14,6 +14,7 @@ nvme-core-$(CONFIG_TRACING)		+= trace.o
 nvme-core-$(CONFIG_NVME_MULTIPATH)	+= multipath.o
 nvme-core-$(CONFIG_NVM)			+= lightnvm.o
 nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS)	+= fault_inject.o
+nvme-core-$(CONFIG_NVME_HWMON)		+= nvme-hwmon.o
 
 nvme-y					+= pci.o
 
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index fa7ba09dca77..fc1d4b146717 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2796,6 +2796,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	ctrl->oncs = le16_to_cpu(id->oncs);
 	ctrl->mtfa = le16_to_cpu(id->mtfa);
 	ctrl->oaes = le32_to_cpu(id->oaes);
+	ctrl->wctemp = le16_to_cpu(id->wctemp);
+	ctrl->cctemp = le16_to_cpu(id->cctemp);
+
 	atomic_set(&ctrl->abort_limit, id->acl + 1);
 	ctrl->vwc = id->vwc;
 	if (id->mdts)
@@ -2897,6 +2900,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 
 	ctrl->identified = true;
 
+	nvme_hwmon_init(ctrl);
+
 	return 0;
 
 out_free:
diff --git a/drivers/nvme/host/nvme-hwmon.c b/drivers/nvme/host/nvme-hwmon.c
new file mode 100644
index 000000000000..af5eda326ec6
--- /dev/null
+++ b/drivers/nvme/host/nvme-hwmon.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVM Express hardware monitoring support
+ * Copyright (c) 2019, Guenter Roeck
+ */
+
+#include <linux/hwmon.h>
+
+#include "nvme.h"
+
+struct nvme_hwmon_data {
+	struct nvme_ctrl *ctrl;
+	struct nvme_smart_log log;
+};
+
+static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
+{
+	return nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
+			    &data->log, sizeof(data->log), 0);
+}
+
+static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+			   u32 attr, int channel, long *val)
+{
+	struct nvme_hwmon_data *data = dev_get_drvdata(dev);
+	struct nvme_smart_log *log = &data->log;
+	int err;
+	int temp;
+
+	err = nvme_hwmon_get_smart_log(data);
+	if (err)
+		return err < 0 ? err : -EPROTO;
+
+	switch (attr) {
+	case hwmon_temp_max:
+		*val = (data->ctrl->wctemp - 273) * 1000;
+		break;
+	case hwmon_temp_crit:
+		*val = (data->ctrl->cctemp - 273) * 1000;
+		break;
+	case hwmon_temp_input:
+		if (!channel)
+			temp = le16_to_cpup((__le16 *)log->temperature);
+		else
+			temp = le16_to_cpu(log->temp_sensor[channel - 1]);
+		*val = (temp - 273) * 1000;
+		break;
+	case hwmon_temp_crit_alarm:
+		*val = !!(log->critical_warning & NVME_SMART_CRIT_TEMPERATURE);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+	return err;
+}
+
+static const char * const nvme_hwmon_sensor_names[] = {
+	"Composite",
+	"Sensor 1",
+	"Sensor 2",
+	"Sensor 3",
+	"Sensor 4",
+	"Sensor 5",
+	"Sensor 6",
+	"Sensor 7",
+	"Sensor 8",
+};
+
+static int nvme_hwmon_read_string(struct device *dev,
+				  enum hwmon_sensor_types type, u32 attr,
+				  int channel, const char **str)
+{
+	*str = nvme_hwmon_sensor_names[channel];
+	return 0;
+}
+
+static umode_t nvme_hwmon_is_visible(const void *_data,
+				     enum hwmon_sensor_types type,
+				     u32 attr, int channel)
+{
+	const struct nvme_hwmon_data *data = _data;
+
+	switch (attr) {
+	case hwmon_temp_crit:
+		if (!channel && data->ctrl->cctemp)
+			return 0444;
+		break;
+	case hwmon_temp_max:
+		if (!channel && data->ctrl->wctemp)
+			return 0444;
+		break;
+	case hwmon_temp_crit_alarm:
+		if (!channel)
+			return 0444;
+		break;
+	case hwmon_temp_input:
+	case hwmon_temp_label:
+		if (!channel || data->log.temp_sensor[channel - 1])
+			return 0444;
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static const struct hwmon_channel_info *nvme_hwmon_info[] = {
+	HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ),
+	HWMON_CHANNEL_INFO(temp,
+			   HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT |
+				HWMON_T_LABEL | HWMON_T_CRIT_ALARM,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL),
+	NULL
+};
+
+static const struct hwmon_ops nvme_hwmon_ops = {
+	.is_visible = nvme_hwmon_is_visible,
+	.read = nvme_hwmon_read,
+	.read_string = nvme_hwmon_read_string,
+};
+
+static const struct hwmon_chip_info nvme_hwmon_chip_info = {
+	.ops = &nvme_hwmon_ops,
+	.info = nvme_hwmon_info,
+};
+
+void nvme_hwmon_init(struct nvme_ctrl *ctrl)
+{
+	struct device *dev = ctrl->device;
+	struct nvme_hwmon_data *data;
+	struct device *hwmon;
+	int err;
+
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return;
+
+	data->ctrl = ctrl;
+
+	err = nvme_hwmon_get_smart_log(data);
+	if (err) {
+		dev_warn(dev, "Failed to read smart log (error %d)\n", err);
+		devm_kfree(dev, data);
+		return;
+	}
+
+	hwmon = devm_hwmon_device_register_with_info(dev, dev_name(dev),
+						     data,
+						     &nvme_hwmon_chip_info,
+						     NULL);
+	if (IS_ERR(hwmon)) {
+		dev_warn(dev, "Failed to instantiate hwmon device\n");
+		devm_kfree(dev, data);
+	}
+}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 22e8401352c2..e6460c1216bc 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -231,6 +231,8 @@ struct nvme_ctrl {
 	u16 kas;
 	u8 npss;
 	u8 apsta;
+	u16 wctemp;
+	u16 cctemp;
 	u32 oaes;
 	u32 aen_result;
 	u32 ctratt;
@@ -652,4 +654,10 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
 	return dev_to_disk(dev)->private_data;
 }
 
+#if IS_ENABLED(CONFIG_NVME_HWMON)
+void nvme_hwmon_init(struct nvme_ctrl *ctrl);
+#else
+static inline void nvme_hwmon_init(struct nvme_ctrl *ctrl) { }
+#endif
+
 #endif /* _NVME_H */
-- 
2.17.1


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-10-29 22:32 [PATCH v2] nvme: Add hardware monitoring support Guenter Roeck
@ 2019-10-30  0:53 ` Keith Busch
  2019-11-06 21:29   ` Pavel Machek
  2019-10-30 11:16 ` Akinobu Mita
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 16+ messages in thread
From: Keith Busch @ 2019-10-30  0:53 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Sagi Grimberg, linux-pm, linux-kernel, linux-nvme, Akinobu Mita,
	Jens Axboe, Christoph Hellwig, Chris Healy

On Tue, Oct 29, 2019 at 03:32:14PM -0700, Guenter Roeck wrote:
> nvme devices report temperature information in the controller information
> (for limits) and in the smart log. Currently, the only means to retrieve
> this information is the nvme command line interface, which requires
> super-user privileges.
> 
> At the same time, it would be desirable to use NVME temperature information
> for thermal control.
> 
> This patch adds support to read NVME temperatures from the kernel using the
> hwmon API and adds temperature zones for NVME drives. The thermal subsystem
> can use this information to set thermal policies, and userspace can access
> it using libsensors and/or the "sensors" command.
> 
> Example output from the "sensors" command:
> 
> nvme0-pci-0100
> Adapter: PCI adapter
> Composite:    +39.0°C  (high = +85.0°C, crit = +85.0°C)
> Sensor 1:     +39.0°C
> Sensor 2:     +41.0°C
> 
> Signed-off-by: Guenter Roeck <linux@roeck-us.net>

This looks fine to me, but I'll wait a few more days to see if there are
any additional comments..

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-10-29 22:32 [PATCH v2] nvme: Add hardware monitoring support Guenter Roeck
  2019-10-30  0:53 ` Keith Busch
@ 2019-10-30 11:16 ` Akinobu Mita
  2019-10-30 14:05   ` Christoph Hellwig
  2019-10-31  2:20   ` Guenter Roeck
  2019-10-30 14:12 ` Christoph Hellwig
  2019-10-30 23:40 ` Chris Healy
  3 siblings, 2 replies; 16+ messages in thread
From: Akinobu Mita @ 2019-10-30 11:16 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Sagi Grimberg, Linux PM, LKML, linux-nvme, Jens Axboe,
	Keith Busch, Christoph Hellwig, Chris Healy

2019年10月30日(水) 7:32 Guenter Roeck <linux@roeck-us.net>:
>
> nvme devices report temperature information in the controller information
> (for limits) and in the smart log. Currently, the only means to retrieve
> this information is the nvme command line interface, which requires
> super-user privileges.
>
> At the same time, it would be desirable to use NVME temperature information
> for thermal control.
>
> This patch adds support to read NVME temperatures from the kernel using the
> hwmon API and adds temperature zones for NVME drives. The thermal subsystem
> can use this information to set thermal policies, and userspace can access
> it using libsensors and/or the "sensors" command.
>
> Example output from the "sensors" command:
>
> nvme0-pci-0100
> Adapter: PCI adapter
> Composite:    +39.0°C  (high = +85.0°C, crit = +85.0°C)
> Sensor 1:     +39.0°C
> Sensor 2:     +41.0°C
>
> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> ---
> v2: Use devm_kfree() to release memory in error path
>
>  drivers/nvme/host/Kconfig      |  10 ++
>  drivers/nvme/host/Makefile     |   1 +
>  drivers/nvme/host/core.c       |   5 +
>  drivers/nvme/host/nvme-hwmon.c | 163 +++++++++++++++++++++++++++++++++
>  drivers/nvme/host/nvme.h       |   8 ++
>  5 files changed, 187 insertions(+)
>  create mode 100644 drivers/nvme/host/nvme-hwmon.c
>
> diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
> index 2b36f052bfb9..aeb49e16e386 100644
> --- a/drivers/nvme/host/Kconfig
> +++ b/drivers/nvme/host/Kconfig
> @@ -23,6 +23,16 @@ config NVME_MULTIPATH
>            /dev/nvmeXnY device will show up for each NVMe namespaces,
>            even if it is accessible through multiple controllers.
>
> +config NVME_HWMON
> +       bool "NVME hardware monitoring"
> +       depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON)
> +       help
> +         This provides support for NVME hardware monitoring. If enabled,
> +         a hardware monitoring device will be created for each NVME drive
> +         in the system.
> +
> +         If unsure, say N.
> +
>  config NVME_FABRICS
>         tristate
>
> diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
> index 8a4b671c5f0c..03de4797a877 100644
> --- a/drivers/nvme/host/Makefile
> +++ b/drivers/nvme/host/Makefile
> @@ -14,6 +14,7 @@ nvme-core-$(CONFIG_TRACING)           += trace.o
>  nvme-core-$(CONFIG_NVME_MULTIPATH)     += multipath.o
>  nvme-core-$(CONFIG_NVM)                        += lightnvm.o
>  nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS)   += fault_inject.o
> +nvme-core-$(CONFIG_NVME_HWMON)         += nvme-hwmon.o
>
>  nvme-y                                 += pci.o
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index fa7ba09dca77..fc1d4b146717 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2796,6 +2796,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
>         ctrl->oncs = le16_to_cpu(id->oncs);
>         ctrl->mtfa = le16_to_cpu(id->mtfa);
>         ctrl->oaes = le32_to_cpu(id->oaes);
> +       ctrl->wctemp = le16_to_cpu(id->wctemp);
> +       ctrl->cctemp = le16_to_cpu(id->cctemp);
> +
>         atomic_set(&ctrl->abort_limit, id->acl + 1);
>         ctrl->vwc = id->vwc;
>         if (id->mdts)
> @@ -2897,6 +2900,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
>
>         ctrl->identified = true;
>
> +       nvme_hwmon_init(ctrl);
> +
>         return 0;
>
>  out_free:

The nvme_init_identify() can be called multiple time in nvme ctrl's
lifetime (e.g 'nvme reset /dev/nvme*' or suspend/resume paths), so
should we need to prevent nvme_hwmon_init() from registering hwmon
device more than twice?

In the nvme thermal zone patchset[1], thernal zone is registered in
nvme_init_identify and unregistered in nvme_stop_ctrl().

[1] https://lore.kernel.org/linux-devicetree/1561990354-4084-2-git-send-email-akinobu.mita@gmail.com/

> diff --git a/drivers/nvme/host/nvme-hwmon.c b/drivers/nvme/host/nvme-hwmon.c
> new file mode 100644
> index 000000000000..af5eda326ec6
> --- /dev/null
> +++ b/drivers/nvme/host/nvme-hwmon.c
> @@ -0,0 +1,163 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * NVM Express hardware monitoring support
> + * Copyright (c) 2019, Guenter Roeck
> + */
> +
> +#include <linux/hwmon.h>
> +
> +#include "nvme.h"
> +
> +struct nvme_hwmon_data {
> +       struct nvme_ctrl *ctrl;
> +       struct nvme_smart_log log;
> +};
> +
> +static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
> +{
> +       return nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
> +                           &data->log, sizeof(data->log), 0);
> +}

The 'data->log' is allocated per nvme_ctrl, so are there any locks to
prevent multiple callers of nvme_hwmon_get_smart_log() from breaking
the log buffer?

> +
> +static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> +                          u32 attr, int channel, long *val)
> +{
> +       struct nvme_hwmon_data *data = dev_get_drvdata(dev);
> +       struct nvme_smart_log *log = &data->log;
> +       int err;
> +       int temp;
> +
> +       err = nvme_hwmon_get_smart_log(data);
> +       if (err)
> +               return err < 0 ? err : -EPROTO;
> +
> +       switch (attr) {
> +       case hwmon_temp_max:
> +               *val = (data->ctrl->wctemp - 273) * 1000;
> +               break;
> +       case hwmon_temp_crit:
> +               *val = (data->ctrl->cctemp - 273) * 1000;
> +               break;

When this function is called with 'hwmon_temp_max' or 'hwmon_temp_crit',
we don't need to call nvme_hwmon_get_smart_log() at all, do we?

> +       case hwmon_temp_input:
> +               if (!channel)
> +                       temp = le16_to_cpup((__le16 *)log->temperature);
> +               else
> +                       temp = le16_to_cpu(log->temp_sensor[channel - 1]);
> +               *val = (temp - 273) * 1000;
> +               break;
> +       case hwmon_temp_crit_alarm:
> +               *val = !!(log->critical_warning & NVME_SMART_CRIT_TEMPERATURE);
> +               break;
> +       default:
> +               err = -EOPNOTSUPP;
> +               break;
> +       }
> +       return err;
> +}

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-10-30 11:16 ` Akinobu Mita
@ 2019-10-30 14:05   ` Christoph Hellwig
  2019-10-31  2:54     ` Guenter Roeck
  2019-10-31 13:46     ` Akinobu Mita
  2019-10-31  2:20   ` Guenter Roeck
  1 sibling, 2 replies; 16+ messages in thread
From: Christoph Hellwig @ 2019-10-30 14:05 UTC (permalink / raw)
  To: Akinobu Mita
  Cc: Sagi Grimberg, Linux PM, LKML, linux-nvme, Jens Axboe,
	Chris Healy, Keith Busch, Christoph Hellwig, Guenter Roeck

On Wed, Oct 30, 2019 at 08:16:48PM +0900, Akinobu Mita wrote:
> The nvme_init_identify() can be called multiple time in nvme ctrl's
> lifetime (e.g 'nvme reset /dev/nvme*' or suspend/resume paths), so
> should we need to prevent nvme_hwmon_init() from registering hwmon
> device more than twice?
> 
> In the nvme thermal zone patchset[1], thernal zone is registered in
> nvme_init_identify and unregistered in nvme_stop_ctrl().

So Guenter said above the thermal subsystem could use the information
from hwmon as well.  Does this mean this patch would solve your needs
as well?

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-10-29 22:32 [PATCH v2] nvme: Add hardware monitoring support Guenter Roeck
  2019-10-30  0:53 ` Keith Busch
  2019-10-30 11:16 ` Akinobu Mita
@ 2019-10-30 14:12 ` Christoph Hellwig
  2019-10-30 23:40 ` Chris Healy
  3 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2019-10-30 14:12 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Sagi Grimberg, linux-pm, linux-kernel, linux-nvme, Akinobu Mita,
	Jens Axboe, Keith Busch, Christoph Hellwig, Chris Healy

On Tue, Oct 29, 2019 at 03:32:14PM -0700, Guenter Roeck wrote:
> This patch adds support to read NVME temperatures from the kernel using the
> hwmon API and adds temperature zones for NVME drives. The thermal subsystem
> can use this information to set thermal policies, and userspace can access
> it using libsensors and/or the "sensors" command.

Except in all upper case or all lower case identifier the speling should
always be "NVMe".  Thi also happens a few more places like in the Kconfig
text.

> +static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
> +{
> +	return nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
> +			    &data->log, sizeof(data->log), 0);
> +}
> +
> +static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> +			   u32 attr, int channel, long *val)
> +{
> +	struct nvme_hwmon_data *data = dev_get_drvdata(dev);
> +	struct nvme_smart_log *log = &data->log;
> +	int err;
> +	int temp;
> +
> +	err = nvme_hwmon_get_smart_log(data);
> +	if (err)
> +		return err < 0 ? err : -EPROTO;

I think the handling of positive errnos fits better into
nvme_hwmon_get_smart_log.  Also EIO sounds like a better error for
generic NVMe level errors.

> +
> +	switch (attr) {
> +	case hwmon_temp_max:
> +		*val = (data->ctrl->wctemp - 273) * 1000;
> +		break;
> +	case hwmon_temp_crit:
> +		*val = (data->ctrl->cctemp - 273) * 1000;
> +		break;
> +	case hwmon_temp_input:
> +		if (!channel)
> +			temp = le16_to_cpup((__le16 *)log->temperature);

This needs to use get_unaligned_le16, otherwise you'll run into problems
on architectures that don't support unaligned loads.

> +static const struct hwmon_ops nvme_hwmon_ops = {
> +	.is_visible = nvme_hwmon_is_visible,
> +	.read = nvme_hwmon_read,
> +	.read_string = nvme_hwmon_read_string,
> +};
> +
> +static const struct hwmon_chip_info nvme_hwmon_chip_info = {
> +	.ops = &nvme_hwmon_ops,
> +	.info = nvme_hwmon_info,
> +};

Please use tabs to align all the = in an ops structure.

> +#if IS_ENABLED(CONFIG_NVME_HWMON)

No real need to use IS_ENABLED here, a plain ifdef should do it.

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-10-29 22:32 [PATCH v2] nvme: Add hardware monitoring support Guenter Roeck
                   ` (2 preceding siblings ...)
  2019-10-30 14:12 ` Christoph Hellwig
@ 2019-10-30 23:40 ` Chris Healy
  3 siblings, 0 replies; 16+ messages in thread
From: Chris Healy @ 2019-10-30 23:40 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Sagi Grimberg, linux-pm, linux-kernel, linux-nvme, Akinobu Mita,
	Jens Axboe, Keith Busch, Christoph Hellwig

On an ARM64 system with Toshiba SSD:

Tested-by: Chris Healy <cphealy@gmail.com>

On Tue, Oct 29, 2019 at 3:32 PM Guenter Roeck <linux@roeck-us.net> wrote:
>
> nvme devices report temperature information in the controller information
> (for limits) and in the smart log. Currently, the only means to retrieve
> this information is the nvme command line interface, which requires
> super-user privileges.
>
> At the same time, it would be desirable to use NVME temperature information
> for thermal control.
>
> This patch adds support to read NVME temperatures from the kernel using the
> hwmon API and adds temperature zones for NVME drives. The thermal subsystem
> can use this information to set thermal policies, and userspace can access
> it using libsensors and/or the "sensors" command.
>
> Example output from the "sensors" command:
>
> nvme0-pci-0100
> Adapter: PCI adapter
> Composite:    +39.0°C  (high = +85.0°C, crit = +85.0°C)
> Sensor 1:     +39.0°C
> Sensor 2:     +41.0°C
>
> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> ---
> v2: Use devm_kfree() to release memory in error path
>
>  drivers/nvme/host/Kconfig      |  10 ++
>  drivers/nvme/host/Makefile     |   1 +
>  drivers/nvme/host/core.c       |   5 +
>  drivers/nvme/host/nvme-hwmon.c | 163 +++++++++++++++++++++++++++++++++
>  drivers/nvme/host/nvme.h       |   8 ++
>  5 files changed, 187 insertions(+)
>  create mode 100644 drivers/nvme/host/nvme-hwmon.c
>
> diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
> index 2b36f052bfb9..aeb49e16e386 100644
> --- a/drivers/nvme/host/Kconfig
> +++ b/drivers/nvme/host/Kconfig
> @@ -23,6 +23,16 @@ config NVME_MULTIPATH
>            /dev/nvmeXnY device will show up for each NVMe namespaces,
>            even if it is accessible through multiple controllers.
>
> +config NVME_HWMON
> +       bool "NVME hardware monitoring"
> +       depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON)
> +       help
> +         This provides support for NVME hardware monitoring. If enabled,
> +         a hardware monitoring device will be created for each NVME drive
> +         in the system.
> +
> +         If unsure, say N.
> +
>  config NVME_FABRICS
>         tristate
>
> diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
> index 8a4b671c5f0c..03de4797a877 100644
> --- a/drivers/nvme/host/Makefile
> +++ b/drivers/nvme/host/Makefile
> @@ -14,6 +14,7 @@ nvme-core-$(CONFIG_TRACING)           += trace.o
>  nvme-core-$(CONFIG_NVME_MULTIPATH)     += multipath.o
>  nvme-core-$(CONFIG_NVM)                        += lightnvm.o
>  nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS)   += fault_inject.o
> +nvme-core-$(CONFIG_NVME_HWMON)         += nvme-hwmon.o
>
>  nvme-y                                 += pci.o
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index fa7ba09dca77..fc1d4b146717 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2796,6 +2796,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
>         ctrl->oncs = le16_to_cpu(id->oncs);
>         ctrl->mtfa = le16_to_cpu(id->mtfa);
>         ctrl->oaes = le32_to_cpu(id->oaes);
> +       ctrl->wctemp = le16_to_cpu(id->wctemp);
> +       ctrl->cctemp = le16_to_cpu(id->cctemp);
> +
>         atomic_set(&ctrl->abort_limit, id->acl + 1);
>         ctrl->vwc = id->vwc;
>         if (id->mdts)
> @@ -2897,6 +2900,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
>
>         ctrl->identified = true;
>
> +       nvme_hwmon_init(ctrl);
> +
>         return 0;
>
>  out_free:
> diff --git a/drivers/nvme/host/nvme-hwmon.c b/drivers/nvme/host/nvme-hwmon.c
> new file mode 100644
> index 000000000000..af5eda326ec6
> --- /dev/null
> +++ b/drivers/nvme/host/nvme-hwmon.c
> @@ -0,0 +1,163 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * NVM Express hardware monitoring support
> + * Copyright (c) 2019, Guenter Roeck
> + */
> +
> +#include <linux/hwmon.h>
> +
> +#include "nvme.h"
> +
> +struct nvme_hwmon_data {
> +       struct nvme_ctrl *ctrl;
> +       struct nvme_smart_log log;
> +};
> +
> +static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
> +{
> +       return nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
> +                           &data->log, sizeof(data->log), 0);
> +}
> +
> +static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> +                          u32 attr, int channel, long *val)
> +{
> +       struct nvme_hwmon_data *data = dev_get_drvdata(dev);
> +       struct nvme_smart_log *log = &data->log;
> +       int err;
> +       int temp;
> +
> +       err = nvme_hwmon_get_smart_log(data);
> +       if (err)
> +               return err < 0 ? err : -EPROTO;
> +
> +       switch (attr) {
> +       case hwmon_temp_max:
> +               *val = (data->ctrl->wctemp - 273) * 1000;
> +               break;
> +       case hwmon_temp_crit:
> +               *val = (data->ctrl->cctemp - 273) * 1000;
> +               break;
> +       case hwmon_temp_input:
> +               if (!channel)
> +                       temp = le16_to_cpup((__le16 *)log->temperature);
> +               else
> +                       temp = le16_to_cpu(log->temp_sensor[channel - 1]);
> +               *val = (temp - 273) * 1000;
> +               break;
> +       case hwmon_temp_crit_alarm:
> +               *val = !!(log->critical_warning & NVME_SMART_CRIT_TEMPERATURE);
> +               break;
> +       default:
> +               err = -EOPNOTSUPP;
> +               break;
> +       }
> +       return err;
> +}
> +
> +static const char * const nvme_hwmon_sensor_names[] = {
> +       "Composite",
> +       "Sensor 1",
> +       "Sensor 2",
> +       "Sensor 3",
> +       "Sensor 4",
> +       "Sensor 5",
> +       "Sensor 6",
> +       "Sensor 7",
> +       "Sensor 8",
> +};
> +
> +static int nvme_hwmon_read_string(struct device *dev,
> +                                 enum hwmon_sensor_types type, u32 attr,
> +                                 int channel, const char **str)
> +{
> +       *str = nvme_hwmon_sensor_names[channel];
> +       return 0;
> +}
> +
> +static umode_t nvme_hwmon_is_visible(const void *_data,
> +                                    enum hwmon_sensor_types type,
> +                                    u32 attr, int channel)
> +{
> +       const struct nvme_hwmon_data *data = _data;
> +
> +       switch (attr) {
> +       case hwmon_temp_crit:
> +               if (!channel && data->ctrl->cctemp)
> +                       return 0444;
> +               break;
> +       case hwmon_temp_max:
> +               if (!channel && data->ctrl->wctemp)
> +                       return 0444;
> +               break;
> +       case hwmon_temp_crit_alarm:
> +               if (!channel)
> +                       return 0444;
> +               break;
> +       case hwmon_temp_input:
> +       case hwmon_temp_label:
> +               if (!channel || data->log.temp_sensor[channel - 1])
> +                       return 0444;
> +               break;
> +       default:
> +               break;
> +       }
> +       return 0;
> +}
> +
> +static const struct hwmon_channel_info *nvme_hwmon_info[] = {
> +       HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ),
> +       HWMON_CHANNEL_INFO(temp,
> +                          HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT |
> +                               HWMON_T_LABEL | HWMON_T_CRIT_ALARM,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL),
> +       NULL
> +};
> +
> +static const struct hwmon_ops nvme_hwmon_ops = {
> +       .is_visible = nvme_hwmon_is_visible,
> +       .read = nvme_hwmon_read,
> +       .read_string = nvme_hwmon_read_string,
> +};
> +
> +static const struct hwmon_chip_info nvme_hwmon_chip_info = {
> +       .ops = &nvme_hwmon_ops,
> +       .info = nvme_hwmon_info,
> +};
> +
> +void nvme_hwmon_init(struct nvme_ctrl *ctrl)
> +{
> +       struct device *dev = ctrl->device;
> +       struct nvme_hwmon_data *data;
> +       struct device *hwmon;
> +       int err;
> +
> +       data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
> +       if (!data)
> +               return;
> +
> +       data->ctrl = ctrl;
> +
> +       err = nvme_hwmon_get_smart_log(data);
> +       if (err) {
> +               dev_warn(dev, "Failed to read smart log (error %d)\n", err);
> +               devm_kfree(dev, data);
> +               return;
> +       }
> +
> +       hwmon = devm_hwmon_device_register_with_info(dev, dev_name(dev),
> +                                                    data,
> +                                                    &nvme_hwmon_chip_info,
> +                                                    NULL);
> +       if (IS_ERR(hwmon)) {
> +               dev_warn(dev, "Failed to instantiate hwmon device\n");
> +               devm_kfree(dev, data);
> +       }
> +}
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 22e8401352c2..e6460c1216bc 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -231,6 +231,8 @@ struct nvme_ctrl {
>         u16 kas;
>         u8 npss;
>         u8 apsta;
> +       u16 wctemp;
> +       u16 cctemp;
>         u32 oaes;
>         u32 aen_result;
>         u32 ctratt;
> @@ -652,4 +654,10 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
>         return dev_to_disk(dev)->private_data;
>  }
>
> +#if IS_ENABLED(CONFIG_NVME_HWMON)
> +void nvme_hwmon_init(struct nvme_ctrl *ctrl);
> +#else
> +static inline void nvme_hwmon_init(struct nvme_ctrl *ctrl) { }
> +#endif
> +
>  #endif /* _NVME_H */
> --
> 2.17.1
>

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-10-30 11:16 ` Akinobu Mita
  2019-10-30 14:05   ` Christoph Hellwig
@ 2019-10-31  2:20   ` Guenter Roeck
  2019-10-31 13:44     ` Akinobu Mita
  2019-10-31 13:45     ` Christoph Hellwig
  1 sibling, 2 replies; 16+ messages in thread
From: Guenter Roeck @ 2019-10-31  2:20 UTC (permalink / raw)
  To: Akinobu Mita
  Cc: Sagi Grimberg, Linux PM, LKML, linux-nvme, Jens Axboe,
	Keith Busch, Christoph Hellwig, Chris Healy

On 10/30/19 4:16 AM, Akinobu Mita wrote:
> 2019年10月30日(水) 7:32 Guenter Roeck <linux@roeck-us.net>:
>>
>> nvme devices report temperature information in the controller information
>> (for limits) and in the smart log. Currently, the only means to retrieve
>> this information is the nvme command line interface, which requires
>> super-user privileges.
>>
>> At the same time, it would be desirable to use NVME temperature information
>> for thermal control.
>>
>> This patch adds support to read NVME temperatures from the kernel using the
>> hwmon API and adds temperature zones for NVME drives. The thermal subsystem
>> can use this information to set thermal policies, and userspace can access
>> it using libsensors and/or the "sensors" command.
>>
>> Example output from the "sensors" command:
>>
>> nvme0-pci-0100
>> Adapter: PCI adapter
>> Composite:    +39.0°C  (high = +85.0°C, crit = +85.0°C)
>> Sensor 1:     +39.0°C
>> Sensor 2:     +41.0°C
>>
>> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
>> ---
>> v2: Use devm_kfree() to release memory in error path
>>
>>   drivers/nvme/host/Kconfig      |  10 ++
>>   drivers/nvme/host/Makefile     |   1 +
>>   drivers/nvme/host/core.c       |   5 +
>>   drivers/nvme/host/nvme-hwmon.c | 163 +++++++++++++++++++++++++++++++++
>>   drivers/nvme/host/nvme.h       |   8 ++
>>   5 files changed, 187 insertions(+)
>>   create mode 100644 drivers/nvme/host/nvme-hwmon.c
>>
>> diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
>> index 2b36f052bfb9..aeb49e16e386 100644
>> --- a/drivers/nvme/host/Kconfig
>> +++ b/drivers/nvme/host/Kconfig
>> @@ -23,6 +23,16 @@ config NVME_MULTIPATH
>>             /dev/nvmeXnY device will show up for each NVMe namespaces,
>>             even if it is accessible through multiple controllers.
>>
>> +config NVME_HWMON
>> +       bool "NVME hardware monitoring"
>> +       depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON)
>> +       help
>> +         This provides support for NVME hardware monitoring. If enabled,
>> +         a hardware monitoring device will be created for each NVME drive
>> +         in the system.
>> +
>> +         If unsure, say N.
>> +
>>   config NVME_FABRICS
>>          tristate
>>
>> diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
>> index 8a4b671c5f0c..03de4797a877 100644
>> --- a/drivers/nvme/host/Makefile
>> +++ b/drivers/nvme/host/Makefile
>> @@ -14,6 +14,7 @@ nvme-core-$(CONFIG_TRACING)           += trace.o
>>   nvme-core-$(CONFIG_NVME_MULTIPATH)     += multipath.o
>>   nvme-core-$(CONFIG_NVM)                        += lightnvm.o
>>   nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS)   += fault_inject.o
>> +nvme-core-$(CONFIG_NVME_HWMON)         += nvme-hwmon.o
>>
>>   nvme-y                                 += pci.o
>>
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index fa7ba09dca77..fc1d4b146717 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -2796,6 +2796,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
>>          ctrl->oncs = le16_to_cpu(id->oncs);
>>          ctrl->mtfa = le16_to_cpu(id->mtfa);
>>          ctrl->oaes = le32_to_cpu(id->oaes);
>> +       ctrl->wctemp = le16_to_cpu(id->wctemp);
>> +       ctrl->cctemp = le16_to_cpu(id->cctemp);
>> +
>>          atomic_set(&ctrl->abort_limit, id->acl + 1);
>>          ctrl->vwc = id->vwc;
>>          if (id->mdts)
>> @@ -2897,6 +2900,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
>>
>>          ctrl->identified = true;
>>
>> +       nvme_hwmon_init(ctrl);
>> +
>>          return 0;
>>
>>   out_free:
> 
> The nvme_init_identify() can be called multiple time in nvme ctrl's
> lifetime (e.g 'nvme reset /dev/nvme*' or suspend/resume paths), so
> should we need to prevent nvme_hwmon_init() from registering hwmon
> device more than twice?
> 
> In the nvme thermal zone patchset[1], thernal zone is registered in
> nvme_init_identify and unregistered in nvme_stop_ctrl().
> 

Doesn't that mean that the initialization should happen in nvme_start_ctrl()
and not here ?

Either case, good point. Reason for calling the init function from here
is that I wanted to ensure that it is called after controller
identification. But then it is really undesirable to re-instantiate
the driver on each device reset. I'll have to think about that.

> [1] https://lore.kernel.org/linux-devicetree/1561990354-4084-2-git-send-email-akinobu.mita@gmail.com/
> 
>> diff --git a/drivers/nvme/host/nvme-hwmon.c b/drivers/nvme/host/nvme-hwmon.c
>> new file mode 100644
>> index 000000000000..af5eda326ec6
>> --- /dev/null
>> +++ b/drivers/nvme/host/nvme-hwmon.c
>> @@ -0,0 +1,163 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * NVM Express hardware monitoring support
>> + * Copyright (c) 2019, Guenter Roeck
>> + */
>> + >> +#include <linux/hwmon.h>
>> +
>> +#include "nvme.h"
>> +
>> +struct nvme_hwmon_data {
>> +       struct nvme_ctrl *ctrl;
>> +       struct nvme_smart_log log;
>> +};
>> +
>> +static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
>> +{
>> +       return nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
>> +                           &data->log, sizeof(data->log), 0);
>> +}
> 
> The 'data->log' is allocated per nvme_ctrl, so are there any locks to
> prevent multiple callers of nvme_hwmon_get_smart_log() from breaking
> the log buffer?
> 
Good point. This needs either local memory like in your patch, or
I'll need a lock. I prefer a lock, though I am open to suggestions.

>> +
>> +static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
>> +                          u32 attr, int channel, long *val)
>> +{
>> +       struct nvme_hwmon_data *data = dev_get_drvdata(dev);
>> +       struct nvme_smart_log *log = &data->log;
>> +       int err;
>> +       int temp;
>> +
>> +       err = nvme_hwmon_get_smart_log(data);
>> +       if (err)
>> +               return err < 0 ? err : -EPROTO;
>> +
>> +       switch (attr) {
>> +       case hwmon_temp_max:
>> +               *val = (data->ctrl->wctemp - 273) * 1000;
>> +               break;
>> +       case hwmon_temp_crit:
>> +               *val = (data->ctrl->cctemp - 273) * 1000;
>> +               break;
> 
> When this function is called with 'hwmon_temp_max' or 'hwmon_temp_crit',
> we don't need to call nvme_hwmon_get_smart_log() at all, do we?
> 

Another good point.

Thanks,
Guenter

>> +       case hwmon_temp_input:
>> +               if (!channel)
>> +                       temp = le16_to_cpup((__le16 *)log->temperature);
>> +               else
>> +                       temp = le16_to_cpu(log->temp_sensor[channel - 1]);
>> +               *val = (temp - 273) * 1000;
>> +               break;
>> +       case hwmon_temp_crit_alarm:
>> +               *val = !!(log->critical_warning & NVME_SMART_CRIT_TEMPERATURE);
>> +               break;
>> +       default:
>> +               err = -EOPNOTSUPP;
>> +               break;
>> +       }
>> +       return err;
>> +}
> 


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-10-30 14:05   ` Christoph Hellwig
@ 2019-10-31  2:54     ` Guenter Roeck
  2019-10-31 13:46       ` Christoph Hellwig
  2019-10-31 13:46     ` Akinobu Mita
  1 sibling, 1 reply; 16+ messages in thread
From: Guenter Roeck @ 2019-10-31  2:54 UTC (permalink / raw)
  To: Christoph Hellwig, Akinobu Mita
  Cc: Sagi Grimberg, Linux PM, LKML, linux-nvme, Jens Axboe,
	Keith Busch, Chris Healy

On 10/30/19 7:05 AM, Christoph Hellwig wrote:
> On Wed, Oct 30, 2019 at 08:16:48PM +0900, Akinobu Mita wrote:
>> The nvme_init_identify() can be called multiple time in nvme ctrl's
>> lifetime (e.g 'nvme reset /dev/nvme*' or suspend/resume paths), so
>> should we need to prevent nvme_hwmon_init() from registering hwmon
>> device more than twice?
>>
>> In the nvme thermal zone patchset[1], thernal zone is registered in
>> nvme_init_identify and unregistered in nvme_stop_ctrl().
> 
> So Guenter said above the thermal subsystem could use the information
> from hwmon as well.  Does this mean this patch would solve your needs
> as well?
> 
Depends on the requirements. Unlike hwmon/iio, we don't have clear
guidelines describing when thermal vs. hwmon would be a better choice.
There is some interconnect between thermal and hwmon, but quite often
it is a one-way street (hwmon devices can easily register thermal
zones, for thermal zone devices it is a bit more difficult to register
associated hwmon devices).

For the most part, peripherals (memory, network devices, video
controllers, real time clocks, etc) are today handled by the hardware
monitoring subsystem. The one notable exception is the ath10k wireless
controller, but even that registers both a thermal device and a hardware
monitoring device. Sometimes peripheral devices tell the hardware
monitoring subsystem that it should also register thermal zones (I
would guess that ath10k doesn't do that because the mechanism didn't
exist back in 2014). On the other side, SoCs typically register
thermal zones and rarely register as hardware monitoring device.

Guenter

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-10-31  2:20   ` Guenter Roeck
@ 2019-10-31 13:44     ` Akinobu Mita
  2019-10-31 13:45     ` Christoph Hellwig
  1 sibling, 0 replies; 16+ messages in thread
From: Akinobu Mita @ 2019-10-31 13:44 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Sagi Grimberg, Linux PM, LKML, linux-nvme, Jens Axboe,
	Keith Busch, Christoph Hellwig, Chris Healy

2019年10月31日(木) 11:20 Guenter Roeck <linux@roeck-us.net>:
>
> On 10/30/19 4:16 AM, Akinobu Mita wrote:
> > 2019年10月30日(水) 7:32 Guenter Roeck <linux@roeck-us.net>:
> >>
> >> nvme devices report temperature information in the controller information
> >> (for limits) and in the smart log. Currently, the only means to retrieve
> >> this information is the nvme command line interface, which requires
> >> super-user privileges.
> >>
> >> At the same time, it would be desirable to use NVME temperature information
> >> for thermal control.
> >>
> >> This patch adds support to read NVME temperatures from the kernel using the
> >> hwmon API and adds temperature zones for NVME drives. The thermal subsystem
> >> can use this information to set thermal policies, and userspace can access
> >> it using libsensors and/or the "sensors" command.
> >>
> >> Example output from the "sensors" command:
> >>
> >> nvme0-pci-0100
> >> Adapter: PCI adapter
> >> Composite:    +39.0°C  (high = +85.0°C, crit = +85.0°C)
> >> Sensor 1:     +39.0°C
> >> Sensor 2:     +41.0°C
> >>
> >> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> >> ---
> >> v2: Use devm_kfree() to release memory in error path
> >>
> >>   drivers/nvme/host/Kconfig      |  10 ++
> >>   drivers/nvme/host/Makefile     |   1 +
> >>   drivers/nvme/host/core.c       |   5 +
> >>   drivers/nvme/host/nvme-hwmon.c | 163 +++++++++++++++++++++++++++++++++
> >>   drivers/nvme/host/nvme.h       |   8 ++
> >>   5 files changed, 187 insertions(+)
> >>   create mode 100644 drivers/nvme/host/nvme-hwmon.c
> >>
> >> diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
> >> index 2b36f052bfb9..aeb49e16e386 100644
> >> --- a/drivers/nvme/host/Kconfig
> >> +++ b/drivers/nvme/host/Kconfig
> >> @@ -23,6 +23,16 @@ config NVME_MULTIPATH
> >>             /dev/nvmeXnY device will show up for each NVMe namespaces,
> >>             even if it is accessible through multiple controllers.
> >>
> >> +config NVME_HWMON
> >> +       bool "NVME hardware monitoring"
> >> +       depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON)
> >> +       help
> >> +         This provides support for NVME hardware monitoring. If enabled,
> >> +         a hardware monitoring device will be created for each NVME drive
> >> +         in the system.
> >> +
> >> +         If unsure, say N.
> >> +
> >>   config NVME_FABRICS
> >>          tristate
> >>
> >> diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
> >> index 8a4b671c5f0c..03de4797a877 100644
> >> --- a/drivers/nvme/host/Makefile
> >> +++ b/drivers/nvme/host/Makefile
> >> @@ -14,6 +14,7 @@ nvme-core-$(CONFIG_TRACING)           += trace.o
> >>   nvme-core-$(CONFIG_NVME_MULTIPATH)     += multipath.o
> >>   nvme-core-$(CONFIG_NVM)                        += lightnvm.o
> >>   nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS)   += fault_inject.o
> >> +nvme-core-$(CONFIG_NVME_HWMON)         += nvme-hwmon.o
> >>
> >>   nvme-y                                 += pci.o
> >>
> >> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> >> index fa7ba09dca77..fc1d4b146717 100644
> >> --- a/drivers/nvme/host/core.c
> >> +++ b/drivers/nvme/host/core.c
> >> @@ -2796,6 +2796,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
> >>          ctrl->oncs = le16_to_cpu(id->oncs);
> >>          ctrl->mtfa = le16_to_cpu(id->mtfa);
> >>          ctrl->oaes = le32_to_cpu(id->oaes);
> >> +       ctrl->wctemp = le16_to_cpu(id->wctemp);
> >> +       ctrl->cctemp = le16_to_cpu(id->cctemp);
> >> +
> >>          atomic_set(&ctrl->abort_limit, id->acl + 1);
> >>          ctrl->vwc = id->vwc;
> >>          if (id->mdts)
> >> @@ -2897,6 +2900,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
> >>
> >>          ctrl->identified = true;
> >>
> >> +       nvme_hwmon_init(ctrl);
> >> +
> >>          return 0;
> >>
> >>   out_free:
> >
> > The nvme_init_identify() can be called multiple time in nvme ctrl's
> > lifetime (e.g 'nvme reset /dev/nvme*' or suspend/resume paths), so
> > should we need to prevent nvme_hwmon_init() from registering hwmon
> > device more than twice?
> >
> > In the nvme thermal zone patchset[1], thernal zone is registered in
> > nvme_init_identify and unregistered in nvme_stop_ctrl().
> >
>
> Doesn't that mean that the initialization should happen in nvme_start_ctrl()
> and not here ?

Seems possible.  But I would like to ask maintainers' opinion.

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-10-31  2:20   ` Guenter Roeck
  2019-10-31 13:44     ` Akinobu Mita
@ 2019-10-31 13:45     ` Christoph Hellwig
  2019-10-31 17:54       ` Guenter Roeck
  1 sibling, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2019-10-31 13:45 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Sagi Grimberg, Linux PM, Akinobu Mita, linux-nvme, LKML,
	Jens Axboe, Keith Busch, Christoph Hellwig, Chris Healy

On Wed, Oct 30, 2019 at 07:20:37PM -0700, Guenter Roeck wrote:
>> The nvme_init_identify() can be called multiple time in nvme ctrl's
>> lifetime (e.g 'nvme reset /dev/nvme*' or suspend/resume paths), so
>> should we need to prevent nvme_hwmon_init() from registering hwmon
>> device more than twice?
>>
>> In the nvme thermal zone patchset[1], thernal zone is registered in
>> nvme_init_identify and unregistered in nvme_stop_ctrl().
>>
>
> Doesn't that mean that the initialization should happen in nvme_start_ctrl()
> and not here ?

I think calling it from nvme_init_identify is fine, it just needs to
be in the "if (!ctrl->identified)" section of that function.

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-10-31  2:54     ` Guenter Roeck
@ 2019-10-31 13:46       ` Christoph Hellwig
  0 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2019-10-31 13:46 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Sagi Grimberg, Linux PM, LKML, linux-nvme, Akinobu Mita,
	Jens Axboe, Keith Busch, Christoph Hellwig, Chris Healy

On Wed, Oct 30, 2019 at 07:54:47PM -0700, Guenter Roeck wrote:
> On 10/30/19 7:05 AM, Christoph Hellwig wrote:
>> On Wed, Oct 30, 2019 at 08:16:48PM +0900, Akinobu Mita wrote:
>>> The nvme_init_identify() can be called multiple time in nvme ctrl's
>>> lifetime (e.g 'nvme reset /dev/nvme*' or suspend/resume paths), so
>>> should we need to prevent nvme_hwmon_init() from registering hwmon
>>> device more than twice?
>>>
>>> In the nvme thermal zone patchset[1], thernal zone is registered in
>>> nvme_init_identify and unregistered in nvme_stop_ctrl().
>>
>> So Guenter said above the thermal subsystem could use the information
>> from hwmon as well.  Does this mean this patch would solve your needs
>> as well?
>>
> Depends on the requirements. Unlike hwmon/iio, we don't have clear
> guidelines describing when thermal vs. hwmon would be a better choice.
> There is some interconnect between thermal and hwmon, but quite often
> it is a one-way street (hwmon devices can easily register thermal
> zones, for thermal zone devices it is a bit more difficult to register
> associated hwmon devices).

I'd like to hear from Akinobu-san if this also solves the thermal zone
use case.  If so I'd be much happier as we at least solve two use cases
with one patch.

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-10-30 14:05   ` Christoph Hellwig
  2019-10-31  2:54     ` Guenter Roeck
@ 2019-10-31 13:46     ` Akinobu Mita
  1 sibling, 0 replies; 16+ messages in thread
From: Akinobu Mita @ 2019-10-31 13:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Sagi Grimberg, Linux PM, LKML, linux-nvme, Jens Axboe,
	Chris Healy, Keith Busch, Guenter Roeck

2019年10月30日(水) 23:05 Christoph Hellwig <hch@lst.de>:
>
> On Wed, Oct 30, 2019 at 08:16:48PM +0900, Akinobu Mita wrote:
> > The nvme_init_identify() can be called multiple time in nvme ctrl's
> > lifetime (e.g 'nvme reset /dev/nvme*' or suspend/resume paths), so
> > should we need to prevent nvme_hwmon_init() from registering hwmon
> > device more than twice?
> >
> > In the nvme thermal zone patchset[1], thernal zone is registered in
> > nvme_init_identify and unregistered in nvme_stop_ctrl().
>
> So Guenter said above the thermal subsystem could use the information
> from hwmon as well.  Does this mean this patch would solve your needs
> as well?

The main reason I chose thermal framework was to utilize the temperature
threshold feature for notification on crossing a trip point temperature
without polling for smart log.

But the device I used for testing doesn't seem to report asynchronous
event immediately, so I'm not fully sure that's useful for now.

I have no problem with this nvme hwmon patch.  Maybe we can integrate
the temperature threshold feature into the nvme hwmon afterward.

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-10-31 13:45     ` Christoph Hellwig
@ 2019-10-31 17:54       ` Guenter Roeck
  0 siblings, 0 replies; 16+ messages in thread
From: Guenter Roeck @ 2019-10-31 17:54 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Sagi Grimberg, Linux PM, Akinobu Mita, linux-nvme, LKML,
	Jens Axboe, Keith Busch, Chris Healy

On Thu, Oct 31, 2019 at 02:45:49PM +0100, Christoph Hellwig wrote:
> On Wed, Oct 30, 2019 at 07:20:37PM -0700, Guenter Roeck wrote:
> >> The nvme_init_identify() can be called multiple time in nvme ctrl's
> >> lifetime (e.g 'nvme reset /dev/nvme*' or suspend/resume paths), so
> >> should we need to prevent nvme_hwmon_init() from registering hwmon
> >> device more than twice?
> >>
> >> In the nvme thermal zone patchset[1], thernal zone is registered in
> >> nvme_init_identify and unregistered in nvme_stop_ctrl().
> >>
> >
> > Doesn't that mean that the initialization should happen in nvme_start_ctrl()
> > and not here ?
> 
> I think calling it from nvme_init_identify is fine, it just needs to
> be in the "if (!ctrl->identified)" section of that function.

Excellent, I'll do that. Thanks a lot for the hint!

Guenter

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-10-30  0:53 ` Keith Busch
@ 2019-11-06 21:29   ` Pavel Machek
  2019-11-06 22:30     ` Guenter Roeck
  2019-11-06 23:58     ` Chris Healy
  0 siblings, 2 replies; 16+ messages in thread
From: Pavel Machek @ 2019-11-06 21:29 UTC (permalink / raw)
  To: Keith Busch
  Cc: Sagi Grimberg, linux-pm, linux-kernel, linux-nvme, Akinobu Mita,
	Jens Axboe, Chris Healy, Christoph Hellwig, Guenter Roeck

[-- Attachment #1.1: Type: text/plain, Size: 1654 bytes --]

Hi!

> > nvme devices report temperature information in the controller information
> > (for limits) and in the smart log. Currently, the only means to retrieve
> > this information is the nvme command line interface, which requires
> > super-user privileges.
> > 
> > At the same time, it would be desirable to use NVME temperature information
> > for thermal control.
> > 
> > This patch adds support to read NVME temperatures from the kernel using the
> > hwmon API and adds temperature zones for NVME drives. The thermal subsystem
> > can use this information to set thermal policies, and userspace can access
> > it using libsensors and/or the "sensors" command.
> > 
> > Example output from the "sensors" command:
> > 
> > nvme0-pci-0100
> > Adapter: PCI adapter
> > Composite:    +39.0°C  (high = +85.0°C, crit = +85.0°C)
> > Sensor 1:     +39.0°C
> > Sensor 2:     +41.0°C
> > 
> > Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> 
> This looks fine to me, but I'll wait a few more days to see if there are
> any additional comments..

User wants to know temperature of /dev/sda... and we already have an
userspace tools knowing about smart, etc...

pavel@amd:/data/film$ sudo hddtemp /dev/sda
/dev/sda: ST1000LM014-1EJ164: 48°C

I see we also have sensors framework but it does _not_ handle
harddrive temperatures.

Does it need some kind of unification? Should NVMe devices expose
"SMART" information in the same way other SSDs do?

Best regards,
								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #1.2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

[-- Attachment #2: Type: text/plain, Size: 158 bytes --]

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-11-06 21:29   ` Pavel Machek
@ 2019-11-06 22:30     ` Guenter Roeck
  2019-11-06 23:58     ` Chris Healy
  1 sibling, 0 replies; 16+ messages in thread
From: Guenter Roeck @ 2019-11-06 22:30 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Sagi Grimberg, linux-pm, linux-kernel, linux-nvme, Akinobu Mita,
	Jens Axboe, Keith Busch, Christoph Hellwig, Chris Healy

On Wed, Nov 06, 2019 at 10:29:21PM +0100, Pavel Machek wrote:
> Hi!
> 
> > > nvme devices report temperature information in the controller information
> > > (for limits) and in the smart log. Currently, the only means to retrieve
> > > this information is the nvme command line interface, which requires
> > > super-user privileges.
> > > 
> > > At the same time, it would be desirable to use NVME temperature information
> > > for thermal control.
> > > 
> > > This patch adds support to read NVME temperatures from the kernel using the
> > > hwmon API and adds temperature zones for NVME drives. The thermal subsystem
> > > can use this information to set thermal policies, and userspace can access
> > > it using libsensors and/or the "sensors" command.
> > > 
> > > Example output from the "sensors" command:
> > > 
> > > nvme0-pci-0100
> > > Adapter: PCI adapter
> > > Composite:    +39.0°C  (high = +85.0°C, crit = +85.0°C)
> > > Sensor 1:     +39.0°C
> > > Sensor 2:     +41.0°C
> > > 
> > > Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> > 
> > This looks fine to me, but I'll wait a few more days to see if there are
> > any additional comments..
> 
> User wants to know temperature of /dev/sda... and we already have an
> userspace tools knowing about smart, etc...
> 
> pavel@amd:/data/film$ sudo hddtemp /dev/sda
> /dev/sda: ST1000LM014-1EJ164: 48°C
> 
> I see we also have sensors framework but it does _not_ handle
> harddrive temperatures.
> 
> Does it need some kind of unification? Should NVMe devices expose
> "SMART" information in the same way other SSDs do?
> 

The unification to report hardware monitoring information to userspace
is called the sensors framework. Also, users in general prefer to not
have to run "sudo" to get such information.

Guenter

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] nvme: Add hardware monitoring support
  2019-11-06 21:29   ` Pavel Machek
  2019-11-06 22:30     ` Guenter Roeck
@ 2019-11-06 23:58     ` Chris Healy
  1 sibling, 0 replies; 16+ messages in thread
From: Chris Healy @ 2019-11-06 23:58 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Sagi Grimberg, linux-pm, linux-kernel, linux-nvme, Akinobu Mita,
	Jens Axboe, Keith Busch, Christoph Hellwig, Guenter Roeck

> > > Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> >
> > This looks fine to me, but I'll wait a few more days to see if there are
> > any additional comments..
>
> User wants to know temperature of /dev/sda... and we already have an
> userspace tools knowing about smart, etc...

At least for my use cases, being able to use the thermal subsystem of
the kernel to cool things down when the SSD gets too hot is important.
Is there a way to do this with userspace tools feeding back to the
kernel's thermal subsystem?

>
> pavel@amd:/data/film$ sudo hddtemp /dev/sda
> /dev/sda: ST1000LM014-1EJ164: 48°C
>
> I see we also have sensors framework but it does _not_ handle
> harddrive temperatures.
>
> Does it need some kind of unification? Should NVMe devices expose
> "SMART" information in the same way other SSDs do?
>
> Best regards,
>                                                                 Pavel
> --
> (english) http://www.livejournal.com/~pavelmachek
> (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, back to index

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-29 22:32 [PATCH v2] nvme: Add hardware monitoring support Guenter Roeck
2019-10-30  0:53 ` Keith Busch
2019-11-06 21:29   ` Pavel Machek
2019-11-06 22:30     ` Guenter Roeck
2019-11-06 23:58     ` Chris Healy
2019-10-30 11:16 ` Akinobu Mita
2019-10-30 14:05   ` Christoph Hellwig
2019-10-31  2:54     ` Guenter Roeck
2019-10-31 13:46       ` Christoph Hellwig
2019-10-31 13:46     ` Akinobu Mita
2019-10-31  2:20   ` Guenter Roeck
2019-10-31 13:44     ` Akinobu Mita
2019-10-31 13:45     ` Christoph Hellwig
2019-10-31 17:54       ` Guenter Roeck
2019-10-30 14:12 ` Christoph Hellwig
2019-10-30 23:40 ` Chris Healy

Linux-NVME Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nvme/0 linux-nvme/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nvme linux-nvme/ https://lore.kernel.org/linux-nvme \
		linux-nvme@lists.infradead.org
	public-inbox-index linux-nvme

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.infradead.lists.linux-nvme


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git