linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Eddie James <eajames@linux.vnet.ibm.com>
To: linux-kernel@vger.kernel.org
Cc: linux-hwmon@vger.kernel.org, devicetree@vger.kernel.org,
	linux-doc@vger.kernel.org, linux@roeck-us.net, jdelvare@suse.com,
	corbet@lwn.net, mark.rutland@arm.com, robh+dt@kernel.org,
	cbostic@linux.vnet.ibm.com, jk@ozlabs.org, joel@jms.id.au,
	andrew@aj.id.au, eajames@linux.vnet.ibm.com,
	"Edward A. James" <eajames@us.ibm.com>
Subject: [PATCH v2 07/10] drivers/hwmon/occ: Add error handling
Date: Thu, 27 Jul 2017 09:30:26 -0500	[thread overview]
Message-ID: <1501165829-12395-8-git-send-email-eajames@linux.vnet.ibm.com> (raw)
In-Reply-To: <1501165829-12395-1-git-send-email-eajames@linux.vnet.ibm.com>

From: "Edward A. James" <eajames@us.ibm.com>

Add logic to detect a number of error scenarios on the OCC. Export any
errors through an additional non-hwmon device attribute. The error
counting and state verification are required by the OCC hardware
specification.

Signed-off-by: Edward A. James <eajames@us.ibm.com>
---
 drivers/hwmon/occ/common.c | 82 ++++++++++++++++++++++++++++++++++++++++++++--
 drivers/hwmon/occ/common.h |  4 +++
 2 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c
index 2459e12..970b31f 100644
--- a/drivers/hwmon/occ/common.c
+++ b/drivers/hwmon/occ/common.c
@@ -17,6 +17,11 @@
 
 #include "common.h"
 
+#define OCC_ERROR_COUNT_THRESHOLD	2	/* OCC HW defined */
+
+#define OCC_STATE_SAFE			4
+#define OCC_SAFE_TIMEOUT		msecs_to_jiffies(60000) /* 1 min */
+
 #define OCC_UPDATE_FREQUENCY		msecs_to_jiffies(1000)
 
 /* OCC status bits */
@@ -126,10 +131,33 @@ struct extended_sensor {
 	u8 data[6];
 } __packed;
 
+static atomic_t occs_present = ATOMIC_INIT(0);
+
+/* Show callback for read-only occ_error; snprintf's size covers the NUL. */
+static ssize_t occ_show_error(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct occ *occ = dev_get_drvdata(dev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", occ->error);
+}
+static DEVICE_ATTR(occ_error, 0444, occ_show_error, NULL);
+
+/* Signal occ_error via sysfs when occ->error became a new non-zero value. */
+static void occ_notify_error(struct occ *occ, int old_error)
+{
+	/* a NULL hwmon pointer means the error attribute isn't there yet */
+	if (!occ->error || occ->error == old_error || !occ->hwmon)
+		return;
+	sysfs_notify(&occ->bus_dev->kobj, NULL, dev_attr_occ_error.attr.name);
+}
+
 static int occ_poll(struct occ *occ)
 {
+	struct occ_poll_response_header *header;
 	u16 checksum = occ->poll_cmd_data + 1;
 	u8 cmd[8];
+	int rc, old_error = occ->error;
 
 	/* big endian */
 	cmd[0] = 0;			/* sequence number */
@@ -142,12 +170,47 @@ static int occ_poll(struct occ *occ)
 	cmd[7] = 0;
 
 	/* mutex should already be locked if necessary */
-	return occ->send_cmd(occ, cmd);
+	rc = occ->send_cmd(occ, cmd);
+	if (rc) {
+		if (occ->error_count++ > OCC_ERROR_COUNT_THRESHOLD)
+			occ->error = rc;
+
+		goto done;
+	}
+
+	/* clear error since communication was successful */
+	occ->error_count = 0;
+	occ->error = 0;
+
+	header = (struct occ_poll_response_header *)occ->resp.data;
+	/* check for safe state */
+	if (header->occ_state == OCC_STATE_SAFE) {
+		if (occ->last_safe) {
+			if (time_after(jiffies,
+				       occ->last_safe + OCC_SAFE_TIMEOUT))
+				occ->error = -EHOSTDOWN;
+		} else {
+			occ->last_safe = jiffies;
+		}
+	} else {
+		occ->last_safe = 0;
+	}
+
+	if (header->status & OCC_STAT_MASTER) {
+		/* check if we're missing any OCCs */
+		if (hweight8(header->occs_present) !=
+		    atomic_read(&occs_present))
+			occ->error = -ENXIO;
+	}
+
+done:
+	occ_notify_error(occ, old_error);
+	return rc;
 }
 
 static int occ_set_user_power_cap(struct occ *occ, u16 user_power_cap)
 {
-	int rc;
+	int rc, old_error = occ->error;
 	u8 cmd[8];
 	u16 checksum = 0x24;
 	__be16 user_power_cap_be = cpu_to_be16(user_power_cap);
@@ -171,6 +234,16 @@ static int occ_set_user_power_cap(struct occ *occ, u16 user_power_cap)
 
 	mutex_unlock(&occ->lock);
 
+	if (rc) {
+		if (occ->error_count++ > OCC_ERROR_COUNT_THRESHOLD)
+			occ->error = rc;
+	} else {
+		/* successful communication so clear the error */
+		occ->error_count = 0;
+		occ->error = 0;
+	}
+
+	occ_notify_error(occ, old_error);
 	return rc;
 }
 
@@ -1110,6 +1183,7 @@ static ssize_t occ_show_status(struct device *dev,
 	&sensor_dev_attr_occ_mem_throttle.dev_attr.attr,
 	&sensor_dev_attr_occ_quick_drop.dev_attr.attr,
 	&sensor_dev_attr_occ_status.dev_attr.attr,
+	&dev_attr_occ_error.attr,
 	NULL
 };
 
@@ -1197,6 +1271,8 @@ int occ_setup(struct occ *occ, const char *name)
 		return rc;
 	}
 
+	atomic_inc(&occs_present);
+
 	rc = sysfs_create_group(&occ->bus_dev->kobj, &occ_attr_group);
 	if (rc)
 		dev_warn(occ->bus_dev, "failed to create status attrs: %d\n",
@@ -1208,4 +1284,6 @@ int occ_setup(struct occ *occ, const char *name)
 void occ_shutdown(struct occ *occ)
 {
 	sysfs_remove_group(&occ->bus_dev->kobj, &occ_attr_group);
+	/* drop this OCC from the count that occ_setup() incremented */
+	atomic_dec(&occs_present);
 }
diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h
index dc9e06d..cef2174 100644
--- a/drivers/hwmon/occ/common.h
+++ b/drivers/hwmon/occ/common.h
@@ -107,6 +107,10 @@ struct occ {
 	struct occ_attribute *attrs;
 	struct attribute_group group;
 	const struct attribute_group *groups[2];
+
+	int error;
+	unsigned int error_count;	/* number of errors observed */
+	unsigned long last_safe;	/* time OCC entered safe state */
 };
 
 int occ_setup(struct occ *occ, const char *name);
-- 
1.8.3.1

  parent reply	other threads:[~2017-07-27 14:31 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-27 14:30 [PATCH v2 00/10] drivers/hwmon: Add On-Chip Controller (OCC) hwmon driver Eddie James
2017-07-27 14:30 ` [PATCH v2 01/10] " Eddie James
2017-07-27 14:30 ` [PATCH v2 02/10] drivers/hwmon/occ: Add command transport method for P8 and P9 Eddie James
2017-07-29 16:48   ` kbuild test robot
2017-07-27 14:30 ` [PATCH v2 03/10] drivers/hwmon/occ: Parse OCC poll response Eddie James
2017-07-27 14:30 ` [PATCH v2 04/10] drivers/hwmon/occ: Add sensor types and versions Eddie James
2017-07-27 14:30 ` [PATCH v2 05/10] drivers/hwmon/occ: Add sensor attributes and register hwmon device Eddie James
2017-07-27 14:30 ` [PATCH v2 06/10] drivers/hwmon/occ: Add non-hwmon attributes Eddie James
2017-07-27 14:30 ` Eddie James [this message]
2017-07-27 14:30 ` [PATCH v2 08/10] Documentation: hwmon: Add OCC documentation Eddie James
2017-07-27 14:30 ` [PATCH v2 09/10] Documentation: ABI: Add occ-hwmon driver sysfs documentation Eddie James
2017-07-27 14:30 ` [PATCH v2 10/10] dt-bindings: i2c: Add P8 OCC hwmon driver documentation Eddie James
2017-08-03 20:24   ` Rob Herring

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1501165829-12395-8-git-send-email-eajames@linux.vnet.ibm.com \
    --to=eajames@linux.vnet.ibm.com \
    --cc=andrew@aj.id.au \
    --cc=cbostic@linux.vnet.ibm.com \
    --cc=corbet@lwn.net \
    --cc=devicetree@vger.kernel.org \
    --cc=eajames@us.ibm.com \
    --cc=jdelvare@suse.com \
    --cc=jk@ozlabs.org \
    --cc=joel@jms.id.au \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-hwmon@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@roeck-us.net \
    --cc=mark.rutland@arm.com \
    --cc=robh+dt@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).