linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/1] ishtp: Add support for Intel ishtp eclite driver
@ 2021-05-31 12:04 sumesh.k.naduvalath
  2021-05-31 14:54 ` Hans de Goede
  0 siblings, 1 reply; 6+ messages in thread
From: sumesh.k.naduvalath @ 2021-05-31 12:04 UTC (permalink / raw)
  To: hdegoede, mgross, srinivas.pandruvada
  Cc: srinivas.pandruvada, platform-driver-x86, linux-kernel,
	ganapathi.chinnu, nachiketa.kumar, sumesh.k.naduvalath

From: "K Naduvalath, Sumesh" <sumesh.k.naduvalath@intel.com>

This driver is for accessing the PSE(Programmable Service Engine), an
Embedded Controller like IP, using ISHTP(Integratd Sensor Hub Transport
Protocol) to get battery, thermal and UCSI(USB Type-C Connector System
Software Interface) related data from the platform.

Signed-off-by: K Naduvalath, Sumesh <sumesh.k.naduvalath@intel.com>
Reviewed-by: Mark Gross <mgross@linux.inel.com>
---
 MAINTAINERS                               |   6 +
 drivers/platform/x86/Kconfig              |  13 +
 drivers/platform/x86/Makefile             |   1 +
 drivers/platform/x86/intel_ishtp_eclite.c | 664 ++++++++++++++++++++++
 4 files changed, 684 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 503fd21901f1..cf32033cb754 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9242,6 +9242,12 @@ F:	Documentation/admin-guide/media/ipu3_rcb.svg
 F:	Documentation/userspace-api/media/v4l/pixfmt-meta-intel-ipu3.rst
 F:	drivers/staging/media/ipu3/
 
+INTEL ISHTP ECLITE DRIVER
+M:	Sumesh K Naduvalath <sumesh.k.naduvalath@intel.com>
+L:	platform-driver-x86@vger.kernel.org
+S:	Supported
+F:	drivers/platform/x86/intel_ishtp_eclite.c
+
 INTEL IXP4XX QMGR, NPE, ETHERNET and HSS SUPPORT
 M:	Krzysztof Halasa <khalasa@piap.pl>
 S:	Maintained
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 60592fb88e7a..cfa2cb150909 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -1180,6 +1180,19 @@ config INTEL_CHTDC_TI_PWRBTN
 	  To compile this driver as a module, choose M here: the module
 	  will be called intel_chtdc_ti_pwrbtn.
 
+config INTEL_ISHTP_ECLITE
+	tristate "Intel ISHTP eclite controller"
+	depends on INTEL_ISH_HID
+	depends on ACPI
+	help
+	  This driver is for accessing the PSE(Programmable Service Engine),
+	  an Embedded Controller like IP, using ISHTP(Integratd Sensor Hub
+	  Transport Protocol) to get battery, thermal and UCSI (USB Type-C
+          Connector System Software Interface) related data from the platform.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called intel_ishtp_eclite
+
 config INTEL_MRFLD_PWRBTN
 	tristate "Intel Merrifield Basin Cove power button driver"
 	depends on INTEL_SOC_PMIC_MRFLD
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index dcc8cdb95b4d..72ef4761b762 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_INTEL_INT0002_VGPIO)	+= intel_int0002_vgpio.o
 obj-$(CONFIG_INTEL_MENLOW)		+= intel_menlow.o
 obj-$(CONFIG_INTEL_OAKTRAIL)		+= intel_oaktrail.o
 obj-$(CONFIG_INTEL_VBTN)		+= intel-vbtn.o
+obj-$(CONFIG_INTEL_ISHTP_ECLITE)	+= intel_ishtp_eclite.o
 
 # MSI
 obj-$(CONFIG_MSI_LAPTOP)	+= msi-laptop.o
diff --git a/drivers/platform/x86/intel_ishtp_eclite.c b/drivers/platform/x86/intel_ishtp_eclite.c
new file mode 100644
index 000000000000..2956d678a420
--- /dev/null
+++ b/drivers/platform/x86/intel_ishtp_eclite.c
@@ -0,0 +1,664 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel ECLite opregion driver for talking to ECLite firmware running on
+ * Intel Integrated Sensor Hub (ISH) using ISH Transport Protocol (ISHTP)
+ *
+ * Copyright (c) 2021, Intel Corporation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/intel-ish-client-if.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
+#include <linux/types.h>
+#include <linux/uuid.h>
+#include <linux/uaccess.h>
+
+#define ECLITE_DATA_OPREGION_ID	0x9E
+#define ECLITE_CMD_OPREGION_ID	0x9F
+
+#define ECL_MSG_DATA	0x1
+#define ECL_MSG_EVENT	0x2
+
+#define ECL_ISH_READ	0x1
+#define ECL_ISH_WRITE	0x2
+#define ECL_ISH_HEADER_VERSION	0
+
+#define ECL_CL_RX_RING_SIZE	16
+#define ECL_CL_TX_RING_SIZE	8
+
+#define ECL_DATA_OPR_BUFLEN	384
+#define ECL_EVENTS_NOTIFY	333
+
+#define cmd_opr_offsetof(element)	offsetof(struct opregion_cmd, element)
+#define cl_data_to_dev(opr_dev)	ishtp_device((opr_dev)->cl_device)
+
+#ifndef BITS_TO_BYTES
+#define BITS_TO_BYTES(x) ((x) / 8)
+#endif
+
+struct opregion_cmd {
+	unsigned int command;
+	unsigned int offset;
+	unsigned int length;
+	unsigned int event_id;
+};
+
+struct opregion_data {
+	char data[ECL_DATA_OPR_BUFLEN];
+};
+
+struct opregion_context {
+	struct opregion_cmd cmd_area;
+	struct opregion_data data_area;
+};
+
+struct ecl_message_header {
+	unsigned int version:2;
+	unsigned int data_type:2;
+	unsigned int request_type:2;
+	unsigned int offset:9;
+	unsigned int data_len:9;
+	unsigned int event:8;
+};
+
+struct ecl_message {
+	struct ecl_message_header header;
+	char payload[ECL_DATA_OPR_BUFLEN];
+};
+
+struct ishtp_opregion_dev {
+	struct opregion_context opr_context;
+	struct ishtp_cl *ecl_ishtp_cl;
+	struct ishtp_cl_device *cl_device;
+	struct ishtp_fw_client *fw_client;
+	struct ishtp_cl_rb *rb;
+	struct acpi_handle *acpi_handle;
+	unsigned int dsm_event_id;
+	unsigned int ish_link_ready;
+	unsigned int ish_read_done;
+	unsigned int acpi_init_done;
+	wait_queue_head_t read_wait;
+	struct work_struct event_work;
+	struct work_struct reset_work;
+};
+
+/* eclite ishtp client UUID: 6a19cc4b-d760-4de3-b14d-f25ebd0fbcd9 */
+static const guid_t ecl_ishtp_guid =
+	GUID_INIT(0x6a19cc4b, 0xd760, 0x4de3,
+		  0xb1, 0x4d, 0xf2, 0x5e, 0xbd, 0xf, 0xbc, 0xd9);
+
+/* ACPI DSM UUID: 91d936a7-1f01-49c6-a6b4-72f00ad8d8a5 */
+static const guid_t ecl_acpi_guid =
+	GUID_INIT(0x91d936a7, 0x1f01, 0x49c6, 0xa6,
+		  0xb4, 0x72, 0xf0, 0x0a, 0xd8, 0xd8, 0xa5);
+
+/**
+ * ecl_ish_cl_read() - Read data from eclite FW
+ *
+ * @opr_dev:  pointer to opregion device
+ *
+ * This function issues a read request to eclite FW and waits until it
+ * receives a response. When response is received the read data is copied to
+ * opregion buffer.
+ */
+static int ecl_ish_cl_read(struct ishtp_opregion_dev *opr_dev)
+{
+	struct ecl_message_header header;
+	int len, rv;
+
+	if (!opr_dev->ish_link_ready)
+		return -EIO;
+
+	header.version = ECL_ISH_HEADER_VERSION;
+	header.data_type = ECL_MSG_DATA;
+	header.request_type = ECL_ISH_READ;
+	header.offset = opr_dev->opr_context.cmd_area.offset;
+	header.data_len = opr_dev->opr_context.cmd_area.length;
+	header.event = opr_dev->opr_context.cmd_area.event_id;
+	len = sizeof(header);
+
+	opr_dev->ish_read_done = false;
+	rv = ishtp_cl_send(opr_dev->ecl_ishtp_cl, (uint8_t *)&header, len);
+	if (rv) {
+		dev_err(cl_data_to_dev(opr_dev), "ish-read : send failed\n");
+		return -EIO;
+	}
+
+	dev_dbg(cl_data_to_dev(opr_dev),
+		"[ish_rd] Req: off : %x, len : %x\n",
+		header.offset,
+		header.data_len);
+
+	rv = wait_event_interruptible_timeout(opr_dev->read_wait,
+					      opr_dev->ish_read_done,
+					      2 * HZ);
+	if (!rv) {
+		dev_err(cl_data_to_dev(opr_dev),
+			"[ish_rd] No response from firmware\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ecl_ish_cl_write() - This function writes data to eclite FW.
+ *
+ * @opr_dev:  pointer to opregion device
+ *
+ * This function writes data to eclite FW.
+ */
+static int ecl_ish_cl_write(struct ishtp_opregion_dev *opr_dev)
+{
+	struct ecl_message message;
+	int len;
+
+	if (!opr_dev->ish_link_ready)
+		return -EIO;
+
+	message.header.version = ECL_ISH_HEADER_VERSION;
+	message.header.data_type = ECL_MSG_DATA;
+	message.header.request_type = ECL_ISH_WRITE;
+	message.header.offset = opr_dev->opr_context.cmd_area.offset;
+	message.header.data_len = opr_dev->opr_context.cmd_area.length;
+	message.header.event = opr_dev->opr_context.cmd_area.event_id;
+	len = sizeof(struct ecl_message_header) + message.header.data_len;
+
+	memcpy(message.payload,
+	       opr_dev->opr_context.data_area.data + message.header.offset,
+	       message.header.data_len);
+
+	dev_dbg(cl_data_to_dev(opr_dev),
+		"[ish_wr] off : %x, len : %x\n",
+		message.header.offset,
+		message.header.data_len);
+
+	return ishtp_cl_send(opr_dev->ecl_ishtp_cl, (uint8_t *)&message, len);
+}
+
+static acpi_status
+ecl_opregion_cmd_handler(u32 function, acpi_physical_address address,
+			 u32 bits, u64 *value64,
+			 void *handler_context, void *region_context)
+{
+	struct ishtp_opregion_dev *opr_dev;
+	struct opregion_cmd *cmd;
+
+	if (!region_context || !value64)
+		return AE_BAD_PARAMETER;
+
+	if (function == ACPI_READ)
+		return AE_ERROR;
+
+	opr_dev = (struct ishtp_opregion_dev *)region_context;
+	cmd = &opr_dev->opr_context.cmd_area;
+
+	switch (address) {
+	case cmd_opr_offsetof(command):
+		cmd->command = (u32)*value64;
+
+		if (cmd->command == ECL_ISH_READ)
+			return ecl_ish_cl_read(opr_dev);
+		else if (cmd->command == ECL_ISH_WRITE)
+			return ecl_ish_cl_write(opr_dev);
+
+		return AE_ERROR;
+
+	case cmd_opr_offsetof(offset):
+		cmd->offset = (u32)*value64;
+		break;
+	case cmd_opr_offsetof(length):
+		cmd->length = (u32)*value64;
+		break;
+	case cmd_opr_offsetof(event_id):
+		cmd->event_id = (u32)*value64;
+		break;
+	default:
+		return AE_ERROR;
+	}
+
+	return AE_OK;
+}
+
+static acpi_status
+ecl_opregion_data_handler(u32 function, acpi_physical_address address,
+			  u32 bits, u64 *value64,
+			  void *handler_context, void *region_context)
+{
+	struct ishtp_opregion_dev *opr_dev;
+	unsigned int bytes = BITS_TO_BYTES(bits);
+	void *data_addr;
+
+	if (!region_context || !value64)
+		return AE_BAD_PARAMETER;
+
+	if (address + bytes > ECL_DATA_OPR_BUFLEN)
+		return AE_BAD_PARAMETER;
+
+	opr_dev = (struct ishtp_opregion_dev *)region_context;
+	data_addr = &opr_dev->opr_context.data_area.data[address];
+
+	if (function == ACPI_READ)
+		memcpy(value64, data_addr, bytes);
+	else if (function == ACPI_WRITE)
+		memcpy(data_addr, value64, bytes);
+	else
+		return AE_BAD_PARAMETER;
+
+	return AE_OK;
+}
+
+static int acpi_opregion_init(struct ishtp_opregion_dev *opr_dev)
+{
+	acpi_status status;
+	struct acpi_device *adev;
+
+	/* Find ECLite device and install opregion handlers */
+	adev = acpi_dev_get_first_match_dev("INTC1035", NULL, -1);
+	if (!adev) {
+		dev_err(cl_data_to_dev(opr_dev), "eclite ACPI device not found\n");
+		return -ENODEV;
+	}
+
+	opr_dev->acpi_handle = adev->handle;
+	acpi_dev_put(adev);
+
+	status = acpi_install_address_space_handler(opr_dev->acpi_handle,
+						    ECLITE_CMD_OPREGION_ID,
+						    ecl_opregion_cmd_handler,
+						    NULL, opr_dev);
+	if (ACPI_FAILURE(status)) {
+		dev_err(cl_data_to_dev(opr_dev),
+			"cmd space handler install failed\n");
+		return -ENODEV;
+	}
+
+	status = acpi_install_address_space_handler(opr_dev->acpi_handle,
+						    ECLITE_DATA_OPREGION_ID,
+						    ecl_opregion_data_handler,
+						    NULL, opr_dev);
+	if (ACPI_FAILURE(status)) {
+		dev_err(cl_data_to_dev(opr_dev),
+			"data space handler install failed\n");
+
+		acpi_remove_address_space_handler(opr_dev->acpi_handle,
+						  ECLITE_CMD_OPREGION_ID,
+						  ecl_opregion_cmd_handler);
+		return -ENODEV;
+	}
+	opr_dev->acpi_init_done = true;
+
+	dev_dbg(cl_data_to_dev(opr_dev), "Opregion handlers are installed\n");
+
+	return 0;
+}
+
+static void acpi_opregion_deinit(struct ishtp_opregion_dev *opr_dev)
+{
+	acpi_remove_address_space_handler(opr_dev->acpi_handle,
+					  ECLITE_CMD_OPREGION_ID,
+					  ecl_opregion_cmd_handler);
+
+	acpi_remove_address_space_handler(opr_dev->acpi_handle,
+					  ECLITE_DATA_OPREGION_ID,
+					  ecl_opregion_data_handler);
+	opr_dev->acpi_init_done = false;
+}
+
+static void ecl_acpi_invoke_dsm(struct work_struct *work)
+{
+	struct ishtp_opregion_dev *opr_dev;
+	union acpi_object *obj;
+
+	opr_dev = container_of(work, struct ishtp_opregion_dev, event_work);
+	if (!opr_dev->acpi_init_done)
+		return;
+
+	obj = acpi_evaluate_dsm(opr_dev->acpi_handle, &ecl_acpi_guid, 0,
+				opr_dev->dsm_event_id, NULL);
+	if (!obj) {
+		dev_warn(cl_data_to_dev(opr_dev), "_DSM fn call failed\n");
+		return;
+	}
+
+	dev_dbg(cl_data_to_dev(opr_dev), "Exec DSM function code: %d success\n",
+		opr_dev->dsm_event_id);
+
+	ACPI_FREE(obj);
+}
+
+static void ecl_ish_process_rx_data(struct ishtp_opregion_dev *opr_dev)
+{
+	struct ecl_message *message =
+		(struct ecl_message *)opr_dev->rb->buffer.data;
+
+	dev_dbg(cl_data_to_dev(opr_dev),
+		"[ish_rd] Resp: off : %x, len : %x\n",
+		message->header.offset,
+		message->header.data_len);
+
+	memcpy(opr_dev->opr_context.data_area.data + message->header.offset,
+	       message->payload, message->header.data_len);
+
+	opr_dev->ish_read_done = true;
+	wake_up_interruptible(&opr_dev->read_wait);
+}
+
+static void ecl_ish_process_rx_event(struct ishtp_opregion_dev *opr_dev)
+{
+	struct ecl_message_header *header =
+		(struct ecl_message_header *)opr_dev->rb->buffer.data;
+
+	dev_dbg(cl_data_to_dev(opr_dev),
+		"[ish_ev] Evt received: %8x\n", header->event);
+
+	opr_dev->dsm_event_id = header->event;
+	schedule_work(&opr_dev->event_work);
+}
+
+static int ecl_ish_cl_enable_events(struct ishtp_opregion_dev *opr_dev,
+				    bool config_enable)
+{
+	struct ecl_message message;
+	int len;
+
+	message.header.version = ECL_ISH_HEADER_VERSION;
+	message.header.data_type = ECL_MSG_DATA;
+	message.header.request_type = ECL_ISH_WRITE;
+	message.header.offset = ECL_EVENTS_NOTIFY;
+	message.header.data_len = 1;
+	message.payload[0] = config_enable;
+
+	len = sizeof(struct ecl_message_header) + message.header.data_len;
+
+	return ishtp_cl_send(opr_dev->ecl_ishtp_cl, (uint8_t *)&message, len);
+}
+
+static void ecl_ishtp_cl_event_cb(struct ishtp_cl_device *cl_device)
+{
+	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
+	struct ishtp_opregion_dev *opr_dev;
+	struct ecl_message_header *header;
+	struct ishtp_cl_rb *rb;
+
+	opr_dev = ishtp_get_client_data(ecl_ishtp_cl);
+	while ((rb = ishtp_cl_rx_get_rb(opr_dev->ecl_ishtp_cl)) != NULL) {
+		opr_dev->rb = rb;
+		header = (struct ecl_message_header *)rb->buffer.data;
+
+		if (header->data_type == ECL_MSG_DATA)
+			ecl_ish_process_rx_data(opr_dev);
+		else if (header->data_type == ECL_MSG_EVENT)
+			ecl_ish_process_rx_event(opr_dev);
+		else
+			/* got an event with wrong data_type, ignore it */
+			dev_err(cl_data_to_dev(opr_dev),
+				"[ish_cb] Received wrong data_type\n");
+
+		ishtp_cl_io_rb_recycle(rb);
+	}
+}
+
+static int ecl_ishtp_cl_init(struct ishtp_cl *ecl_ishtp_cl)
+{
+	struct ishtp_opregion_dev *opr_dev =
+		ishtp_get_client_data(ecl_ishtp_cl);
+	struct ishtp_fw_client *fw_client;
+	struct ishtp_device *dev;
+	int rv;
+
+	rv = ishtp_cl_link(ecl_ishtp_cl);
+	if (rv) {
+		dev_err(cl_data_to_dev(opr_dev), "ishtp_cl_link failed\n");
+		return	rv;
+	}
+
+	dev = ishtp_get_ishtp_device(ecl_ishtp_cl);
+
+	/* Connect to FW client */
+	ishtp_set_tx_ring_size(ecl_ishtp_cl, ECL_CL_TX_RING_SIZE);
+	ishtp_set_rx_ring_size(ecl_ishtp_cl, ECL_CL_RX_RING_SIZE);
+
+	fw_client = ishtp_fw_cl_get_client(dev, &ecl_ishtp_guid);
+	if (!fw_client) {
+		dev_err(cl_data_to_dev(opr_dev), "fw client not found\n");
+		return -ENOENT;
+	}
+
+	ishtp_cl_set_fw_client_id(ecl_ishtp_cl,
+				  ishtp_get_fw_client_id(fw_client));
+
+	ishtp_set_connection_state(ecl_ishtp_cl, ISHTP_CL_CONNECTING);
+
+	rv = ishtp_cl_connect(ecl_ishtp_cl);
+	if (rv) {
+		dev_err(cl_data_to_dev(opr_dev), "client connect failed\n");
+
+		ishtp_cl_unlink(ecl_ishtp_cl);
+		return rv;
+	}
+
+	dev_dbg(cl_data_to_dev(opr_dev), "Host connected to fw client\n");
+
+	return 0;
+}
+
+static void ecl_ishtp_cl_deinit(struct ishtp_cl *ecl_ishtp_cl)
+{
+	ishtp_cl_unlink(ecl_ishtp_cl);
+	ishtp_cl_flush_queues(ecl_ishtp_cl);
+	ishtp_cl_free(ecl_ishtp_cl);
+}
+
+static void ecl_ishtp_cl_reset_handler(struct work_struct *work)
+{
+	struct ishtp_opregion_dev *opr_dev;
+	struct ishtp_cl_device *cl_device;
+	struct ishtp_cl *ecl_ishtp_cl;
+	int rv;
+	int retry;
+
+	opr_dev = container_of(work, struct ishtp_opregion_dev, reset_work);
+
+	opr_dev->ish_link_ready = false;
+
+	cl_device = opr_dev->cl_device;
+	ecl_ishtp_cl = opr_dev->ecl_ishtp_cl;
+
+	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
+
+	ecl_ishtp_cl = ishtp_cl_allocate(cl_device);
+	if (!ecl_ishtp_cl)
+		return;
+
+	ishtp_set_drvdata(cl_device, ecl_ishtp_cl);
+	ishtp_set_client_data(ecl_ishtp_cl, opr_dev);
+
+	opr_dev->ecl_ishtp_cl = ecl_ishtp_cl;
+
+	for (retry = 0; retry < 3; ++retry) {
+		rv = ecl_ishtp_cl_init(ecl_ishtp_cl);
+		if (!rv)
+			break;
+	}
+	if (rv) {
+		ishtp_cl_free(ecl_ishtp_cl);
+		opr_dev->ecl_ishtp_cl = NULL;
+		dev_err(cl_data_to_dev(opr_dev),
+			"[ish_rst] Reset failed. Link not ready.\n");
+		return;
+	}
+
+	ishtp_register_event_cb(cl_device, ecl_ishtp_cl_event_cb);
+	dev_info(cl_data_to_dev(opr_dev),
+		 "[ish_rst] Reset Success. Link ready.\n");
+
+	opr_dev->ish_link_ready = true;
+
+	if (opr_dev->acpi_init_done)
+		return;
+
+	rv = acpi_opregion_init(opr_dev);
+	if (rv) {
+		dev_err(cl_data_to_dev(opr_dev),
+			"ACPI opregion init failed\n");
+	}
+}
+
+static int ecl_ishtp_cl_probe(struct ishtp_cl_device *cl_device)
+{
+	struct ishtp_cl *ecl_ishtp_cl;
+	struct ishtp_opregion_dev *opr_dev;
+	int rv;
+
+	opr_dev = devm_kzalloc(ishtp_device(cl_device), sizeof(*opr_dev),
+			       GFP_KERNEL);
+	if (!opr_dev)
+		return -ENOMEM;
+
+	ecl_ishtp_cl = ishtp_cl_allocate(cl_device);
+	if (!ecl_ishtp_cl)
+		return -ENOMEM;
+
+	ishtp_set_drvdata(cl_device, ecl_ishtp_cl);
+	ishtp_set_client_data(ecl_ishtp_cl, opr_dev);
+	opr_dev->ecl_ishtp_cl = ecl_ishtp_cl;
+	opr_dev->cl_device = cl_device;
+
+	init_waitqueue_head(&opr_dev->read_wait);
+	INIT_WORK(&opr_dev->event_work, ecl_acpi_invoke_dsm);
+	INIT_WORK(&opr_dev->reset_work, ecl_ishtp_cl_reset_handler);
+
+	/* Initialize ish client device */
+	rv = ecl_ishtp_cl_init(ecl_ishtp_cl);
+	if (rv) {
+		dev_err(cl_data_to_dev(opr_dev), "Client init failed\n");
+		goto err_exit;
+	}
+
+	dev_dbg(cl_data_to_dev(opr_dev), "eclite-ishtp client initialised\n");
+
+	/* Register a handler for eclite fw events */
+	ishtp_register_event_cb(cl_device, ecl_ishtp_cl_event_cb);
+
+	ishtp_get_device(cl_device);
+
+	opr_dev->ish_link_ready = true;
+
+	/* Now find ACPI device and init opregion handlers */
+	rv = acpi_opregion_init(opr_dev);
+	if (rv) {
+		dev_err(cl_data_to_dev(opr_dev), "ACPI opregion init failed\n");
+
+		goto err_exit;
+	}
+
+	/* Reprobe devices depending on ECLite - battery, fan, etc. */
+	acpi_walk_dep_device_list(opr_dev->acpi_handle);
+
+	return 0;
+err_exit:
+	ishtp_set_connection_state(ecl_ishtp_cl, ISHTP_CL_DISCONNECTING);
+	ishtp_cl_disconnect(ecl_ishtp_cl);
+	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
+
+	ishtp_put_device(cl_device);
+
+	return rv;
+}
+
+static int ecl_ishtp_cl_remove(struct ishtp_cl_device *cl_device)
+{
+	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
+	struct ishtp_opregion_dev *opr_dev =
+		ishtp_get_client_data(ecl_ishtp_cl);
+
+	if (opr_dev->acpi_init_done)
+		acpi_opregion_deinit(opr_dev);
+
+	cancel_work_sync(&opr_dev->reset_work);
+	cancel_work_sync(&opr_dev->event_work);
+
+	ishtp_set_connection_state(ecl_ishtp_cl, ISHTP_CL_DISCONNECTING);
+	ishtp_cl_disconnect(ecl_ishtp_cl);
+	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
+
+	ishtp_put_device(cl_device);
+
+	return 0;
+}
+
+static int ecl_ishtp_cl_reset(struct ishtp_cl_device *cl_device)
+{
+	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
+	struct ishtp_opregion_dev *opr_dev =
+		ishtp_get_client_data(ecl_ishtp_cl);
+
+	schedule_work(&opr_dev->reset_work);
+
+	return 0;
+}
+
+static int ecl_ishtp_cl_suspend(struct device *device)
+{
+	struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device);
+	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
+	struct ishtp_opregion_dev *opr_dev =
+		ishtp_get_client_data(ecl_ishtp_cl);
+
+	if (acpi_target_system_state() == ACPI_STATE_S0)
+		return 0;
+
+	acpi_opregion_deinit(opr_dev);
+	ecl_ish_cl_enable_events(opr_dev, false);
+
+	return 0;
+}
+
+static int ecl_ishtp_cl_resume(struct device *device)
+{
+	/* A reset is expected to call after an Sx. At this point
+	 * we are not sure if the link is up or not to restore anything,
+	 * so do nothing in resume path
+	 */
+	return 0;
+}
+
+static const struct dev_pm_ops ecl_ishtp_pm_ops = {
+	.suspend = ecl_ishtp_cl_suspend,
+	.resume = ecl_ishtp_cl_resume,
+};
+
+static struct ishtp_cl_driver ecl_ishtp_cl_driver = {
+	.name = "ishtp-eclite",
+	.guid = &ecl_ishtp_guid,
+	.probe = ecl_ishtp_cl_probe,
+	.remove = ecl_ishtp_cl_remove,
+	.reset = ecl_ishtp_cl_reset,
+	.driver.pm = &ecl_ishtp_pm_ops,
+};
+
+static int __init ecl_ishtp_init(void)
+{
+	return ishtp_cl_driver_register(&ecl_ishtp_cl_driver, THIS_MODULE);
+}
+
+static void __exit ecl_ishtp_exit(void)
+{
+	return ishtp_cl_driver_unregister(&ecl_ishtp_cl_driver);
+}
+
+late_initcall(ecl_ishtp_init);
+module_exit(ecl_ishtp_exit);
+
+MODULE_DESCRIPTION("ISH ISHTP eclite client opregion driver");
+MODULE_AUTHOR("K Naduvalath, Sumesh <sumesh.k.naduvalath@intel.com>");
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ishtp:*");
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/1] ishtp: Add support for Intel ishtp eclite driver
  2021-05-31 12:04 [PATCH 1/1] ishtp: Add support for Intel ishtp eclite driver sumesh.k.naduvalath
@ 2021-05-31 14:54 ` Hans de Goede
  2021-06-01 18:24   ` K Naduvalath, Sumesh
  0 siblings, 1 reply; 6+ messages in thread
From: Hans de Goede @ 2021-05-31 14:54 UTC (permalink / raw)
  To: sumesh.k.naduvalath, mgross, srinivas.pandruvada
  Cc: srinivas.pandruvada, platform-driver-x86, linux-kernel,
	ganapathi.chinnu, nachiketa.kumar

Hi,

Thank you, I've done a quick review, which has already spotted quite
a few issues. Note I will probably do a more thorough review later
which mind find more issues, but lets start with fixing the serious
issues which this review has found.

I've a whole bunch of review remarks inline, please post a new
version fixing these.

On 5/31/21 2:04 PM, sumesh.k.naduvalath@intel.com wrote:
> From: "K Naduvalath, Sumesh" <sumesh.k.naduvalath@intel.com>
> 
> This driver is for accessing the PSE(Programmable Service Engine), an

Put a space between "PSE" and the '(' please.

> Embedded Controller like IP, using ISHTP(Integratd Sensor Hub Transport

Put a space between "ISHTP" and the '(' please. Also replace "Integratd"
with "Integrated"

> Protocol) to get battery, thermal and UCSI(USB Type-C Connector System

Put a space between "UCSI" and the '(' please.

> Software Interface) related data from the platform.
> 
> Signed-off-by: K Naduvalath, Sumesh <sumesh.k.naduvalath@intel.com>
> Reviewed-by: Mark Gross <mgross@linux.inel.com>

There is a typo in the email address of Mark, please correct this.

> ---
>  MAINTAINERS                               |   6 +
>  drivers/platform/x86/Kconfig              |  13 +
>  drivers/platform/x86/Makefile             |   1 +
>  drivers/platform/x86/intel_ishtp_eclite.c | 664 ++++++++++++++++++++++
>  4 files changed, 684 insertions(+)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 503fd21901f1..cf32033cb754 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -9242,6 +9242,12 @@ F:	Documentation/admin-guide/media/ipu3_rcb.svg
>  F:	Documentation/userspace-api/media/v4l/pixfmt-meta-intel-ipu3.rst
>  F:	drivers/staging/media/ipu3/
>  
> +INTEL ISHTP ECLITE DRIVER
> +M:	Sumesh K Naduvalath <sumesh.k.naduvalath@intel.com>
> +L:	platform-driver-x86@vger.kernel.org
> +S:	Supported
> +F:	drivers/platform/x86/intel_ishtp_eclite.c
> +
>  INTEL IXP4XX QMGR, NPE, ETHERNET and HSS SUPPORT
>  M:	Krzysztof Halasa <khalasa@piap.pl>
>  S:	Maintained
> diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
> index 60592fb88e7a..cfa2cb150909 100644
> --- a/drivers/platform/x86/Kconfig
> +++ b/drivers/platform/x86/Kconfig
> @@ -1180,6 +1180,19 @@ config INTEL_CHTDC_TI_PWRBTN
>  	  To compile this driver as a module, choose M here: the module
>  	  will be called intel_chtdc_ti_pwrbtn.
>  
> +config INTEL_ISHTP_ECLITE
> +	tristate "Intel ISHTP eclite controller"
> +	depends on INTEL_ISH_HID
> +	depends on ACPI
> +	help
> +	  This driver is for accessing the PSE(Programmable Service Engine),
> +	  an Embedded Controller like IP, using ISHTP(Integratd Sensor Hub
> +	  Transport Protocol) to get battery, thermal and UCSI (USB Type-C
> +          Connector System Software Interface) related data from the platform.

This text has all the same issues as the commit message. Also please explain
on what sort of systems this functionality is typically found/used so that
users will have a better idea if this is something which they should enable
on their systems.

> +
> +	  To compile this driver as a module, choose M here: the module
> +	  will be called intel_ishtp_eclite
> +
>  config INTEL_MRFLD_PWRBTN
>  	tristate "Intel Merrifield Basin Cove power button driver"
>  	depends on INTEL_SOC_PMIC_MRFLD
> diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
> index dcc8cdb95b4d..72ef4761b762 100644
> --- a/drivers/platform/x86/Makefile
> +++ b/drivers/platform/x86/Makefile
> @@ -77,6 +77,7 @@ obj-$(CONFIG_INTEL_INT0002_VGPIO)	+= intel_int0002_vgpio.o
>  obj-$(CONFIG_INTEL_MENLOW)		+= intel_menlow.o
>  obj-$(CONFIG_INTEL_OAKTRAIL)		+= intel_oaktrail.o
>  obj-$(CONFIG_INTEL_VBTN)		+= intel-vbtn.o
> +obj-$(CONFIG_INTEL_ISHTP_ECLITE)	+= intel_ishtp_eclite.o
>  
>  # MSI
>  obj-$(CONFIG_MSI_LAPTOP)	+= msi-laptop.o
> diff --git a/drivers/platform/x86/intel_ishtp_eclite.c b/drivers/platform/x86/intel_ishtp_eclite.c
> new file mode 100644
> index 000000000000..2956d678a420
> --- /dev/null
> +++ b/drivers/platform/x86/intel_ishtp_eclite.c
> @@ -0,0 +1,664 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Intel ECLite opregion driver for talking to ECLite firmware running on
> + * Intel Integrated Sensor Hub (ISH) using ISH Transport Protocol (ISHTP)
> + *
> + * Copyright (c) 2021, Intel Corporation.
> + */
> +
> +#include <linux/acpi.h>
> +#include <linux/bitops.h>
> +#include <linux/device.h>
> +#include <linux/errno.h>
> +#include <linux/intel-ish-client-if.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <linux/suspend.h>
> +#include <linux/types.h>
> +#include <linux/uuid.h>
> +#include <linux/uaccess.h>
> +
> +#define ECLITE_DATA_OPREGION_ID	0x9E
> +#define ECLITE_CMD_OPREGION_ID	0x9F
> +
> +#define ECL_MSG_DATA	0x1
> +#define ECL_MSG_EVENT	0x2
> +
> +#define ECL_ISH_READ	0x1
> +#define ECL_ISH_WRITE	0x2
> +#define ECL_ISH_HEADER_VERSION	0
> +
> +#define ECL_CL_RX_RING_SIZE	16
> +#define ECL_CL_TX_RING_SIZE	8
> +
> +#define ECL_DATA_OPR_BUFLEN	384
> +#define ECL_EVENTS_NOTIFY	333
> +
> +#define cmd_opr_offsetof(element)	offsetof(struct opregion_cmd, element)
> +#define cl_data_to_dev(opr_dev)	ishtp_device((opr_dev)->cl_device)
> +
> +#ifndef BITS_TO_BYTES
> +#define BITS_TO_BYTES(x) ((x) / 8)
> +#endif
> +
> +struct opregion_cmd {
> +	unsigned int command;
> +	unsigned int offset;
> +	unsigned int length;
> +	unsigned int event_id;
> +};
> +
> +struct opregion_data {
> +	char data[ECL_DATA_OPR_BUFLEN];
> +};
> +
> +struct opregion_context {
> +	struct opregion_cmd cmd_area;
> +	struct opregion_data data_area;
> +};
> +
> +struct ecl_message_header {
> +	unsigned int version:2;
> +	unsigned int data_type:2;
> +	unsigned int request_type:2;
> +	unsigned int offset:9;
> +	unsigned int data_len:9;
> +	unsigned int event:8;
> +};
> +
> +struct ecl_message {
> +	struct ecl_message_header header;
> +	char payload[ECL_DATA_OPR_BUFLEN];
> +};
> +
> +struct ishtp_opregion_dev {
> +	struct opregion_context opr_context;
> +	struct ishtp_cl *ecl_ishtp_cl;
> +	struct ishtp_cl_device *cl_device;
> +	struct ishtp_fw_client *fw_client;
> +	struct ishtp_cl_rb *rb;
> +	struct acpi_handle *acpi_handle;
> +	unsigned int dsm_event_id;
> +	unsigned int ish_link_ready;
> +	unsigned int ish_read_done;
> +	unsigned int acpi_init_done;
> +	wait_queue_head_t read_wait;
> +	struct work_struct event_work;
> +	struct work_struct reset_work;
> +};
> +
> +/* eclite ishtp client UUID: 6a19cc4b-d760-4de3-b14d-f25ebd0fbcd9 */
> +static const guid_t ecl_ishtp_guid =
> +	GUID_INIT(0x6a19cc4b, 0xd760, 0x4de3,
> +		  0xb1, 0x4d, 0xf2, 0x5e, 0xbd, 0xf, 0xbc, 0xd9);
> +
> +/* ACPI DSM UUID: 91d936a7-1f01-49c6-a6b4-72f00ad8d8a5 */
> +static const guid_t ecl_acpi_guid =
> +	GUID_INIT(0x91d936a7, 0x1f01, 0x49c6, 0xa6,
> +		  0xb4, 0x72, 0xf0, 0x0a, 0xd8, 0xd8, 0xa5);
> +
> +/**
> + * ecl_ish_cl_read() - Read data from eclite FW
> + *
> + * @opr_dev:  pointer to opregion device
> + *
> + * This function issues a read request to eclite FW and waits until it
> + * receives a response. When response is received the read data is copied to
> + * opregion buffer.
> + */
> +static int ecl_ish_cl_read(struct ishtp_opregion_dev *opr_dev)
> +{
> +	struct ecl_message_header header;
> +	int len, rv;
> +
> +	if (!opr_dev->ish_link_ready)
> +		return -EIO;
> +
> +	header.version = ECL_ISH_HEADER_VERSION;
> +	header.data_type = ECL_MSG_DATA;
> +	header.request_type = ECL_ISH_READ;
> +	header.offset = opr_dev->opr_context.cmd_area.offset;
> +	header.data_len = opr_dev->opr_context.cmd_area.length;

You should check here that offset + data_len does not exceed
ECL_DATA_OPR_BUFLEN.

> +	header.event = opr_dev->opr_context.cmd_area.event_id;
> +	len = sizeof(header);
> +
> +	opr_dev->ish_read_done = false;
> +	rv = ishtp_cl_send(opr_dev->ecl_ishtp_cl, (uint8_t *)&header, len);
> +	if (rv) {
> +		dev_err(cl_data_to_dev(opr_dev), "ish-read : send failed\n");
> +		return -EIO;
> +	}
> +
> +	dev_dbg(cl_data_to_dev(opr_dev),
> +		"[ish_rd] Req: off : %x, len : %x\n",
> +		header.offset,
> +		header.data_len);
> +
> +	rv = wait_event_interruptible_timeout(opr_dev->read_wait,
> +					      opr_dev->ish_read_done,
> +					      2 * HZ);
> +	if (!rv) {
> +		dev_err(cl_data_to_dev(opr_dev),
> +			"[ish_rd] No response from firmware\n");
> +		return -EIO;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * ecl_ish_cl_write() - This function writes data to eclite FW.
> + *
> + * @opr_dev:  pointer to opregion device
> + *
> + * This function writes data to eclite FW.
> + */
> +static int ecl_ish_cl_write(struct ishtp_opregion_dev *opr_dev)
> +{
> +	struct ecl_message message;
> +	int len;
> +
> +	if (!opr_dev->ish_link_ready)
> +		return -EIO;
> +
> +	message.header.version = ECL_ISH_HEADER_VERSION;
> +	message.header.data_type = ECL_MSG_DATA;
> +	message.header.request_type = ECL_ISH_WRITE;
> +	message.header.offset = opr_dev->opr_context.cmd_area.offset;
> +	message.header.data_len = opr_dev->opr_context.cmd_area.length;
> +	message.header.event = opr_dev->opr_context.cmd_area.event_id;
> +	len = sizeof(struct ecl_message_header) + message.header.data_len;
> +

You should check here that offset + data_len does not exceed ECL_DATA_OPR_BUFLEN,
before doing the memcpy.

> +	memcpy(message.payload,
> +	       opr_dev->opr_context.data_area.data + message.header.offset,
> +	       message.header.data_len);
> +
> +	dev_dbg(cl_data_to_dev(opr_dev),
> +		"[ish_wr] off : %x, len : %x\n",
> +		message.header.offset,
> +		message.header.data_len);
> +
> +	return ishtp_cl_send(opr_dev->ecl_ishtp_cl, (uint8_t *)&message, len);
> +}
> +
> +static acpi_status
> +ecl_opregion_cmd_handler(u32 function, acpi_physical_address address,
> +			 u32 bits, u64 *value64,
> +			 void *handler_context, void *region_context)
> +{
> +	struct ishtp_opregion_dev *opr_dev;
> +	struct opregion_cmd *cmd;
> +
> +	if (!region_context || !value64)
> +		return AE_BAD_PARAMETER;
> +
> +	if (function == ACPI_READ)
> +		return AE_ERROR;
> +
> +	opr_dev = (struct ishtp_opregion_dev *)region_context;
> +	cmd = &opr_dev->opr_context.cmd_area;

This is shared between all threads, we have had issues with
sharing opregion context between threads like this in the past.

What is stopping ACPI code from trying to use the opr_context
from multiple threads at the same time, messing things up?

I'm especially worried about the offset + data_len used in
various places, even if we add checks for this, this could
be changed underneath us by another thread.

You should add a mutex to the opr_dev and lock it in this
function so that the cmd struct cannot be changed underneath
us while we are processing it.

Note this does not fully protect against races like this:

1. Thread a sets offset
2. Thread b sets a different offset
3. Thread a writes ECL_ISH_READ to command.

This will result in thread a getting the data at the offset
specified by thread b, rather then at the offset which it
requested. But there is nothing we can do about that, that needs
to be solved with synchronization at the AML level.





> +
> +	switch (address) {
> +	case cmd_opr_offsetof(command):
> +		cmd->command = (u32)*value64;
> +
> +		if (cmd->command == ECL_ISH_READ)
> +			return ecl_ish_cl_read(opr_dev);
> +		else if (cmd->command == ECL_ISH_WRITE)
> +			return ecl_ish_cl_write(opr_dev);
> +
> +		return AE_ERROR;
> +
> +	case cmd_opr_offsetof(offset):
> +		cmd->offset = (u32)*value64;
> +		break;
> +	case cmd_opr_offsetof(length):
> +		cmd->length = (u32)*value64;
> +		break;
> +	case cmd_opr_offsetof(event_id):
> +		cmd->event_id = (u32)*value64;
> +		break;
> +	default:
> +		return AE_ERROR;
> +	}
> +
> +	return AE_OK;
> +}
> +
> +static acpi_status
> +ecl_opregion_data_handler(u32 function, acpi_physical_address address,
> +			  u32 bits, u64 *value64,
> +			  void *handler_context, void *region_context)
> +{
> +	struct ishtp_opregion_dev *opr_dev;
> +	unsigned int bytes = BITS_TO_BYTES(bits);
> +	void *data_addr;
> +
> +	if (!region_context || !value64)
> +		return AE_BAD_PARAMETER;
> +
> +	if (address + bytes > ECL_DATA_OPR_BUFLEN)
> +		return AE_BAD_PARAMETER;
> +
> +	opr_dev = (struct ishtp_opregion_dev *)region_context;
> +	data_addr = &opr_dev->opr_context.data_area.data[address];
> +
> +	if (function == ACPI_READ)
> +		memcpy(value64, data_addr, bytes);
> +	else if (function == ACPI_WRITE)
> +		memcpy(data_addr, value64, bytes);

What if bits is not a multiple of 8? Then we have just overwritten
a bunch of bits which the caller did not request us to do.

Since the caller specifies bits, this should really do a read-modify-write
of the last byte when there are any "leftover" bits. ?  What does the
documentation say about this?

> +	else
> +		return AE_BAD_PARAMETER;
> +
> +	return AE_OK;
> +}
> +
> +static int acpi_opregion_init(struct ishtp_opregion_dev *opr_dev)
> +{
> +	acpi_status status;
> +	struct acpi_device *adev;
> +
> +	/* Find ECLite device and install opregion handlers */
> +	adev = acpi_dev_get_first_match_dev("INTC1035", NULL, -1);
> +	if (!adev) {
> +		dev_err(cl_data_to_dev(opr_dev), "eclite ACPI device not found\n");
> +		return -ENODEV;
> +	}
> +
> +	opr_dev->acpi_handle = adev->handle;

acpi_opregion_init() seem to get called on every resume, doing the
lookup is only necessary once, after that the cached value in opr_dev->acpi_handle
can be reused.

More importantly this whole dance of unregistering + re-registering the
opregion seems unnecessary. You already have ish_link_ready in case the
opregion gets called before things are ready; and if the opregion is called
when the link is not ready, still having the opregion handler in place
allows you to log a sensible error about what is going on which is what
we want.

> +	acpi_dev_put(adev);
> +
> +	status = acpi_install_address_space_handler(opr_dev->acpi_handle,
> +						    ECLITE_CMD_OPREGION_ID,
> +						    ecl_opregion_cmd_handler,
> +						    NULL, opr_dev);
> +	if (ACPI_FAILURE(status)) {
> +		dev_err(cl_data_to_dev(opr_dev),
> +			"cmd space handler install failed\n");
> +		return -ENODEV;
> +	}
> +
> +	status = acpi_install_address_space_handler(opr_dev->acpi_handle,
> +						    ECLITE_DATA_OPREGION_ID,
> +						    ecl_opregion_data_handler,
> +						    NULL, opr_dev);
> +	if (ACPI_FAILURE(status)) {
> +		dev_err(cl_data_to_dev(opr_dev),
> +			"data space handler install failed\n");
> +
> +		acpi_remove_address_space_handler(opr_dev->acpi_handle,
> +						  ECLITE_CMD_OPREGION_ID,
> +						  ecl_opregion_cmd_handler);
> +		return -ENODEV;
> +	}
> +	opr_dev->acpi_init_done = true;
> +
> +	dev_dbg(cl_data_to_dev(opr_dev), "Opregion handlers are installed\n");
> +
> +	return 0;
> +}
> +
> +static void acpi_opregion_deinit(struct ishtp_opregion_dev *opr_dev)
> +{
> +	acpi_remove_address_space_handler(opr_dev->acpi_handle,
> +					  ECLITE_CMD_OPREGION_ID,
> +					  ecl_opregion_cmd_handler);
> +
> +	acpi_remove_address_space_handler(opr_dev->acpi_handle,
> +					  ECLITE_DATA_OPREGION_ID,
> +					  ecl_opregion_data_handler);
> +	opr_dev->acpi_init_done = false;
> +}
> +
> +static void ecl_acpi_invoke_dsm(struct work_struct *work)
> +{
> +	struct ishtp_opregion_dev *opr_dev;
> +	union acpi_object *obj;
> +
> +	opr_dev = container_of(work, struct ishtp_opregion_dev, event_work);
> +	if (!opr_dev->acpi_init_done)

This should probably be replaced by ish_link_ready and you should probably
log an error when this happens.

> +		return;
> +
> +	obj = acpi_evaluate_dsm(opr_dev->acpi_handle, &ecl_acpi_guid, 0,
> +				opr_dev->dsm_event_id, NULL);
> +	if (!obj) {
> +		dev_warn(cl_data_to_dev(opr_dev), "_DSM fn call failed\n");
> +		return;
> +	}
> +
> +	dev_dbg(cl_data_to_dev(opr_dev), "Exec DSM function code: %d success\n",
> +		opr_dev->dsm_event_id);
> +
> +	ACPI_FREE(obj);
> +}
> +
> +static void ecl_ish_process_rx_data(struct ishtp_opregion_dev *opr_dev)
> +{
> +	struct ecl_message *message =
> +		(struct ecl_message *)opr_dev->rb->buffer.data;
> +
> +	dev_dbg(cl_data_to_dev(opr_dev),
> +		"[ish_rd] Resp: off : %x, len : %x\n",
> +		message->header.offset,
> +		message->header.data_len);
> +

You should check here that offset + data_len does not exceed ECL_DATA_OPR_BUFLEN,
before doing the memcpy.

> +	memcpy(opr_dev->opr_context.data_area.data + message->header.offset,
> +	       message->payload, message->header.data_len);
> +
> +	opr_dev->ish_read_done = true;
> +	wake_up_interruptible(&opr_dev->read_wait);
> +}
> +
> +static void ecl_ish_process_rx_event(struct ishtp_opregion_dev *opr_dev)
> +{
> +	struct ecl_message_header *header =
> +		(struct ecl_message_header *)opr_dev->rb->buffer.data;
> +
> +	dev_dbg(cl_data_to_dev(opr_dev),
> +		"[ish_ev] Evt received: %8x\n", header->event);
> +
> +	opr_dev->dsm_event_id = header->event;
> +	schedule_work(&opr_dev->event_work);
> +}
> +
> +static int ecl_ish_cl_enable_events(struct ishtp_opregion_dev *opr_dev,
> +				    bool config_enable)
> +{
> +	struct ecl_message message;
> +	int len;
> +
> +	message.header.version = ECL_ISH_HEADER_VERSION;
> +	message.header.data_type = ECL_MSG_DATA;
> +	message.header.request_type = ECL_ISH_WRITE;
> +	message.header.offset = ECL_EVENTS_NOTIFY;
> +	message.header.data_len = 1;
> +	message.payload[0] = config_enable;
> +
> +	len = sizeof(struct ecl_message_header) + message.header.data_len;
> +
> +	return ishtp_cl_send(opr_dev->ecl_ishtp_cl, (uint8_t *)&message, len);
> +}
> +
> +static void ecl_ishtp_cl_event_cb(struct ishtp_cl_device *cl_device)
> +{
> +	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
> +	struct ishtp_opregion_dev *opr_dev;
> +	struct ecl_message_header *header;
> +	struct ishtp_cl_rb *rb;
> +
> +	opr_dev = ishtp_get_client_data(ecl_ishtp_cl);
> +	while ((rb = ishtp_cl_rx_get_rb(opr_dev->ecl_ishtp_cl)) != NULL) {
> +		opr_dev->rb = rb;
> +		header = (struct ecl_message_header *)rb->buffer.data;
> +
> +		if (header->data_type == ECL_MSG_DATA)
> +			ecl_ish_process_rx_data(opr_dev);
> +		else if (header->data_type == ECL_MSG_EVENT)
> +			ecl_ish_process_rx_event(opr_dev);
> +		else
> +			/* got an event with wrong data_type, ignore it */
> +			dev_err(cl_data_to_dev(opr_dev),
> +				"[ish_cb] Received wrong data_type\n");
> +
> +		ishtp_cl_io_rb_recycle(rb);
> +	}
> +}
> +
> +static int ecl_ishtp_cl_init(struct ishtp_cl *ecl_ishtp_cl)
> +{
> +	struct ishtp_opregion_dev *opr_dev =
> +		ishtp_get_client_data(ecl_ishtp_cl);
> +	struct ishtp_fw_client *fw_client;
> +	struct ishtp_device *dev;
> +	int rv;
> +
> +	rv = ishtp_cl_link(ecl_ishtp_cl);
> +	if (rv) {
> +		dev_err(cl_data_to_dev(opr_dev), "ishtp_cl_link failed\n");
> +		return	rv;
> +	}
> +
> +	dev = ishtp_get_ishtp_device(ecl_ishtp_cl);
> +
> +	/* Connect to FW client */
> +	ishtp_set_tx_ring_size(ecl_ishtp_cl, ECL_CL_TX_RING_SIZE);
> +	ishtp_set_rx_ring_size(ecl_ishtp_cl, ECL_CL_RX_RING_SIZE);
> +
> +	fw_client = ishtp_fw_cl_get_client(dev, &ecl_ishtp_guid);
> +	if (!fw_client) {
> +		dev_err(cl_data_to_dev(opr_dev), "fw client not found\n");
> +		return -ENOENT;
> +	}
> +
> +	ishtp_cl_set_fw_client_id(ecl_ishtp_cl,
> +				  ishtp_get_fw_client_id(fw_client));
> +
> +	ishtp_set_connection_state(ecl_ishtp_cl, ISHTP_CL_CONNECTING);
> +
> +	rv = ishtp_cl_connect(ecl_ishtp_cl);
> +	if (rv) {
> +		dev_err(cl_data_to_dev(opr_dev), "client connect failed\n");
> +
> +		ishtp_cl_unlink(ecl_ishtp_cl);
> +		return rv;
> +	}
> +
> +	dev_dbg(cl_data_to_dev(opr_dev), "Host connected to fw client\n");
> +
> +	return 0;
> +}
> +
> +static void ecl_ishtp_cl_deinit(struct ishtp_cl *ecl_ishtp_cl)
> +{
> +	ishtp_cl_unlink(ecl_ishtp_cl);
> +	ishtp_cl_flush_queues(ecl_ishtp_cl);
> +	ishtp_cl_free(ecl_ishtp_cl);
> +}
> +
> +static void ecl_ishtp_cl_reset_handler(struct work_struct *work)
> +{
> +	struct ishtp_opregion_dev *opr_dev;
> +	struct ishtp_cl_device *cl_device;
> +	struct ishtp_cl *ecl_ishtp_cl;
> +	int rv;
> +	int retry;
> +
> +	opr_dev = container_of(work, struct ishtp_opregion_dev, reset_work);
> +
> +	opr_dev->ish_link_ready = false;
> +
> +	cl_device = opr_dev->cl_device;
> +	ecl_ishtp_cl = opr_dev->ecl_ishtp_cl;
> +
> +	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
> +
> +	ecl_ishtp_cl = ishtp_cl_allocate(cl_device);
> +	if (!ecl_ishtp_cl)
> +		return;

Is this whole freeing + re-alloc of the ISHTP client here
really necessary ? This seems like overkill.

> +
> +	ishtp_set_drvdata(cl_device, ecl_ishtp_cl);
> +	ishtp_set_client_data(ecl_ishtp_cl, opr_dev);
> +
> +	opr_dev->ecl_ishtp_cl = ecl_ishtp_cl;
> +
> +	for (retry = 0; retry < 3; ++retry) {
> +		rv = ecl_ishtp_cl_init(ecl_ishtp_cl);
> +		if (!rv)
> +			break;
> +	}
> +	if (rv) {
> +		ishtp_cl_free(ecl_ishtp_cl);
> +		opr_dev->ecl_ishtp_cl = NULL;
> +		dev_err(cl_data_to_dev(opr_dev),
> +			"[ish_rst] Reset failed. Link not ready.\n");
> +		return;
> +	}
> +
> +	ishtp_register_event_cb(cl_device, ecl_ishtp_cl_event_cb);
> +	dev_info(cl_data_to_dev(opr_dev),
> +		 "[ish_rst] Reset Success. Link ready.\n");
> +
> +	opr_dev->ish_link_ready = true;
> +
> +	if (opr_dev->acpi_init_done)
> +		return;
> +
> +	rv = acpi_opregion_init(opr_dev);
> +	if (rv) {
> +		dev_err(cl_data_to_dev(opr_dev),
> +			"ACPI opregion init failed\n");
> +	}
> +}
> +
> +static int ecl_ishtp_cl_probe(struct ishtp_cl_device *cl_device)
> +{
> +	struct ishtp_cl *ecl_ishtp_cl;
> +	struct ishtp_opregion_dev *opr_dev;
> +	int rv;
> +
> +	opr_dev = devm_kzalloc(ishtp_device(cl_device), sizeof(*opr_dev),
> +			       GFP_KERNEL);
> +	if (!opr_dev)
> +		return -ENOMEM;
> +
> +	ecl_ishtp_cl = ishtp_cl_allocate(cl_device);
> +	if (!ecl_ishtp_cl)
> +		return -ENOMEM;
> +
> +	ishtp_set_drvdata(cl_device, ecl_ishtp_cl);
> +	ishtp_set_client_data(ecl_ishtp_cl, opr_dev);
> +	opr_dev->ecl_ishtp_cl = ecl_ishtp_cl;
> +	opr_dev->cl_device = cl_device;
> +
> +	init_waitqueue_head(&opr_dev->read_wait);
> +	INIT_WORK(&opr_dev->event_work, ecl_acpi_invoke_dsm);
> +	INIT_WORK(&opr_dev->reset_work, ecl_ishtp_cl_reset_handler);
> +
> +	/* Initialize ish client device */
> +	rv = ecl_ishtp_cl_init(ecl_ishtp_cl);
> +	if (rv) {
> +		dev_err(cl_data_to_dev(opr_dev), "Client init failed\n");
> +		goto err_exit;
> +	}
> +
> +	dev_dbg(cl_data_to_dev(opr_dev), "eclite-ishtp client initialised\n");
> +
> +	/* Register a handler for eclite fw events */
> +	ishtp_register_event_cb(cl_device, ecl_ishtp_cl_event_cb);
> +
> +	ishtp_get_device(cl_device);

This seems weird, normally in the device-model a driver being bound to
a device guarantees that that device cannot go away before the
remove callback of the driver is called.

So it seems to me that this call + the put in both the err_exit and
ecl_ishtp_cl_remove() cases can be dropped.

> +
> +	opr_dev->ish_link_ready = true;
> +
> +	/* Now find ACPI device and init opregion handlers */
> +	rv = acpi_opregion_init(opr_dev);
> +	if (rv) {
> +		dev_err(cl_data_to_dev(opr_dev), "ACPI opregion init failed\n");
> +
> +		goto err_exit;
> +	}
> +
> +	/* Reprobe devices depending on ECLite - battery, fan, etc. */
> +	acpi_walk_dep_device_list(opr_dev->acpi_handle);
> +
> +	return 0;
> +err_exit:
> +	ishtp_set_connection_state(ecl_ishtp_cl, ISHTP_CL_DISCONNECTING);
> +	ishtp_cl_disconnect(ecl_ishtp_cl);
> +	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
> +
> +	ishtp_put_device(cl_device);
> +
> +	return rv;
> +}
> +
> +static int ecl_ishtp_cl_remove(struct ishtp_cl_device *cl_device)
> +{
> +	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
> +	struct ishtp_opregion_dev *opr_dev =
> +		ishtp_get_client_data(ecl_ishtp_cl);
> +
> +	if (opr_dev->acpi_init_done)
> +		acpi_opregion_deinit(opr_dev);
> +
> +	cancel_work_sync(&opr_dev->reset_work);
> +	cancel_work_sync(&opr_dev->event_work);
> +
> +	ishtp_set_connection_state(ecl_ishtp_cl, ISHTP_CL_DISCONNECTING);
> +	ishtp_cl_disconnect(ecl_ishtp_cl);
> +	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
> +
> +	ishtp_put_device(cl_device);
> +
> +	return 0;
> +}
> +
> +static int ecl_ishtp_cl_reset(struct ishtp_cl_device *cl_device)
> +{
> +	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
> +	struct ishtp_opregion_dev *opr_dev =
> +		ishtp_get_client_data(ecl_ishtp_cl);
> +
> +	schedule_work(&opr_dev->reset_work);
> +
> +	return 0;
> +}
> +
> +static int ecl_ishtp_cl_suspend(struct device *device)
> +{
> +	struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device);
> +	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
> +	struct ishtp_opregion_dev *opr_dev =
> +		ishtp_get_client_data(ecl_ishtp_cl);
> +
> +	if (acpi_target_system_state() == ACPI_STATE_S0)
> +		return 0;
> +
> +	acpi_opregion_deinit(opr_dev);
> +	ecl_ish_cl_enable_events(opr_dev, false);
> +
> +	return 0;
> +}
> +
> +static int ecl_ishtp_cl_resume(struct device *device)
> +{
> +	/* A reset is expected to call after an Sx. At this point
> +	 * we are not sure if the link is up or not to restore anything,
> +	 * so do nothing in resume path
> +	 */
> +	return 0;

This seems very wrong, this means that there are no resume ordering
guarantees meaning that drivers which are ordered to resume
after this driver, because they rely on the opregion may end up not being
able to use the opregion leading to all kind of issues.

IMHO what should happen here is that this driver waits for the
EClite to become ready here. Which probably means that it itself
should be only resumed after the ISH HID driver is, but I assume
that the ISH device is a parent of this device, so that ordering should
be correct automatically.

TBH the whole lets just not resume and do a reset instead and
then just tearing down the entire ISHTP client struct and setting
up a new one from scratch, just feels very wrong.  At a minimum the
teardown + bringup needs to happen before the resume callback completes,
but ideally this would be replaced by a much lighter resume solution.

Till this is fixed so that the device is guaranteed to be fully functional
once its resume callback has completed this code can not be merged,
so NACK to this patch for now.

Regards,

Hans




> +}
> +
> +static const struct dev_pm_ops ecl_ishtp_pm_ops = {
> +	.suspend = ecl_ishtp_cl_suspend,
> +	.resume = ecl_ishtp_cl_resume,
> +};
> +
> +static struct ishtp_cl_driver ecl_ishtp_cl_driver = {
> +	.name = "ishtp-eclite",
> +	.guid = &ecl_ishtp_guid,
> +	.probe = ecl_ishtp_cl_probe,
> +	.remove = ecl_ishtp_cl_remove,
> +	.reset = ecl_ishtp_cl_reset,
> +	.driver.pm = &ecl_ishtp_pm_ops,
> +};
> +
> +static int __init ecl_ishtp_init(void)
> +{
> +	return ishtp_cl_driver_register(&ecl_ishtp_cl_driver, THIS_MODULE);
> +}
> +
> +static void __exit ecl_ishtp_exit(void)
> +{
> +	return ishtp_cl_driver_unregister(&ecl_ishtp_cl_driver);
> +}
> +
> +late_initcall(ecl_ishtp_init);
> +module_exit(ecl_ishtp_exit);
> +
> +MODULE_DESCRIPTION("ISH ISHTP eclite client opregion driver");
> +MODULE_AUTHOR("K Naduvalath, Sumesh <sumesh.k.naduvalath@intel.com>");
> +
> +MODULE_LICENSE("GPL v2");
> +MODULE_ALIAS("ishtp:*");
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH 1/1] ishtp: Add support for Intel ishtp eclite driver
  2021-05-31 14:54 ` Hans de Goede
@ 2021-06-01 18:24   ` K Naduvalath, Sumesh
  2021-06-02 11:19     ` Hans de Goede
  0 siblings, 1 reply; 6+ messages in thread
From: K Naduvalath, Sumesh @ 2021-06-01 18:24 UTC (permalink / raw)
  To: Hans de Goede, mgross, srinivas.pandruvada
  Cc: Pandruvada, Srinivas, platform-driver-x86, linux-kernel, Chinnu,
	Ganapathi, Kumar, Nachiketa

Thank you Hans for the review comments. Please find the reply inline -

> -----Original Message-----
> From: Hans de Goede <hdegoede@redhat.com>
> Sent: Monday, May 31, 2021 8:25 PM
> To: K Naduvalath, Sumesh <sumesh.k.naduvalath@intel.com>;
> mgross@linux.intel.com; srinivas.pandruvada@linux.intel.com
> Cc: Pandruvada, Srinivas <srinivas.pandruvada@intel.com>; platform-driver-
> x86@vger.kernel.org; linux-kernel@vger.kernel.org; Chinnu, Ganapathi
> <ganapathi.chinnu@intel.com>; Kumar, Nachiketa
> <nachiketa.kumar@intel.com>
> Subject: Re: [PATCH 1/1] ishtp: Add support for Intel ishtp eclite driver
> 
> Hi,
> 
> Thank you, I've done a quick review, which has already spotted quite a few
> issues. Note I will probably do a more thorough review later which mind find
> more issues, but lets start with fixing the serious issues which this review has
> found.
> 
> I've a whole bunch of review remarks inline, please post a new version fixing
> these.
> 
> On 5/31/21 2:04 PM, sumesh.k.naduvalath@intel.com wrote:
> > From: "K Naduvalath, Sumesh" <sumesh.k.naduvalath@intel.com>
> >
> > This driver is for accessing the PSE(Programmable Service Engine), an
> 
> Put a space between "PSE" and the '(' please.
> 

Sure, Will correct in V2.

> > Embedded Controller like IP, using ISHTP(Integratd Sensor Hub
> > Transport
> 
> Put a space between "ISHTP" and the '(' please. Also replace "Integratd"
> with "Integrated"

Sure, Will correct in V2.

> 
> > Protocol) to get battery, thermal and UCSI(USB Type-C Connector System
> 
> Put a space between "UCSI" and the '(' please.

Sure, Will correct in V2.

> 
> > Software Interface) related data from the platform.
> >
> > Signed-off-by: K Naduvalath, Sumesh <sumesh.k.naduvalath@intel.com>
> > Reviewed-by: Mark Gross <mgross@linux.inel.com>
> 
> There is a typo in the email address of Mark, please correct this.

Thanks, Will correct.

> 
> > ---
> >  MAINTAINERS                               |   6 +
> >  drivers/platform/x86/Kconfig              |  13 +
> >  drivers/platform/x86/Makefile             |   1 +
> >  drivers/platform/x86/intel_ishtp_eclite.c | 664
> > ++++++++++++++++++++++
> >  4 files changed, 684 insertions(+)
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS index
> > 503fd21901f1..cf32033cb754 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -9242,6 +9242,12 @@ F:	Documentation/admin-
> guide/media/ipu3_rcb.svg
> >  F:	Documentation/userspace-api/media/v4l/pixfmt-meta-intel-ipu3.rst
> >  F:	drivers/staging/media/ipu3/
> >
> > +INTEL ISHTP ECLITE DRIVER
> > +M:	Sumesh K Naduvalath <sumesh.k.naduvalath@intel.com>
> > +L:	platform-driver-x86@vger.kernel.org
> > +S:	Supported
> > +F:	drivers/platform/x86/intel_ishtp_eclite.c
> > +
> >  INTEL IXP4XX QMGR, NPE, ETHERNET and HSS SUPPORT
> >  M:	Krzysztof Halasa <khalasa@piap.pl>
> >  S:	Maintained
> > diff --git a/drivers/platform/x86/Kconfig
> > b/drivers/platform/x86/Kconfig index 60592fb88e7a..cfa2cb150909 100644
> > --- a/drivers/platform/x86/Kconfig
> > +++ b/drivers/platform/x86/Kconfig
> > @@ -1180,6 +1180,19 @@ config INTEL_CHTDC_TI_PWRBTN
> >  	  To compile this driver as a module, choose M here: the module
> >  	  will be called intel_chtdc_ti_pwrbtn.
> >
> > +config INTEL_ISHTP_ECLITE
> > +	tristate "Intel ISHTP eclite controller"
> > +	depends on INTEL_ISH_HID
> > +	depends on ACPI
> > +	help
> > +	  This driver is for accessing the PSE(Programmable Service Engine),
> > +	  an Embedded Controller like IP, using ISHTP(Integratd Sensor Hub
> > +	  Transport Protocol) to get battery, thermal and UCSI (USB Type-C
> > +          Connector System Software Interface) related data from the
> platform.
> 
> This text has all the same issues as the commit message. Also please explain
> on what sort of systems this functionality is typically found/used so that
> users will have a better idea if this is something which they should enable on
> their systems.
> 

This functionality is enabled for the first time for Intel's Elkhartlake platform.
Users who don't want to use discrete Embedded Controller on their platform,
they can leverage the integrated solution of ECLite which is part of 
Elkhartlake's PSE subsystem. I'll add more text for the config item.

> > +
> > +	  To compile this driver as a module, choose M here: the module
> > +	  will be called intel_ishtp_eclite
> > +
> >  config INTEL_MRFLD_PWRBTN
> >  	tristate "Intel Merrifield Basin Cove power button driver"
> >  	depends on INTEL_SOC_PMIC_MRFLD
> > diff --git a/drivers/platform/x86/Makefile
> > b/drivers/platform/x86/Makefile index dcc8cdb95b4d..72ef4761b762
> > 100644
> > --- a/drivers/platform/x86/Makefile
> > +++ b/drivers/platform/x86/Makefile
> > @@ -77,6 +77,7 @@ obj-$(CONFIG_INTEL_INT0002_VGPIO)	+=
> intel_int0002_vgpio.o
> >  obj-$(CONFIG_INTEL_MENLOW)		+= intel_menlow.o
> >  obj-$(CONFIG_INTEL_OAKTRAIL)		+= intel_oaktrail.o
> >  obj-$(CONFIG_INTEL_VBTN)		+= intel-vbtn.o
> > +obj-$(CONFIG_INTEL_ISHTP_ECLITE)	+= intel_ishtp_eclite.o
> >
> >  # MSI
> >  obj-$(CONFIG_MSI_LAPTOP)	+= msi-laptop.o
> > diff --git a/drivers/platform/x86/intel_ishtp_eclite.c
> > b/drivers/platform/x86/intel_ishtp_eclite.c
> > new file mode 100644
> > index 000000000000..2956d678a420
> > --- /dev/null
> > +++ b/drivers/platform/x86/intel_ishtp_eclite.c
> > @@ -0,0 +1,664 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Intel ECLite opregion driver for talking to ECLite firmware
> > +running on
> > + * Intel Integrated Sensor Hub (ISH) using ISH Transport Protocol
> > +(ISHTP)
> > + *
> > + * Copyright (c) 2021, Intel Corporation.
> > + */
> > +
> > +#include <linux/acpi.h>
> > +#include <linux/bitops.h>
> > +#include <linux/device.h>
> > +#include <linux/errno.h>
> > +#include <linux/intel-ish-client-if.h> #include <linux/kernel.h>
> > +#include <linux/module.h> #include <linux/slab.h> #include
> > +<linux/suspend.h> #include <linux/types.h> #include <linux/uuid.h>
> > +#include <linux/uaccess.h>
> > +
> > +#define ECLITE_DATA_OPREGION_ID	0x9E
> > +#define ECLITE_CMD_OPREGION_ID	0x9F
> > +
> > +#define ECL_MSG_DATA	0x1
> > +#define ECL_MSG_EVENT	0x2
> > +
> > +#define ECL_ISH_READ	0x1
> > +#define ECL_ISH_WRITE	0x2
> > +#define ECL_ISH_HEADER_VERSION	0
> > +
> > +#define ECL_CL_RX_RING_SIZE	16
> > +#define ECL_CL_TX_RING_SIZE	8
> > +
> > +#define ECL_DATA_OPR_BUFLEN	384
> > +#define ECL_EVENTS_NOTIFY	333
> > +
> > +#define cmd_opr_offsetof(element)	offsetof(struct
> opregion_cmd, element)
> > +#define cl_data_to_dev(opr_dev)	ishtp_device((opr_dev)->cl_device)
> > +
> > +#ifndef BITS_TO_BYTES
> > +#define BITS_TO_BYTES(x) ((x) / 8)
> > +#endif
> > +
> > +struct opregion_cmd {
> > +	unsigned int command;
> > +	unsigned int offset;
> > +	unsigned int length;
> > +	unsigned int event_id;
> > +};
> > +
> > +struct opregion_data {
> > +	char data[ECL_DATA_OPR_BUFLEN];
> > +};
> > +
> > +struct opregion_context {
> > +	struct opregion_cmd cmd_area;
> > +	struct opregion_data data_area;
> > +};
> > +
> > +struct ecl_message_header {
> > +	unsigned int version:2;
> > +	unsigned int data_type:2;
> > +	unsigned int request_type:2;
> > +	unsigned int offset:9;
> > +	unsigned int data_len:9;
> > +	unsigned int event:8;
> > +};
> > +
> > +struct ecl_message {
> > +	struct ecl_message_header header;
> > +	char payload[ECL_DATA_OPR_BUFLEN];
> > +};
> > +
> > +struct ishtp_opregion_dev {
> > +	struct opregion_context opr_context;
> > +	struct ishtp_cl *ecl_ishtp_cl;
> > +	struct ishtp_cl_device *cl_device;
> > +	struct ishtp_fw_client *fw_client;
> > +	struct ishtp_cl_rb *rb;
> > +	struct acpi_handle *acpi_handle;
> > +	unsigned int dsm_event_id;
> > +	unsigned int ish_link_ready;
> > +	unsigned int ish_read_done;
> > +	unsigned int acpi_init_done;
> > +	wait_queue_head_t read_wait;
> > +	struct work_struct event_work;
> > +	struct work_struct reset_work;
> > +};
> > +
> > +/* eclite ishtp client UUID: 6a19cc4b-d760-4de3-b14d-f25ebd0fbcd9 */
> > +static const guid_t ecl_ishtp_guid =
> > +	GUID_INIT(0x6a19cc4b, 0xd760, 0x4de3,
> > +		  0xb1, 0x4d, 0xf2, 0x5e, 0xbd, 0xf, 0xbc, 0xd9);
> > +
> > +/* ACPI DSM UUID: 91d936a7-1f01-49c6-a6b4-72f00ad8d8a5 */ static
> > +const guid_t ecl_acpi_guid =
> > +	GUID_INIT(0x91d936a7, 0x1f01, 0x49c6, 0xa6,
> > +		  0xb4, 0x72, 0xf0, 0x0a, 0xd8, 0xd8, 0xa5);
> > +
> > +/**
> > + * ecl_ish_cl_read() - Read data from eclite FW
> > + *
> > + * @opr_dev:  pointer to opregion device
> > + *
> > + * This function issues a read request to eclite FW and waits until
> > +it
> > + * receives a response. When response is received the read data is
> > +copied to
> > + * opregion buffer.
> > + */
> > +static int ecl_ish_cl_read(struct ishtp_opregion_dev *opr_dev) {
> > +	struct ecl_message_header header;
> > +	int len, rv;
> > +
> > +	if (!opr_dev->ish_link_ready)
> > +		return -EIO;
> > +
> > +	header.version = ECL_ISH_HEADER_VERSION;
> > +	header.data_type = ECL_MSG_DATA;
> > +	header.request_type = ECL_ISH_READ;
> > +	header.offset = opr_dev->opr_context.cmd_area.offset;
> > +	header.data_len = opr_dev->opr_context.cmd_area.length;
> 
> You should check here that offset + data_len does not exceed
> ECL_DATA_OPR_BUFLEN.
> 

Sure, will add in V2.

> > +	header.event = opr_dev->opr_context.cmd_area.event_id;
> > +	len = sizeof(header);
> > +
> > +	opr_dev->ish_read_done = false;
> > +	rv = ishtp_cl_send(opr_dev->ecl_ishtp_cl, (uint8_t *)&header, len);
> > +	if (rv) {
> > +		dev_err(cl_data_to_dev(opr_dev), "ish-read : send
> failed\n");
> > +		return -EIO;
> > +	}
> > +
> > +	dev_dbg(cl_data_to_dev(opr_dev),
> > +		"[ish_rd] Req: off : %x, len : %x\n",
> > +		header.offset,
> > +		header.data_len);
> > +
> > +	rv = wait_event_interruptible_timeout(opr_dev->read_wait,
> > +					      opr_dev->ish_read_done,
> > +					      2 * HZ);
> > +	if (!rv) {
> > +		dev_err(cl_data_to_dev(opr_dev),
> > +			"[ish_rd] No response from firmware\n");
> > +		return -EIO;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +/**
> > + * ecl_ish_cl_write() - This function writes data to eclite FW.
> > + *
> > + * @opr_dev:  pointer to opregion device
> > + *
> > + * This function writes data to eclite FW.
> > + */
> > +static int ecl_ish_cl_write(struct ishtp_opregion_dev *opr_dev) {
> > +	struct ecl_message message;
> > +	int len;
> > +
> > +	if (!opr_dev->ish_link_ready)
> > +		return -EIO;
> > +
> > +	message.header.version = ECL_ISH_HEADER_VERSION;
> > +	message.header.data_type = ECL_MSG_DATA;
> > +	message.header.request_type = ECL_ISH_WRITE;
> > +	message.header.offset = opr_dev->opr_context.cmd_area.offset;
> > +	message.header.data_len = opr_dev-
> >opr_context.cmd_area.length;
> > +	message.header.event = opr_dev-
> >opr_context.cmd_area.event_id;
> > +	len = sizeof(struct ecl_message_header) +
> message.header.data_len;
> > +
> 
> You should check here that offset + data_len does not exceed
> ECL_DATA_OPR_BUFLEN, before doing the memcpy.

Sure, I'll add the check.

> 
> > +	memcpy(message.payload,
> > +	       opr_dev->opr_context.data_area.data + message.header.offset,
> > +	       message.header.data_len);
> > +
> > +	dev_dbg(cl_data_to_dev(opr_dev),
> > +		"[ish_wr] off : %x, len : %x\n",
> > +		message.header.offset,
> > +		message.header.data_len);
> > +
> > +	return ishtp_cl_send(opr_dev->ecl_ishtp_cl, (uint8_t *)&message,
> > +len); }
> > +
> > +static acpi_status
> > +ecl_opregion_cmd_handler(u32 function, acpi_physical_address address,
> > +			 u32 bits, u64 *value64,
> > +			 void *handler_context, void *region_context) {
> > +	struct ishtp_opregion_dev *opr_dev;
> > +	struct opregion_cmd *cmd;
> > +
> > +	if (!region_context || !value64)
> > +		return AE_BAD_PARAMETER;
> > +
> > +	if (function == ACPI_READ)
> > +		return AE_ERROR;
> > +
> > +	opr_dev = (struct ishtp_opregion_dev *)region_context;
> > +	cmd = &opr_dev->opr_context.cmd_area;
> 
> This is shared between all threads, we have had issues with sharing opregion
> context between threads like this in the past.
> 
> What is stopping ACPI code from trying to use the opr_context from multiple
> threads at the same time, messing things up?
> 

This will not happen. BIOS calls ACPI methods (cmd and data in this driver) in a
SERIALIZED manner. BIOS issues another call only after finishing the first one.

> I'm especially worried about the offset + data_len used in various places,
> even if we add checks for this, this could be changed underneath us by
> another thread.

There are checks in BIOS for offset + data, but I'll add them in the driver as well.
There is no other thread accessing it. Flow below -

ACPI method --> cmd --> 
<--end ACPI method <-- 

<here no other ACPI method will execute because of serialized method>

ACPI method --> data--> 
<--end ACPI method <-- 

> 
> You should add a mutex to the opr_dev and lock it in this function so that the
> cmd struct cannot be changed underneath us while we are processing it.
> 
> Note this does not fully protect against races like this:
> 
> 1. Thread a sets offset
> 2. Thread b sets a different offset
> 3. Thread a writes ECL_ISH_READ to command.

These race conditions won't occur. No structure elements are used from two paths
simultaneously. Only element from the opregion structure used outside ACPI path is 

opr_dev->dsm_event_id from event_work workqueque.

But this element not accessed from anywhere else including serialized ACPI path.

> 
> This will result in thread a getting the data at the offset specified by thread b,
> rather then at the offset which it requested. But there is nothing we can do
> about that, that needs to be solved with synchronization at the AML level.

There is ASL serialization defined, But are you suggesting to put locks for the fairness
of coding?

> 
> 
> 
> 
> 
> > +
> > +	switch (address) {
> > +	case cmd_opr_offsetof(command):
> > +		cmd->command = (u32)*value64;
> > +
> > +		if (cmd->command == ECL_ISH_READ)
> > +			return ecl_ish_cl_read(opr_dev);
> > +		else if (cmd->command == ECL_ISH_WRITE)
> > +			return ecl_ish_cl_write(opr_dev);
> > +
> > +		return AE_ERROR;
> > +
> > +	case cmd_opr_offsetof(offset):
> > +		cmd->offset = (u32)*value64;
> > +		break;
> > +	case cmd_opr_offsetof(length):
> > +		cmd->length = (u32)*value64;
> > +		break;
> > +	case cmd_opr_offsetof(event_id):
> > +		cmd->event_id = (u32)*value64;
> > +		break;
> > +	default:
> > +		return AE_ERROR;
> > +	}
> > +
> > +	return AE_OK;
> > +}
> > +
> > +static acpi_status
> > +ecl_opregion_data_handler(u32 function, acpi_physical_address address,
> > +			  u32 bits, u64 *value64,
> > +			  void *handler_context, void *region_context) {
> > +	struct ishtp_opregion_dev *opr_dev;
> > +	unsigned int bytes = BITS_TO_BYTES(bits);
> > +	void *data_addr;
> > +
> > +	if (!region_context || !value64)
> > +		return AE_BAD_PARAMETER;
> > +
> > +	if (address + bytes > ECL_DATA_OPR_BUFLEN)
> > +		return AE_BAD_PARAMETER;
> > +
> > +	opr_dev = (struct ishtp_opregion_dev *)region_context;
> > +	data_addr = &opr_dev->opr_context.data_area.data[address];
> > +
> > +	if (function == ACPI_READ)
> > +		memcpy(value64, data_addr, bytes);
> > +	else if (function == ACPI_WRITE)
> > +		memcpy(data_addr, value64, bytes);
> 
> What if bits is not a multiple of 8? Then we have just overwritten a bunch of
> bits which the caller did not request us to do.
> 
> Since the caller specifies bits, this should really do a read-modify-write of the
> last byte when there are any "leftover" bits. ?  What does the
> documentation say about this?

The request will always be multiple of 8 bits as per ASL definition/docs.

> 
> > +	else
> > +		return AE_BAD_PARAMETER;
> > +
> > +	return AE_OK;
> > +}
> > +
> > +static int acpi_opregion_init(struct ishtp_opregion_dev *opr_dev) {
> > +	acpi_status status;
> > +	struct acpi_device *adev;
> > +
> > +	/* Find ECLite device and install opregion handlers */
> > +	adev = acpi_dev_get_first_match_dev("INTC1035", NULL, -1);
> > +	if (!adev) {
> > +		dev_err(cl_data_to_dev(opr_dev), "eclite ACPI device not
> found\n");
> > +		return -ENODEV;
> > +	}
> > +
> > +	opr_dev->acpi_handle = adev->handle;
> 
> acpi_opregion_init() seem to get called on every resume, doing the lookup is
> only necessary once, after that the cached value in opr_dev->acpi_handle
> can be reused.
> 
> More importantly this whole dance of unregistering + re-registering the
> opregion seems unnecessary. You already have ish_link_ready in case the
> opregion gets called before things are ready; and if the opregion is called
> when the link is not ready, still having the opregion handler in place allows
> you to log a sensible error about what is going on which is what we want.

Initial approach was same as you suggested ( No uninstall, just wait in ACPI
Method). But after every resume, the driver gets ACPI write and read requests
for FAN and thermal controls which fails  because link is not ready. Also we 
can't wait_event_interruptible_timeout() in ACPI method until we get the 
link_ready( link can become ready much later until PSE fully boots up after every
Sx. Anything else gets executed after this wait_event timeout fail in ACPI method.
We can't afford to miss any critical thermal/FAN related setting from standby/
hibernation resume. No requests are missed by registering opregion only after
link_ready.

> 
> > +	acpi_dev_put(adev);
> > +
> > +	status = acpi_install_address_space_handler(opr_dev->acpi_handle,
> > +
> ECLITE_CMD_OPREGION_ID,
> > +
> ecl_opregion_cmd_handler,
> > +						    NULL, opr_dev);
> > +	if (ACPI_FAILURE(status)) {
> > +		dev_err(cl_data_to_dev(opr_dev),
> > +			"cmd space handler install failed\n");
> > +		return -ENODEV;
> > +	}
> > +
> > +	status = acpi_install_address_space_handler(opr_dev->acpi_handle,
> > +
> ECLITE_DATA_OPREGION_ID,
> > +
> ecl_opregion_data_handler,
> > +						    NULL, opr_dev);
> > +	if (ACPI_FAILURE(status)) {
> > +		dev_err(cl_data_to_dev(opr_dev),
> > +			"data space handler install failed\n");
> > +
> > +		acpi_remove_address_space_handler(opr_dev-
> >acpi_handle,
> > +
> ECLITE_CMD_OPREGION_ID,
> > +
> ecl_opregion_cmd_handler);
> > +		return -ENODEV;
> > +	}
> > +	opr_dev->acpi_init_done = true;
> > +
> > +	dev_dbg(cl_data_to_dev(opr_dev), "Opregion handlers are
> > +installed\n");
> > +
> > +	return 0;
> > +}
> > +
> > +static void acpi_opregion_deinit(struct ishtp_opregion_dev *opr_dev)
> > +{
> > +	acpi_remove_address_space_handler(opr_dev->acpi_handle,
> > +					  ECLITE_CMD_OPREGION_ID,
> > +					  ecl_opregion_cmd_handler);
> > +
> > +	acpi_remove_address_space_handler(opr_dev->acpi_handle,
> > +					  ECLITE_DATA_OPREGION_ID,
> > +					  ecl_opregion_data_handler);
> > +	opr_dev->acpi_init_done = false;
> > +}
> > +
> > +static void ecl_acpi_invoke_dsm(struct work_struct *work) {
> > +	struct ishtp_opregion_dev *opr_dev;
> > +	union acpi_object *obj;
> > +
> > +	opr_dev = container_of(work, struct ishtp_opregion_dev,
> event_work);
> > +	if (!opr_dev->acpi_init_done)
> 
> This should probably be replaced by ish_link_ready and you should probably
> log an error when this happens.
> 
> > +		return;
> > +
> > +	obj = acpi_evaluate_dsm(opr_dev->acpi_handle, &ecl_acpi_guid, 0,
> > +				opr_dev->dsm_event_id, NULL);
> > +	if (!obj) {
> > +		dev_warn(cl_data_to_dev(opr_dev), "_DSM fn call
> failed\n");
> > +		return;
> > +	}
> > +
> > +	dev_dbg(cl_data_to_dev(opr_dev), "Exec DSM function code: %d
> success\n",
> > +		opr_dev->dsm_event_id);
> > +
> > +	ACPI_FREE(obj);
> > +}
> > +
> > +static void ecl_ish_process_rx_data(struct ishtp_opregion_dev
> > +*opr_dev) {
> > +	struct ecl_message *message =
> > +		(struct ecl_message *)opr_dev->rb->buffer.data;
> > +
> > +	dev_dbg(cl_data_to_dev(opr_dev),
> > +		"[ish_rd] Resp: off : %x, len : %x\n",
> > +		message->header.offset,
> > +		message->header.data_len);
> > +
> 
> You should check here that offset + data_len does not exceed
> ECL_DATA_OPR_BUFLEN, before doing the memcpy.

Agree, will be in v2.
> 
> > +	memcpy(opr_dev->opr_context.data_area.data + message-
> >header.offset,
> > +	       message->payload, message->header.data_len);
> > +
> > +	opr_dev->ish_read_done = true;
> > +	wake_up_interruptible(&opr_dev->read_wait);
> > +}
> > +
> > +static void ecl_ish_process_rx_event(struct ishtp_opregion_dev
> > +*opr_dev) {
> > +	struct ecl_message_header *header =
> > +		(struct ecl_message_header *)opr_dev->rb->buffer.data;
> > +
> > +	dev_dbg(cl_data_to_dev(opr_dev),
> > +		"[ish_ev] Evt received: %8x\n", header->event);
> > +
> > +	opr_dev->dsm_event_id = header->event;
> > +	schedule_work(&opr_dev->event_work);
> > +}
> > +
> > +static int ecl_ish_cl_enable_events(struct ishtp_opregion_dev *opr_dev,
> > +				    bool config_enable)
> > +{
> > +	struct ecl_message message;
> > +	int len;
> > +
> > +	message.header.version = ECL_ISH_HEADER_VERSION;
> > +	message.header.data_type = ECL_MSG_DATA;
> > +	message.header.request_type = ECL_ISH_WRITE;
> > +	message.header.offset = ECL_EVENTS_NOTIFY;
> > +	message.header.data_len = 1;
> > +	message.payload[0] = config_enable;
> > +
> > +	len = sizeof(struct ecl_message_header) +
> message.header.data_len;
> > +
> > +	return ishtp_cl_send(opr_dev->ecl_ishtp_cl, (uint8_t *)&message,
> > +len); }
> > +
> > +static void ecl_ishtp_cl_event_cb(struct ishtp_cl_device *cl_device)
> > +{
> > +	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
> > +	struct ishtp_opregion_dev *opr_dev;
> > +	struct ecl_message_header *header;
> > +	struct ishtp_cl_rb *rb;
> > +
> > +	opr_dev = ishtp_get_client_data(ecl_ishtp_cl);
> > +	while ((rb = ishtp_cl_rx_get_rb(opr_dev->ecl_ishtp_cl)) != NULL) {
> > +		opr_dev->rb = rb;
> > +		header = (struct ecl_message_header *)rb->buffer.data;
> > +
> > +		if (header->data_type == ECL_MSG_DATA)
> > +			ecl_ish_process_rx_data(opr_dev);
> > +		else if (header->data_type == ECL_MSG_EVENT)
> > +			ecl_ish_process_rx_event(opr_dev);
> > +		else
> > +			/* got an event with wrong data_type, ignore it */
> > +			dev_err(cl_data_to_dev(opr_dev),
> > +				"[ish_cb] Received wrong data_type\n");
> > +
> > +		ishtp_cl_io_rb_recycle(rb);
> > +	}
> > +}
> > +
> > +static int ecl_ishtp_cl_init(struct ishtp_cl *ecl_ishtp_cl) {
> > +	struct ishtp_opregion_dev *opr_dev =
> > +		ishtp_get_client_data(ecl_ishtp_cl);
> > +	struct ishtp_fw_client *fw_client;
> > +	struct ishtp_device *dev;
> > +	int rv;
> > +
> > +	rv = ishtp_cl_link(ecl_ishtp_cl);
> > +	if (rv) {
> > +		dev_err(cl_data_to_dev(opr_dev), "ishtp_cl_link failed\n");
> > +		return	rv;
> > +	}
> > +
> > +	dev = ishtp_get_ishtp_device(ecl_ishtp_cl);
> > +
> > +	/* Connect to FW client */
> > +	ishtp_set_tx_ring_size(ecl_ishtp_cl, ECL_CL_TX_RING_SIZE);
> > +	ishtp_set_rx_ring_size(ecl_ishtp_cl, ECL_CL_RX_RING_SIZE);
> > +
> > +	fw_client = ishtp_fw_cl_get_client(dev, &ecl_ishtp_guid);
> > +	if (!fw_client) {
> > +		dev_err(cl_data_to_dev(opr_dev), "fw client not found\n");
> > +		return -ENOENT;
> > +	}
> > +
> > +	ishtp_cl_set_fw_client_id(ecl_ishtp_cl,
> > +				  ishtp_get_fw_client_id(fw_client));
> > +
> > +	ishtp_set_connection_state(ecl_ishtp_cl, ISHTP_CL_CONNECTING);
> > +
> > +	rv = ishtp_cl_connect(ecl_ishtp_cl);
> > +	if (rv) {
> > +		dev_err(cl_data_to_dev(opr_dev), "client connect
> failed\n");
> > +
> > +		ishtp_cl_unlink(ecl_ishtp_cl);
> > +		return rv;
> > +	}
> > +
> > +	dev_dbg(cl_data_to_dev(opr_dev), "Host connected to fw
> client\n");
> > +
> > +	return 0;
> > +}
> > +
> > +static void ecl_ishtp_cl_deinit(struct ishtp_cl *ecl_ishtp_cl) {
> > +	ishtp_cl_unlink(ecl_ishtp_cl);
> > +	ishtp_cl_flush_queues(ecl_ishtp_cl);
> > +	ishtp_cl_free(ecl_ishtp_cl);
> > +}
> > +
> > +static void ecl_ishtp_cl_reset_handler(struct work_struct *work) {
> > +	struct ishtp_opregion_dev *opr_dev;
> > +	struct ishtp_cl_device *cl_device;
> > +	struct ishtp_cl *ecl_ishtp_cl;
> > +	int rv;
> > +	int retry;
> > +
> > +	opr_dev = container_of(work, struct ishtp_opregion_dev,
> reset_work);
> > +
> > +	opr_dev->ish_link_ready = false;
> > +
> > +	cl_device = opr_dev->cl_device;
> > +	ecl_ishtp_cl = opr_dev->ecl_ishtp_cl;
> > +
> > +	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
> > +
> > +	ecl_ishtp_cl = ishtp_cl_allocate(cl_device);
> > +	if (!ecl_ishtp_cl)
> > +		return;
> 
> Is this whole freeing + re-alloc of the ISHTP client here really necessary ? This
> seems like overkill.

This is required. reset is an asynchronous notification from ISH
(PSE in this case) firmware and current connection becomes stale and needs
to be reinitialized. All ISHTP client drivers are implemented same way. 
eg.
drivers/hid/intel-ish-hid/ishtp-hid-client.c
drivers/hid/intel-ish-hid/ishtp-fw-loader.c

> 
> > +
> > +	ishtp_set_drvdata(cl_device, ecl_ishtp_cl);
> > +	ishtp_set_client_data(ecl_ishtp_cl, opr_dev);
> > +
> > +	opr_dev->ecl_ishtp_cl = ecl_ishtp_cl;
> > +
> > +	for (retry = 0; retry < 3; ++retry) {
> > +		rv = ecl_ishtp_cl_init(ecl_ishtp_cl);
> > +		if (!rv)
> > +			break;
> > +	}
> > +	if (rv) {
> > +		ishtp_cl_free(ecl_ishtp_cl);
> > +		opr_dev->ecl_ishtp_cl = NULL;
> > +		dev_err(cl_data_to_dev(opr_dev),
> > +			"[ish_rst] Reset failed. Link not ready.\n");
> > +		return;
> > +	}
> > +
> > +	ishtp_register_event_cb(cl_device, ecl_ishtp_cl_event_cb);
> > +	dev_info(cl_data_to_dev(opr_dev),
> > +		 "[ish_rst] Reset Success. Link ready.\n");
> > +
> > +	opr_dev->ish_link_ready = true;
> > +
> > +	if (opr_dev->acpi_init_done)
> > +		return;
> > +
> > +	rv = acpi_opregion_init(opr_dev);
> > +	if (rv) {
> > +		dev_err(cl_data_to_dev(opr_dev),
> > +			"ACPI opregion init failed\n");
> > +	}
> > +}
> > +
> > +static int ecl_ishtp_cl_probe(struct ishtp_cl_device *cl_device) {
> > +	struct ishtp_cl *ecl_ishtp_cl;
> > +	struct ishtp_opregion_dev *opr_dev;
> > +	int rv;
> > +
> > +	opr_dev = devm_kzalloc(ishtp_device(cl_device), sizeof(*opr_dev),
> > +			       GFP_KERNEL);
> > +	if (!opr_dev)
> > +		return -ENOMEM;
> > +
> > +	ecl_ishtp_cl = ishtp_cl_allocate(cl_device);
> > +	if (!ecl_ishtp_cl)
> > +		return -ENOMEM;
> > +
> > +	ishtp_set_drvdata(cl_device, ecl_ishtp_cl);
> > +	ishtp_set_client_data(ecl_ishtp_cl, opr_dev);
> > +	opr_dev->ecl_ishtp_cl = ecl_ishtp_cl;
> > +	opr_dev->cl_device = cl_device;
> > +
> > +	init_waitqueue_head(&opr_dev->read_wait);
> > +	INIT_WORK(&opr_dev->event_work, ecl_acpi_invoke_dsm);
> > +	INIT_WORK(&opr_dev->reset_work, ecl_ishtp_cl_reset_handler);
> > +
> > +	/* Initialize ish client device */
> > +	rv = ecl_ishtp_cl_init(ecl_ishtp_cl);
> > +	if (rv) {
> > +		dev_err(cl_data_to_dev(opr_dev), "Client init failed\n");
> > +		goto err_exit;
> > +	}
> > +
> > +	dev_dbg(cl_data_to_dev(opr_dev), "eclite-ishtp client
> > +initialised\n");
> > +
> > +	/* Register a handler for eclite fw events */
> > +	ishtp_register_event_cb(cl_device, ecl_ishtp_cl_event_cb);
> > +
> > +	ishtp_get_device(cl_device);
> 
> This seems weird, normally in the device-model a driver being bound to a
> device guarantees that that device cannot go away before the remove
> callback of the driver is called.
> 
> So it seems to me that this call + the put in both the err_exit and
> ecl_ishtp_cl_remove() cases can be dropped.
> 
This platform driver has two interfaces - a)ISH and b)ACPI.
ISH initializes first and if successful we ishtp_get_device().
Then ACPI initializes after that. If ACPI init fails, driver gets cleaned with
ISHTP as well thus ishtp_get_device() in probe's err_exit. Both interface must be
Required and initialized for the functionality.

Do you still see a problem?

> > +
> > +	opr_dev->ish_link_ready = true;
> > +
> > +	/* Now find ACPI device and init opregion handlers */
> > +	rv = acpi_opregion_init(opr_dev);
> > +	if (rv) {
> > +		dev_err(cl_data_to_dev(opr_dev), "ACPI opregion init
> failed\n");
> > +
> > +		goto err_exit;
> > +	}
> > +
> > +	/* Reprobe devices depending on ECLite - battery, fan, etc. */
> > +	acpi_walk_dep_device_list(opr_dev->acpi_handle);
> > +
> > +	return 0;
> > +err_exit:
> > +	ishtp_set_connection_state(ecl_ishtp_cl,
> ISHTP_CL_DISCONNECTING);
> > +	ishtp_cl_disconnect(ecl_ishtp_cl);
> > +	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
> > +
> > +	ishtp_put_device(cl_device);
> > +
> > +	return rv;
> > +}
> > +
> > +static int ecl_ishtp_cl_remove(struct ishtp_cl_device *cl_device) {
> > +	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
> > +	struct ishtp_opregion_dev *opr_dev =
> > +		ishtp_get_client_data(ecl_ishtp_cl);
> > +
> > +	if (opr_dev->acpi_init_done)
> > +		acpi_opregion_deinit(opr_dev);
> > +
> > +	cancel_work_sync(&opr_dev->reset_work);
> > +	cancel_work_sync(&opr_dev->event_work);
> > +
> > +	ishtp_set_connection_state(ecl_ishtp_cl,
> ISHTP_CL_DISCONNECTING);
> > +	ishtp_cl_disconnect(ecl_ishtp_cl);
> > +	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
> > +
> > +	ishtp_put_device(cl_device);
> > +
> > +	return 0;
> > +}
> > +
> > +static int ecl_ishtp_cl_reset(struct ishtp_cl_device *cl_device) {
> > +	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
> > +	struct ishtp_opregion_dev *opr_dev =
> > +		ishtp_get_client_data(ecl_ishtp_cl);
> > +
> > +	schedule_work(&opr_dev->reset_work);
> > +
> > +	return 0;
> > +}
> > +
> > +static int ecl_ishtp_cl_suspend(struct device *device) {
> > +	struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device);
> > +	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
> > +	struct ishtp_opregion_dev *opr_dev =
> > +		ishtp_get_client_data(ecl_ishtp_cl);
> > +
> > +	if (acpi_target_system_state() == ACPI_STATE_S0)
> > +		return 0;
> > +
> > +	acpi_opregion_deinit(opr_dev);
> > +	ecl_ish_cl_enable_events(opr_dev, false);
> > +
> > +	return 0;
> > +}
> > +
> > +static int ecl_ishtp_cl_resume(struct device *device) {
> > +	/* A reset is expected to call after an Sx. At this point
> > +	 * we are not sure if the link is up or not to restore anything,
> > +	 * so do nothing in resume path
> > +	 */
> > +	return 0;
> 
> This seems very wrong, this means that there are no resume ordering
> guarantees meaning that drivers which are ordered to resume after this
> driver, because they rely on the opregion may end up not being able to use
> the opregion leading to all kind of issues.
> 
> IMHO what should happen here is that this driver waits for the EClite to
> become ready here. Which probably means that it itself should be only
> resumed after the ISH HID driver is, but I assume that the ISH device is a
> parent of this device, so that ordering should be correct automatically.

> 
> TBH the whole lets just not resume and do a reset instead and then just
> tearing down the entire ISHTP client struct and setting up a new one from
> scratch, just feels very wrong.  At a minimum the teardown + bringup needs
> to happen before the resume callback completes, but ideally this would be
> replaced by a much lighter resume solution.

ISH Firmware (the PSE subsystem) can boot up/reinitialize Every Sx based on
usecase or sometimes PSE is always-on. So the resume path is asynchronous 
and unpredictable in this case. Re-initialization and clean up required if PSE
also boot up every Sx and might take good amount of time (Host can come alive
before PSE comes up). Thus using asynchronous reset notification. 

> 
> Till this is fixed so that the device is guaranteed to be fully functional once its
> resume callback has completed this code can not be merged, so NACK to this
> patch for now.
> 
> Regards,
> 
> Hans
> 
> 
> 
> 
> > +}
> > +
> > +static const struct dev_pm_ops ecl_ishtp_pm_ops = {
> > +	.suspend = ecl_ishtp_cl_suspend,
> > +	.resume = ecl_ishtp_cl_resume,
> > +};
> > +
> > +static struct ishtp_cl_driver ecl_ishtp_cl_driver = {
> > +	.name = "ishtp-eclite",
> > +	.guid = &ecl_ishtp_guid,
> > +	.probe = ecl_ishtp_cl_probe,
> > +	.remove = ecl_ishtp_cl_remove,
> > +	.reset = ecl_ishtp_cl_reset,
> > +	.driver.pm = &ecl_ishtp_pm_ops,
> > +};
> > +
> > +static int __init ecl_ishtp_init(void) {
> > +	return ishtp_cl_driver_register(&ecl_ishtp_cl_driver,
> THIS_MODULE);
> > +}
> > +
> > +static void __exit ecl_ishtp_exit(void) {
> > +	return ishtp_cl_driver_unregister(&ecl_ishtp_cl_driver);
> > +}
> > +
> > +late_initcall(ecl_ishtp_init);
> > +module_exit(ecl_ishtp_exit);
> > +
> > +MODULE_DESCRIPTION("ISH ISHTP eclite client opregion driver");
> > +MODULE_AUTHOR("K Naduvalath, Sumesh
> > +<sumesh.k.naduvalath@intel.com>");
> > +
> > +MODULE_LICENSE("GPL v2");
> > +MODULE_ALIAS("ishtp:*");
> >


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/1] ishtp: Add support for Intel ishtp eclite driver
  2021-06-01 18:24   ` K Naduvalath, Sumesh
@ 2021-06-02 11:19     ` Hans de Goede
  2021-06-03 16:48       ` K Naduvalath, Sumesh
  0 siblings, 1 reply; 6+ messages in thread
From: Hans de Goede @ 2021-06-02 11:19 UTC (permalink / raw)
  To: K Naduvalath, Sumesh, mgross, srinivas.pandruvada
  Cc: Pandruvada, Srinivas, platform-driver-x86, linux-kernel, Chinnu,
	Ganapathi, Kumar, Nachiketa

Hi Sumesh,

On 6/1/21 8:24 PM, K Naduvalath, Sumesh wrote:
> Thank you Hans for the review comments. Please find the reply inline -
> 
>> -----Original Message-----
>> From: Hans de Goede <hdegoede@redhat.com>
>> Sent: Monday, May 31, 2021 8:25 PM
>> To: K Naduvalath, Sumesh <sumesh.k.naduvalath@intel.com>;
>> mgross@linux.intel.com; srinivas.pandruvada@linux.intel.com
>> Cc: Pandruvada, Srinivas <srinivas.pandruvada@intel.com>; platform-driver-
>> x86@vger.kernel.org; linux-kernel@vger.kernel.org; Chinnu, Ganapathi
>> <ganapathi.chinnu@intel.com>; Kumar, Nachiketa
>> <nachiketa.kumar@intel.com>
>> Subject: Re: [PATCH 1/1] ishtp: Add support for Intel ishtp eclite driver
>>
>> Hi,
>>
>> Thank you, I've done a quick review, which has already spotted quite a few
>> issues. Note I will probably do a more thorough review later which mind find
>> more issues, but lets start with fixing the serious issues which this review has
>> found.

<snip>

>>> diff --git a/drivers/platform/x86/Kconfig
>>> b/drivers/platform/x86/Kconfig index 60592fb88e7a..cfa2cb150909 100644
>>> --- a/drivers/platform/x86/Kconfig
>>> +++ b/drivers/platform/x86/Kconfig
>>> @@ -1180,6 +1180,19 @@ config INTEL_CHTDC_TI_PWRBTN
>>>  	  To compile this driver as a module, choose M here: the module
>>>  	  will be called intel_chtdc_ti_pwrbtn.
>>>
>>> +config INTEL_ISHTP_ECLITE
>>> +	tristate "Intel ISHTP eclite controller"
>>> +	depends on INTEL_ISH_HID
>>> +	depends on ACPI
>>> +	help
>>> +	  This driver is for accessing the PSE(Programmable Service Engine),
>>> +	  an Embedded Controller like IP, using ISHTP(Integratd Sensor Hub
>>> +	  Transport Protocol) to get battery, thermal and UCSI (USB Type-C
>>> +          Connector System Software Interface) related data from the
>> platform.
>>
>> This text has all the same issues as the commit message. Also please explain
>> on what sort of systems this functionality is typically found/used so that
>> users will have a better idea if this is something which they should enable on
>> their systems.
>>
> 
> This functionality is enabled for the first time for Intel's Elkhartlake platform.
> Users who don't want to use discrete Embedded Controller on their platform,
> they can leverage the integrated solution of ECLite which is part of 
> Elkhartlake's PSE subsystem. I'll add more text for the config item.

Thank you for the info.

<snip>

>>> +static acpi_status
>>> +ecl_opregion_cmd_handler(u32 function, acpi_physical_address address,
>>> +			 u32 bits, u64 *value64,
>>> +			 void *handler_context, void *region_context) {
>>> +	struct ishtp_opregion_dev *opr_dev;
>>> +	struct opregion_cmd *cmd;
>>> +
>>> +	if (!region_context || !value64)
>>> +		return AE_BAD_PARAMETER;
>>> +
>>> +	if (function == ACPI_READ)
>>> +		return AE_ERROR;
>>> +
>>> +	opr_dev = (struct ishtp_opregion_dev *)region_context;
>>> +	cmd = &opr_dev->opr_context.cmd_area;
>>
>> This is shared between all threads, we have had issues with sharing opregion
>> context between threads like this in the past.
>>
>> What is stopping ACPI code from trying to use the opr_context from multiple
>> threads at the same time, messing things up?
>>
> 
> This will not happen. BIOS calls ACPI methods (cmd and data in this driver) in a
> SERIALIZED manner. BIOS issues another call only after finishing the first one.

You say that this will not happen, but the problem which I have with the
OpRegion API is that this may actually very well happen. There are no
guarantees of this not happening. We are relying on the AML code from
the BIOS adding the "Serialized" attribute to all functions accessing the
OpRegion, all that is necessary is one bug in the AML code and then this
will happen.

And it is typically very hard to get vendors to issue BIOS updates to fix
things like this :|

I must honestly say that I find the whole design of the OpRegion API
lacking. There are other options to interface AML code, like e.g. modelling
the ISHTP as a generic serial bus, then a single OpRegion access can do the
following:

1. AML fills a buffer
2. OpRegion call processes buffer and writes back results (status code +
   data read if it is a read) to buffer
3. AML processes buffer

This is e.g. used in some Microsoft Surface 3 devices, see:
drivers/platform/surface/surface3_power.c

This would make the calls more-or-less atomic, removing the need for the
ACPI Methods to all be Serialized.

This would also allow the Opregion to return an error status code when
the EClite is not ready, instead of unregistering + reregistering the
OpRegion, which in itself is another source of possible races (see below).

I assume that it is too late to change the OpRegion API now, so that
we are stuck with this ?

I must say I'm disappointed about the quality of the OpRegion API design
here. APIs should be designed so that it is easy / natural to use them in
a correct way, where as this API seems to be dessgned in such a way that
it is easy to use it the wrong way.

I really expect Intel to do better the next time the introduce a new
OpRegion for something. It would help greatly if Intel would send some
rough sketches of how the API is going to look like to the mailinglist,
so that we can actually correct issues like these, rather then the API
already being set in stone and that we end up having to live with a
not so good API.

>> I'm especially worried about the offset + data_len used in various places,
>> even if we add checks for this, this could be changed underneath us by
>> another thread.
> 
> There are checks in BIOS for offset + data, but I'll add them in the driver as well.
> There is no other thread accessing it. Flow below -
> 
> ACPI method --> cmd --> 
> <--end ACPI method <-- 
> 
> <here no other ACPI method will execute because of serialized method>
> 
> ACPI method --> data--> 
> <--end ACPI method <-- 
> 
>>
>> You should add a mutex to the opr_dev and lock it in this function so that the
>> cmd struct cannot be changed underneath us while we are processing it.
>>
>> Note this does not fully protect against races like this:
>>
>> 1. Thread a sets offset
>> 2. Thread b sets a different offset
>> 3. Thread a writes ECL_ISH_READ to command.
> 
> These race conditions won't occur. No structure elements are used from two paths
> simultaneously. Only element from the opregion structure used outside ACPI path is 
> 
> opr_dev->dsm_event_id from event_work workqueque.
> 
> But this element not accessed from anywhere else including serialized ACPI path.

I understand that the intention is for all the ACPI calls to be serialized, but
nothing is guaranteeing this. All it requires is one method not being marked as
serialized...

>> This will result in thread a getting the data at the offset specified by thread b,
>> rather then at the offset which it requested. But there is nothing we can do
>> about that, that needs to be solved with synchronization at the AML level.
> 
> There is ASL serialization defined, But are you suggesting to put locks for the fairness
> of coding?

I don't want the kernel to read / write beyond the end of kernel-owned memory
because the following happens (this assumes one AML method is missing the Serialized
attribute):

1. #define ECL_DATA_OPR_BUFLEN	384
2. Thread A sets offset to 0
3. Thread A sets len to 100
4. Thread A writes ECL_ISH_WRITE
5. Thread A kernel checks (offset + len) < ECL_DATA_OPR_BUFLEN, this is ok
6. Thread B sets offset to 300
7. Thread A kernel does memcpy(message.payload, opr_dev->opr_context.data_area.data + message.header.offset, message.header.data_len);

7. Will now end up reading 100 bytes from offset 300 of opr_context.data_area.data,
which is only 384 bytes big, which needless to say is not good.
I see now that the offset + len are copied into message.header and then the copied
values are used. It is tempting to think that this thus can be fixed by doing a
check on the copied values, but the message struct is a local variable, so the
compiler will alias the 2 values and might decide to do the check on the originals;
and the compiler is also free to re-order things. So this making of a local copy
does not help.

This is a classic time-of-check vs time-of-use problem. Since we cannot _guarantee_
that all users of the OpRegion are properly using the ACPI Serialized Method attribute,
we need some other mechanism to ensure that len + offset do not change between
checking and consuming them. The most straight forward solution here is to
protect opr_context with a mutex so that it cannot be changed by another thread
while one call is in progress. Since all accesses should be serialized at the AML
level anyways this will not add extra serialization, while it will help avoid
AML bugs turning into out-of-bounds memory accesses inside the kernel.

<snip>

>>> +static acpi_status
>>> +ecl_opregion_data_handler(u32 function, acpi_physical_address address,
>>> +			  u32 bits, u64 *value64,
>>> +			  void *handler_context, void *region_context) {
>>> +	struct ishtp_opregion_dev *opr_dev;
>>> +	unsigned int bytes = BITS_TO_BYTES(bits);
>>> +	void *data_addr;
>>> +
>>> +	if (!region_context || !value64)
>>> +		return AE_BAD_PARAMETER;
>>> +
>>> +	if (address + bytes > ECL_DATA_OPR_BUFLEN)
>>> +		return AE_BAD_PARAMETER;
>>> +
>>> +	opr_dev = (struct ishtp_opregion_dev *)region_context;
>>> +	data_addr = &opr_dev->opr_context.data_area.data[address];
>>> +
>>> +	if (function == ACPI_READ)
>>> +		memcpy(value64, data_addr, bytes);
>>> +	else if (function == ACPI_WRITE)
>>> +		memcpy(data_addr, value64, bytes);
>>
>> What if bits is not a multiple of 8? Then we have just overwritten a bunch of
>> bits which the caller did not request us to do.
>>
>> Since the caller specifies bits, this should really do a read-modify-write of the
>> last byte when there are any "leftover" bits. ?  What does the
>> documentation say about this?
> 
> The request will always be multiple of 8 bits as per ASL definition/docs.

Ok.


>>> +	else
>>> +		return AE_BAD_PARAMETER;
>>> +
>>> +	return AE_OK;
>>> +}
>>> +
>>> +static int acpi_opregion_init(struct ishtp_opregion_dev *opr_dev) {
>>> +	acpi_status status;
>>> +	struct acpi_device *adev;
>>> +
>>> +	/* Find ECLite device and install opregion handlers */
>>> +	adev = acpi_dev_get_first_match_dev("INTC1035", NULL, -1);
>>> +	if (!adev) {
>>> +		dev_err(cl_data_to_dev(opr_dev), "eclite ACPI device not
>> found\n");
>>> +		return -ENODEV;
>>> +	}
>>> +
>>> +	opr_dev->acpi_handle = adev->handle;
>>
>> acpi_opregion_init() seem to get called on every resume, doing the lookup is
>> only necessary once, after that the cached value in opr_dev->acpi_handle
>> can be reused.
>>
>> More importantly this whole dance of unregistering + re-registering the
>> opregion seems unnecessary. You already have ish_link_ready in case the
>> opregion gets called before things are ready; and if the opregion is called
>> when the link is not ready, still having the opregion handler in place allows
>> you to log a sensible error about what is going on which is what we want.
> 
> Initial approach was same as you suggested ( No uninstall, just wait in ACPI
> Method). But after every resume, the driver gets ACPI write and read requests
> for FAN and thermal controls which fails  because link is not ready. Also we 
> can't wait_event_interruptible_timeout() in ACPI method until we get the 
> link_ready( link can become ready much later until PSE fully boots up after every
> Sx. Anything else gets executed after this wait_event timeout fail in ACPI method.
> We can't afford to miss any critical thermal/FAN related setting from standby/
> hibernation resume. No requests are missed by registering opregion only after
> link_ready.

I see; and since the OpRegion API does not allow returning a status code to
communicate the link-not-ready thing you need to do this unregister + re-register
dance instead. See this is what I meant above when I wrote that the whole OpRegion
API seems lacking. This could all have been solved by having the OpRegion calls
communicate back a status code.

Now you are relying on communicating the availability of the link by calling
_REG instead. This means that we better hope that all users of the OpRegion will
correctly check the flag set by _REG, which in my experience with a lot of AML
code is something which is often forgotten, so this will be another source
of DSDT/AML bugs for AML code using this OpRegion <sigh>.

Also note that this is racy too, on suspend we unregister the OpRegion, calling
_REG in the process. But _REG methods typically are not Serialized so now the
following may happen:

1. Thread A is executing a function using the EClite OpRegion
2. Thread A checks the flag set by _REG, sees the region is available
3. Thread B is executing suspend for the EClite device, unregister the
   OpRegion, calling _REG to clear the flag
4. Thread A continues with actually accessing the no longer available
   OPRegion leading to an error because there is no Opregion handler
   registered.

So this means that _REG should be marked Serialized for this device,
I wonder if it actually is marked as such in the DSDT of your test hardware ?

I think we might get away with this regardless because 3 probably takes a
rw-lock on the ACPI namespace which it can only get if no other threads
are executing any AML code (I'm speculating here I did not check), but this
is yet another example about how this entire approach is less then ideal.

<snip>

>>> +static void ecl_ishtp_cl_deinit(struct ishtp_cl *ecl_ishtp_cl) {
>>> +	ishtp_cl_unlink(ecl_ishtp_cl);
>>> +	ishtp_cl_flush_queues(ecl_ishtp_cl);
>>> +	ishtp_cl_free(ecl_ishtp_cl);
>>> +}
>>> +
>>> +static void ecl_ishtp_cl_reset_handler(struct work_struct *work) {
>>> +	struct ishtp_opregion_dev *opr_dev;
>>> +	struct ishtp_cl_device *cl_device;
>>> +	struct ishtp_cl *ecl_ishtp_cl;
>>> +	int rv;
>>> +	int retry;
>>> +
>>> +	opr_dev = container_of(work, struct ishtp_opregion_dev,
>> reset_work);
>>> +
>>> +	opr_dev->ish_link_ready = false;
>>> +
>>> +	cl_device = opr_dev->cl_device;
>>> +	ecl_ishtp_cl = opr_dev->ecl_ishtp_cl;
>>> +
>>> +	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
>>> +
>>> +	ecl_ishtp_cl = ishtp_cl_allocate(cl_device);
>>> +	if (!ecl_ishtp_cl)
>>> +		return;
>>
>> Is this whole freeing + re-alloc of the ISHTP client here really necessary ? This
>> seems like overkill.
> 
> This is required. reset is an asynchronous notification from ISH
> (PSE in this case) firmware and current connection becomes stale and needs
> to be reinitialized. All ISHTP client drivers are implemented same way. 
> eg.
> drivers/hid/intel-ish-hid/ishtp-hid-client.c
> drivers/hid/intel-ish-hid/ishtp-fw-loader.c

Ok.

<snip>

>>> +static int ecl_ishtp_cl_probe(struct ishtp_cl_device *cl_device) {
>>> +	struct ishtp_cl *ecl_ishtp_cl;
>>> +	struct ishtp_opregion_dev *opr_dev;
>>> +	int rv;
>>> +
>>> +	opr_dev = devm_kzalloc(ishtp_device(cl_device), sizeof(*opr_dev),
>>> +			       GFP_KERNEL);
>>> +	if (!opr_dev)
>>> +		return -ENOMEM;
>>> +
>>> +	ecl_ishtp_cl = ishtp_cl_allocate(cl_device);
>>> +	if (!ecl_ishtp_cl)
>>> +		return -ENOMEM;
>>> +
>>> +	ishtp_set_drvdata(cl_device, ecl_ishtp_cl);
>>> +	ishtp_set_client_data(ecl_ishtp_cl, opr_dev);
>>> +	opr_dev->ecl_ishtp_cl = ecl_ishtp_cl;
>>> +	opr_dev->cl_device = cl_device;
>>> +
>>> +	init_waitqueue_head(&opr_dev->read_wait);
>>> +	INIT_WORK(&opr_dev->event_work, ecl_acpi_invoke_dsm);
>>> +	INIT_WORK(&opr_dev->reset_work, ecl_ishtp_cl_reset_handler);
>>> +
>>> +	/* Initialize ish client device */
>>> +	rv = ecl_ishtp_cl_init(ecl_ishtp_cl);
>>> +	if (rv) {
>>> +		dev_err(cl_data_to_dev(opr_dev), "Client init failed\n");
>>> +		goto err_exit;
>>> +	}
>>> +
>>> +	dev_dbg(cl_data_to_dev(opr_dev), "eclite-ishtp client
>>> +initialised\n");
>>> +
>>> +	/* Register a handler for eclite fw events */
>>> +	ishtp_register_event_cb(cl_device, ecl_ishtp_cl_event_cb);
>>> +
>>> +	ishtp_get_device(cl_device);
>>
>> This seems weird, normally in the device-model a driver being bound to a
>> device guarantees that that device cannot go away before the remove
>> callback of the driver is called.
>>
>> So it seems to me that this call + the put in both the err_exit and
>> ecl_ishtp_cl_remove() cases can be dropped.
>>
> This platform driver has two interfaces - a)ISH and b)ACPI.
> ISH initializes first and if successful we ishtp_get_device().
> Then ACPI initializes after that. If ACPI init fails, driver gets cleaned with
> ISHTP as well thus ishtp_get_device() in probe's err_exit. Both interface must be
> Required and initialized for the functionality.
> 
> Do you still see a problem?

I don't see a problem, but I don't think that the ishtp_get_device() here
and the 2 ishtp_put_device() calls below are necessary.

This is the probe function for a ishtp_cl_driver as long as that driver
is bound to the cl_device (which gets passed as parameter to the probe())
the cl_device can never go away, not until the matching remove function
from the ishtp_cl_driver has been called, so the get at probe + the put
at remove are not necessary, since the cl_device already must stick
around for as long as the driver is bound.

Another way of looking at this is that the linux device-model/driver core
already does a get when it binds a driver to the device (and a put on
remove).


>>> +
>>> +	opr_dev->ish_link_ready = true;
>>> +
>>> +	/* Now find ACPI device and init opregion handlers */
>>> +	rv = acpi_opregion_init(opr_dev);
>>> +	if (rv) {
>>> +		dev_err(cl_data_to_dev(opr_dev), "ACPI opregion init
>> failed\n");
>>> +
>>> +		goto err_exit;
>>> +	}
>>> +
>>> +	/* Reprobe devices depending on ECLite - battery, fan, etc. */
>>> +	acpi_walk_dep_device_list(opr_dev->acpi_handle);
>>> +
>>> +	return 0;
>>> +err_exit:
>>> +	ishtp_set_connection_state(ecl_ishtp_cl,
>> ISHTP_CL_DISCONNECTING);
>>> +	ishtp_cl_disconnect(ecl_ishtp_cl);
>>> +	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
>>> +
>>> +	ishtp_put_device(cl_device);
>>> +
>>> +	return rv;
>>> +}
>>> +
>>> +static int ecl_ishtp_cl_remove(struct ishtp_cl_device *cl_device) {
>>> +	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
>>> +	struct ishtp_opregion_dev *opr_dev =
>>> +		ishtp_get_client_data(ecl_ishtp_cl);
>>> +
>>> +	if (opr_dev->acpi_init_done)
>>> +		acpi_opregion_deinit(opr_dev);
>>> +
>>> +	cancel_work_sync(&opr_dev->reset_work);
>>> +	cancel_work_sync(&opr_dev->event_work);
>>> +
>>> +	ishtp_set_connection_state(ecl_ishtp_cl,
>> ISHTP_CL_DISCONNECTING);
>>> +	ishtp_cl_disconnect(ecl_ishtp_cl);
>>> +	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
>>> +
>>> +	ishtp_put_device(cl_device);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int ecl_ishtp_cl_reset(struct ishtp_cl_device *cl_device) {
>>> +	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
>>> +	struct ishtp_opregion_dev *opr_dev =
>>> +		ishtp_get_client_data(ecl_ishtp_cl);
>>> +
>>> +	schedule_work(&opr_dev->reset_work);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int ecl_ishtp_cl_suspend(struct device *device) {
>>> +	struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device);
>>> +	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
>>> +	struct ishtp_opregion_dev *opr_dev =
>>> +		ishtp_get_client_data(ecl_ishtp_cl);
>>> +
>>> +	if (acpi_target_system_state() == ACPI_STATE_S0)
>>> +		return 0;
>>> +
>>> +	acpi_opregion_deinit(opr_dev);
>>> +	ecl_ish_cl_enable_events(opr_dev, false);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int ecl_ishtp_cl_resume(struct device *device) {
>>> +	/* A reset is expected to call after an Sx. At this point
>>> +	 * we are not sure if the link is up or not to restore anything,
>>> +	 * so do nothing in resume path
>>> +	 */
>>> +	return 0;
>>
>> This seems very wrong, this means that there are no resume ordering
>> guarantees meaning that drivers which are ordered to resume after this
>> driver, because they rely on the opregion may end up not being able to use
>> the opregion leading to all kind of issues.
>>
>> IMHO what should happen here is that this driver waits for the EClite to
>> become ready here. Which probably means that it itself should be only
>> resumed after the ISH HID driver is, but I assume that the ISH device is a
>> parent of this device, so that ordering should be correct automatically.
> 
>>
>> TBH the whole lets just not resume and do a reset instead and then just
>> tearing down the entire ISHTP client struct and setting up a new one from
>> scratch, just feels very wrong.  At a minimum the teardown + bringup needs
>> to happen before the resume callback completes, but ideally this would be
>> replaced by a much lighter resume solution.
> 
> ISH Firmware (the PSE subsystem) can boot up/reinitialize Every Sx based on
> usecase or sometimes PSE is always-on. So the resume path is asynchronous 
> and unpredictable in this case. Re-initialization and clean up required if PSE
> also boot up every Sx and might take good amount of time (Host can come alive
> before PSE comes up). Thus using asynchronous reset notification. 

Ok, so I guess we need to live with the ugly deregister + re-register
OpRegion dance for the devices where the PSE is shutdown during suspend.

You also write: "sometimes PSE is always-on", what about that case,
I assume in this case there will be no reset after resume? So then
unregistering the OpRegion handler at suspend (and setting link_ready=false)
will be wrong since without the reset the OpRegion handler will never
get reinstalled ?

Regards,

Hans


^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH 1/1] ishtp: Add support for Intel ishtp eclite driver
  2021-06-02 11:19     ` Hans de Goede
@ 2021-06-03 16:48       ` K Naduvalath, Sumesh
  2021-06-03 17:05         ` Hans de Goede
  0 siblings, 1 reply; 6+ messages in thread
From: K Naduvalath, Sumesh @ 2021-06-03 16:48 UTC (permalink / raw)
  To: Hans de Goede, mgross, srinivas.pandruvada
  Cc: Pandruvada, Srinivas, platform-driver-x86, linux-kernel, Chinnu,
	Ganapathi, Kumar, Nachiketa

Thank you Hans for the scrutiny. Please find my comments inline.

> -----Original Message-----
> From: Hans de Goede <hdegoede@redhat.com>
> Sent: Wednesday, June 2, 2021 4:49 PM
> To: K Naduvalath, Sumesh <sumesh.k.naduvalath@intel.com>;
> mgross@linux.intel.com; srinivas.pandruvada@linux.intel.com
> Cc: Pandruvada, Srinivas <srinivas.pandruvada@intel.com>; platform-driver-
> x86@vger.kernel.org; linux-kernel@vger.kernel.org; Chinnu, Ganapathi
> <ganapathi.chinnu@intel.com>; Kumar, Nachiketa
> <nachiketa.kumar@intel.com>
> Subject: Re: [PATCH 1/1] ishtp: Add support for Intel ishtp eclite driver
> 
> Hi Sumesh,
> 
> On 6/1/21 8:24 PM, K Naduvalath, Sumesh wrote:
> > Thank you Hans for the review comments. Please find the reply inline -
> >
> >> -----Original Message-----
> >> From: Hans de Goede <hdegoede@redhat.com>
> >> Sent: Monday, May 31, 2021 8:25 PM
> >> To: K Naduvalath, Sumesh <sumesh.k.naduvalath@intel.com>;
> >> mgross@linux.intel.com; srinivas.pandruvada@linux.intel.com
> >> Cc: Pandruvada, Srinivas <srinivas.pandruvada@intel.com>;
> >> platform-driver- x86@vger.kernel.org; linux-kernel@vger.kernel.org;
> >> Chinnu, Ganapathi <ganapathi.chinnu@intel.com>; Kumar, Nachiketa
> >> <nachiketa.kumar@intel.com>
> >> Subject: Re: [PATCH 1/1] ishtp: Add support for Intel ishtp eclite
> >> driver
> >>
> >> Hi,
> >>
> >> Thank you, I've done a quick review, which has already spotted quite
> >> a few issues. Note I will probably do a more thorough review later
> >> which mind find more issues, but lets start with fixing the serious
> >> issues which this review has found.
> 
> <snip>
> 
> >>> diff --git a/drivers/platform/x86/Kconfig
> >>> b/drivers/platform/x86/Kconfig index 60592fb88e7a..cfa2cb150909
> >>> 100644
> >>> --- a/drivers/platform/x86/Kconfig
> >>> +++ b/drivers/platform/x86/Kconfig
> >>> @@ -1180,6 +1180,19 @@ config INTEL_CHTDC_TI_PWRBTN
> >>>  	  To compile this driver as a module, choose M here: the module
> >>>  	  will be called intel_chtdc_ti_pwrbtn.
> >>>
> >>> +config INTEL_ISHTP_ECLITE
> >>> +	tristate "Intel ISHTP eclite controller"
> >>> +	depends on INTEL_ISH_HID
> >>> +	depends on ACPI
> >>> +	help
> >>> +	  This driver is for accessing the PSE(Programmable Service Engine),
> >>> +	  an Embedded Controller like IP, using ISHTP(Integratd Sensor Hub
> >>> +	  Transport Protocol) to get battery, thermal and UCSI (USB Type-C
> >>> +          Connector System Software Interface) related data from
> >>> +the
> >> platform.
> >>
> >> This text has all the same issues as the commit message. Also please
> >> explain on what sort of systems this functionality is typically
> >> found/used so that users will have a better idea if this is something
> >> which they should enable on their systems.
> >>
> >
> > This functionality is enabled for the first time for Intel's Elkhartlake
> platform.
> > Users who don't want to use discrete Embedded Controller on their
> > platform, they can leverage the integrated solution of ECLite which is
> > part of Elkhartlake's PSE subsystem. I'll add more text for the config item.
> 
> Thank you for the info.
> 
> <snip>
> 
> >>> +static acpi_status
> >>> +ecl_opregion_cmd_handler(u32 function, acpi_physical_address
> address,
> >>> +			 u32 bits, u64 *value64,
> >>> +			 void *handler_context, void *region_context) {
> >>> +	struct ishtp_opregion_dev *opr_dev;
> >>> +	struct opregion_cmd *cmd;
> >>> +
> >>> +	if (!region_context || !value64)
> >>> +		return AE_BAD_PARAMETER;
> >>> +
> >>> +	if (function == ACPI_READ)
> >>> +		return AE_ERROR;
> >>> +
> >>> +	opr_dev = (struct ishtp_opregion_dev *)region_context;
> >>> +	cmd = &opr_dev->opr_context.cmd_area;
> >>
> >> This is shared between all threads, we have had issues with sharing
> >> opregion context between threads like this in the past.
> >>
> >> What is stopping ACPI code from trying to use the opr_context from
> >> multiple threads at the same time, messing things up?
> >>
> >
> > This will not happen. BIOS calls ACPI methods (cmd and data in this
> > driver) in a SERIALIZED manner. BIOS issues another call only after finishing
> the first one.
> 
> You say that this will not happen, but the problem which I have with the
> OpRegion API is that this may actually very well happen. There are no
> guarantees of this not happening. We are relying on the AML code from the
> BIOS adding the "Serialized" attribute to all functions accessing the OpRegion,
> all that is necessary is one bug in the AML code and then this will happen.
> 
> And it is typically very hard to get vendors to issue BIOS updates to fix things
> like this :|
> 
> I must honestly say that I find the whole design of the OpRegion API lacking.
> There are other options to interface AML code, like e.g. modelling the ISHTP
> as a generic serial bus, then a single OpRegion access can do the
> following:
> 
> 1. AML fills a buffer
> 2. OpRegion call processes buffer and writes back results (status code +
>    data read if it is a read) to buffer
> 3. AML processes buffer
> 
> This is e.g. used in some Microsoft Surface 3 devices, see:
> drivers/platform/surface/surface3_power.c
> 
> This would make the calls more-or-less atomic, removing the need for the
> ACPI Methods to all be Serialized.
> 
> This would also allow the Opregion to return an error status code when the
> EClite is not ready, instead of unregistering + reregistering the OpRegion,
> which in itself is another source of possible races (see below).
> 
> I assume that it is too late to change the OpRegion API now, so that we are
> stuck with this ?
> 
> I must say I'm disappointed about the quality of the OpRegion API design
> here. APIs should be designed so that it is easy / natural to use them in a
> correct way, where as this API seems to be dessgned in such a way that it is
> easy to use it the wrong way.
> 
> I really expect Intel to do better the next time the introduce a new OpRegion
> for something. It would help greatly if Intel would send some rough sketches
> of how the API is going to look like to the mailinglist, so that we can actually
> correct issues like these, rather then the API already being set in stone and
> that we end up having to live with a not so good API.
> 
I understand your concern and will certainly take this feedback for the next gen/integration
of the product.  Similar approach is being discussed already for nextgen.
But timeline is a concern now here for this.

> >> I'm especially worried about the offset + data_len used in various
> >> places, even if we add checks for this, this could be changed
> >> underneath us by another thread.
> >
> > There are checks in BIOS for offset + data, but I'll add them in the driver as
> well.
> > There is no other thread accessing it. Flow below -
> >
> > ACPI method --> cmd -->
> > <--end ACPI method <--
> >
> > <here no other ACPI method will execute because of serialized method>
> >
> > ACPI method --> data-->
> > <--end ACPI method <--
> >
> >>
> >> You should add a mutex to the opr_dev and lock it in this function so
> >> that the cmd struct cannot be changed underneath us while we are
> processing it.
> >>
> >> Note this does not fully protect against races like this:
> >>
> >> 1. Thread a sets offset
> >> 2. Thread b sets a different offset
> >> 3. Thread a writes ECL_ISH_READ to command.
> >
> > These race conditions won't occur. No structure elements are used from
> > two paths simultaneously. Only element from the opregion structure
> > used outside ACPI path is
> >
> > opr_dev->dsm_event_id from event_work workqueque.
> >
> > But this element not accessed from anywhere else including serialized ACPI
> path.
> 
> I understand that the intention is for all the ACPI calls to be serialized, but
> nothing is guaranteeing this. All it requires is one method not being marked
> as serialized...
> 

I get your point here too. I'll fix in v2.

> >> This will result in thread a getting the data at the offset specified
> >> by thread b, rather then at the offset which it requested. But there
> >> is nothing we can do about that, that needs to be solved with
> synchronization at the AML level.
> >
> > There is ASL serialization defined, But are you suggesting to put
> > locks for the fairness of coding?
> 
> I don't want the kernel to read / write beyond the end of kernel-owned
> memory because the following happens (this assumes one AML method is
> missing the Serialized
> attribute):
> 
> 1. #define ECL_DATA_OPR_BUFLEN	384
> 2. Thread A sets offset to 0
> 3. Thread A sets len to 100
> 4. Thread A writes ECL_ISH_WRITE
> 5. Thread A kernel checks (offset + len) < ECL_DATA_OPR_BUFLEN, this is ok
> 6. Thread B sets offset to 300 7. Thread A kernel does
> memcpy(message.payload, opr_dev->opr_context.data_area.data +
> message.header.offset, message.header.data_len);
> 
> 7. Will now end up reading 100 bytes from offset 300 of
> opr_context.data_area.data, which is only 384 bytes big, which needless to
> say is not good.
> I see now that the offset + len are copied into message.header and then the
> copied values are used. It is tempting to think that this thus can be fixed by
> doing a check on the copied values, but the message struct is a local variable,
> so the compiler will alias the 2 values and might decide to do the check on the
> originals; and the compiler is also free to re-order things. So this making of a
> local copy does not help.
> 
> This is a classic time-of-check vs time-of-use problem. Since we cannot
> _guarantee_ that all users of the OpRegion are properly using the ACPI
> Serialized Method attribute, we need some other mechanism to ensure that
> len + offset do not change between checking and consuming them. The most
> straight forward solution here is to protect opr_context with a mutex so that
> it cannot be changed by another thread while one call is in progress. Since all
> accesses should be serialized at the AML level anyways this will not add extra
> serialization, while it will help avoid AML bugs turning into out-of-bounds
> memory accesses inside the kernel.
> 

Sure. Mutex and length checks will be added in v2.

> <snip>
> 
> >>> +static acpi_status
> >>> +ecl_opregion_data_handler(u32 function, acpi_physical_address
> address,
> >>> +			  u32 bits, u64 *value64,
> >>> +			  void *handler_context, void *region_context) {
> >>> +	struct ishtp_opregion_dev *opr_dev;
> >>> +	unsigned int bytes = BITS_TO_BYTES(bits);
> >>> +	void *data_addr;
> >>> +
> >>> +	if (!region_context || !value64)
> >>> +		return AE_BAD_PARAMETER;
> >>> +
> >>> +	if (address + bytes > ECL_DATA_OPR_BUFLEN)
> >>> +		return AE_BAD_PARAMETER;
> >>> +
> >>> +	opr_dev = (struct ishtp_opregion_dev *)region_context;
> >>> +	data_addr = &opr_dev->opr_context.data_area.data[address];
> >>> +
> >>> +	if (function == ACPI_READ)
> >>> +		memcpy(value64, data_addr, bytes);
> >>> +	else if (function == ACPI_WRITE)
> >>> +		memcpy(data_addr, value64, bytes);
> >>
> >> What if bits is not a multiple of 8? Then we have just overwritten a
> >> bunch of bits which the caller did not request us to do.
> >>
> >> Since the caller specifies bits, this should really do a
> >> read-modify-write of the last byte when there are any "leftover"
> >> bits. ?  What does the documentation say about this?
> >
> > The request will always be multiple of 8 bits as per ASL definition/docs.
> 
> Ok.
> 
> 
> >>> +	else
> >>> +		return AE_BAD_PARAMETER;
> >>> +
> >>> +	return AE_OK;
> >>> +}
> >>> +
> >>> +static int acpi_opregion_init(struct ishtp_opregion_dev *opr_dev) {
> >>> +	acpi_status status;
> >>> +	struct acpi_device *adev;
> >>> +
> >>> +	/* Find ECLite device and install opregion handlers */
> >>> +	adev = acpi_dev_get_first_match_dev("INTC1035", NULL, -1);
> >>> +	if (!adev) {
> >>> +		dev_err(cl_data_to_dev(opr_dev), "eclite ACPI device not
> >> found\n");
> >>> +		return -ENODEV;
> >>> +	}
> >>> +
> >>> +	opr_dev->acpi_handle = adev->handle;
> >>
> >> acpi_opregion_init() seem to get called on every resume, doing the
> >> lookup is only necessary once, after that the cached value in
> >> opr_dev->acpi_handle can be reused.
> >>
> >> More importantly this whole dance of unregistering + re-registering
> >> the opregion seems unnecessary. You already have ish_link_ready in
> >> case the opregion gets called before things are ready; and if the
> >> opregion is called when the link is not ready, still having the
> >> opregion handler in place allows you to log a sensible error about what is
> going on which is what we want.
> >
> > Initial approach was same as you suggested ( No uninstall, just wait
> > in ACPI Method). But after every resume, the driver gets ACPI write
> > and read requests for FAN and thermal controls which fails  because
> > link is not ready. Also we can't wait_event_interruptible_timeout() in
> > ACPI method until we get the link_ready( link can become ready much
> > later until PSE fully boots up after every Sx. Anything else gets executed
> after this wait_event timeout fail in ACPI method.
> > We can't afford to miss any critical thermal/FAN related setting from
> > standby/ hibernation resume. No requests are missed by registering
> > opregion only after link_ready.
> 
> I see; and since the OpRegion API does not allow returning a status code to
> communicate the link-not-ready thing you need to do this unregister + re-
> register dance instead. See this is what I meant above when I wrote that the
> whole OpRegion API seems lacking. This could all have been solved by having
> the OpRegion calls communicate back a status code.
> 
> Now you are relying on communicating the availability of the link by calling
> _REG instead. This means that we better hope that all users of the OpRegion
> will correctly check the flag set by _REG, which in my experience with a lot of
> AML code is something which is often forgotten, so this will be another
> source of DSDT/AML bugs for AML code using this OpRegion <sigh>.
> 
> Also note that this is racy too, on suspend we unregister the OpRegion,
> calling _REG in the process. But _REG methods typically are not Serialized so
> now the following may happen:
> 
> 1. Thread A is executing a function using the EClite OpRegion 2. Thread A
> checks the flag set by _REG, sees the region is available 3. Thread B is
> executing suspend for the EClite device, unregister the
>    OpRegion, calling _REG to clear the flag 4. Thread A continues with actually
> accessing the no longer available
>    OPRegion leading to an error because there is no Opregion handler
>    registered.
> 
> So this means that _REG should be marked Serialized for this device, I
> wonder if it actually is marked as such in the DSDT of your test hardware ?
> 
> I think we might get away with this regardless because 3 probably takes a rw-
> lock on the ACPI namespace which it can only get if no other threads are
> executing any AML code (I'm speculating here I did not check), but this is yet
> another example about how this entire approach is less then ideal.
> 

I'll check again and come back on this with a better approach to protect from race.
I'll incorporate the changes in v2 with better solution.
And sure, these points are taken as positive feedback. Thanks.

> <snip>
> 
> >>> +static void ecl_ishtp_cl_deinit(struct ishtp_cl *ecl_ishtp_cl) {
> >>> +	ishtp_cl_unlink(ecl_ishtp_cl);
> >>> +	ishtp_cl_flush_queues(ecl_ishtp_cl);
> >>> +	ishtp_cl_free(ecl_ishtp_cl);
> >>> +}
> >>> +
> >>> +static void ecl_ishtp_cl_reset_handler(struct work_struct *work) {
> >>> +	struct ishtp_opregion_dev *opr_dev;
> >>> +	struct ishtp_cl_device *cl_device;
> >>> +	struct ishtp_cl *ecl_ishtp_cl;
> >>> +	int rv;
> >>> +	int retry;
> >>> +
> >>> +	opr_dev = container_of(work, struct ishtp_opregion_dev,
> >> reset_work);
> >>> +
> >>> +	opr_dev->ish_link_ready = false;
> >>> +
> >>> +	cl_device = opr_dev->cl_device;
> >>> +	ecl_ishtp_cl = opr_dev->ecl_ishtp_cl;
> >>> +
> >>> +	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
> >>> +
> >>> +	ecl_ishtp_cl = ishtp_cl_allocate(cl_device);
> >>> +	if (!ecl_ishtp_cl)
> >>> +		return;
> >>
> >> Is this whole freeing + re-alloc of the ISHTP client here really
> >> necessary ? This seems like overkill.
> >
> > This is required. reset is an asynchronous notification from ISH (PSE
> > in this case) firmware and current connection becomes stale and needs
> > to be reinitialized. All ISHTP client drivers are implemented same way.
> > eg.
> > drivers/hid/intel-ish-hid/ishtp-hid-client.c
> > drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> 
> Ok.
> 
> <snip>
> 
> >>> +static int ecl_ishtp_cl_probe(struct ishtp_cl_device *cl_device) {
> >>> +	struct ishtp_cl *ecl_ishtp_cl;
> >>> +	struct ishtp_opregion_dev *opr_dev;
> >>> +	int rv;
> >>> +
> >>> +	opr_dev = devm_kzalloc(ishtp_device(cl_device), sizeof(*opr_dev),
> >>> +			       GFP_KERNEL);
> >>> +	if (!opr_dev)
> >>> +		return -ENOMEM;
> >>> +
> >>> +	ecl_ishtp_cl = ishtp_cl_allocate(cl_device);
> >>> +	if (!ecl_ishtp_cl)
> >>> +		return -ENOMEM;
> >>> +
> >>> +	ishtp_set_drvdata(cl_device, ecl_ishtp_cl);
> >>> +	ishtp_set_client_data(ecl_ishtp_cl, opr_dev);
> >>> +	opr_dev->ecl_ishtp_cl = ecl_ishtp_cl;
> >>> +	opr_dev->cl_device = cl_device;
> >>> +
> >>> +	init_waitqueue_head(&opr_dev->read_wait);
> >>> +	INIT_WORK(&opr_dev->event_work, ecl_acpi_invoke_dsm);
> >>> +	INIT_WORK(&opr_dev->reset_work, ecl_ishtp_cl_reset_handler);
> >>> +
> >>> +	/* Initialize ish client device */
> >>> +	rv = ecl_ishtp_cl_init(ecl_ishtp_cl);
> >>> +	if (rv) {
> >>> +		dev_err(cl_data_to_dev(opr_dev), "Client init failed\n");
> >>> +		goto err_exit;
> >>> +	}
> >>> +
> >>> +	dev_dbg(cl_data_to_dev(opr_dev), "eclite-ishtp client
> >>> +initialised\n");
> >>> +
> >>> +	/* Register a handler for eclite fw events */
> >>> +	ishtp_register_event_cb(cl_device, ecl_ishtp_cl_event_cb);
> >>> +
> >>> +	ishtp_get_device(cl_device);
> >>
> >> This seems weird, normally in the device-model a driver being bound
> >> to a device guarantees that that device cannot go away before the
> >> remove callback of the driver is called.
> >>
> >> So it seems to me that this call + the put in both the err_exit and
> >> ecl_ishtp_cl_remove() cases can be dropped.
> >>
> > This platform driver has two interfaces - a)ISH and b)ACPI.
> > ISH initializes first and if successful we ishtp_get_device().
> > Then ACPI initializes after that. If ACPI init fails, driver gets
> > cleaned with ISHTP as well thus ishtp_get_device() in probe's
> > err_exit. Both interface must be Required and initialized for the
> functionality.
> >
> > Do you still see a problem?
> 
> I don't see a problem, but I don't think that the ishtp_get_device() here and
> the 2 ishtp_put_device() calls below are necessary.
> 
> This is the probe function for a ishtp_cl_driver as long as that driver is bound
> to the cl_device (which gets passed as parameter to the probe()) the
> cl_device can never go away, not until the matching remove function from
> the ishtp_cl_driver has been called, so the get at probe + the put at remove
> are not necessary, since the cl_device already must stick around for as long
> as the driver is bound.
> 
> Another way of looking at this is that the linux device-model/driver core
> already does a get when it binds a driver to the device (and a put on
> remove).
> 
Ok. I'll cross check with ish gurus if there is any concern with this.
I'll remove them if there isn't any.
> 
> >>> +
> >>> +	opr_dev->ish_link_ready = true;
> >>> +
> >>> +	/* Now find ACPI device and init opregion handlers */
> >>> +	rv = acpi_opregion_init(opr_dev);
> >>> +	if (rv) {
> >>> +		dev_err(cl_data_to_dev(opr_dev), "ACPI opregion init
> >> failed\n");
> >>> +
> >>> +		goto err_exit;
> >>> +	}
> >>> +
> >>> +	/* Reprobe devices depending on ECLite - battery, fan, etc. */
> >>> +	acpi_walk_dep_device_list(opr_dev->acpi_handle);
> >>> +
> >>> +	return 0;
> >>> +err_exit:
> >>> +	ishtp_set_connection_state(ecl_ishtp_cl,
> >> ISHTP_CL_DISCONNECTING);
> >>> +	ishtp_cl_disconnect(ecl_ishtp_cl);
> >>> +	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
> >>> +
> >>> +	ishtp_put_device(cl_device);
> >>> +
> >>> +	return rv;
> >>> +}
> >>> +
> >>> +static int ecl_ishtp_cl_remove(struct ishtp_cl_device *cl_device) {
> >>> +	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
> >>> +	struct ishtp_opregion_dev *opr_dev =
> >>> +		ishtp_get_client_data(ecl_ishtp_cl);
> >>> +
> >>> +	if (opr_dev->acpi_init_done)
> >>> +		acpi_opregion_deinit(opr_dev);
> >>> +
> >>> +	cancel_work_sync(&opr_dev->reset_work);
> >>> +	cancel_work_sync(&opr_dev->event_work);
> >>> +
> >>> +	ishtp_set_connection_state(ecl_ishtp_cl,
> >> ISHTP_CL_DISCONNECTING);
> >>> +	ishtp_cl_disconnect(ecl_ishtp_cl);
> >>> +	ecl_ishtp_cl_deinit(ecl_ishtp_cl);
> >>> +
> >>> +	ishtp_put_device(cl_device);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int ecl_ishtp_cl_reset(struct ishtp_cl_device *cl_device) {
> >>> +	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
> >>> +	struct ishtp_opregion_dev *opr_dev =
> >>> +		ishtp_get_client_data(ecl_ishtp_cl);
> >>> +
> >>> +	schedule_work(&opr_dev->reset_work);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int ecl_ishtp_cl_suspend(struct device *device) {
> >>> +	struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device);
> >>> +	struct ishtp_cl *ecl_ishtp_cl = ishtp_get_drvdata(cl_device);
> >>> +	struct ishtp_opregion_dev *opr_dev =
> >>> +		ishtp_get_client_data(ecl_ishtp_cl);
> >>> +
> >>> +	if (acpi_target_system_state() == ACPI_STATE_S0)
> >>> +		return 0;
> >>> +
> >>> +	acpi_opregion_deinit(opr_dev);
> >>> +	ecl_ish_cl_enable_events(opr_dev, false);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int ecl_ishtp_cl_resume(struct device *device) {
> >>> +	/* A reset is expected to call after an Sx. At this point
> >>> +	 * we are not sure if the link is up or not to restore anything,
> >>> +	 * so do nothing in resume path
> >>> +	 */
> >>> +	return 0;
> >>
> >> This seems very wrong, this means that there are no resume ordering
> >> guarantees meaning that drivers which are ordered to resume after
> >> this driver, because they rely on the opregion may end up not being
> >> able to use the opregion leading to all kind of issues.
> >>
> >> IMHO what should happen here is that this driver waits for the EClite
> >> to become ready here. Which probably means that it itself should be
> >> only resumed after the ISH HID driver is, but I assume that the ISH
> >> device is a parent of this device, so that ordering should be correct
> automatically.
> >
> >>
> >> TBH the whole lets just not resume and do a reset instead and then
> >> just tearing down the entire ISHTP client struct and setting up a new
> >> one from scratch, just feels very wrong.  At a minimum the teardown +
> >> bringup needs to happen before the resume callback completes, but
> >> ideally this would be replaced by a much lighter resume solution.
> >
> > ISH Firmware (the PSE subsystem) can boot up/reinitialize Every Sx
> > based on usecase or sometimes PSE is always-on. So the resume path is
> > asynchronous and unpredictable in this case. Re-initialization and
> > clean up required if PSE also boot up every Sx and might take good
> > amount of time (Host can come alive before PSE comes up). Thus using
> asynchronous reset notification.
> 
> Ok, so I guess we need to live with the ugly deregister + re-register OpRegion
> dance for the devices where the PSE is shutdown during suspend.
> 
> You also write: "sometimes PSE is always-on", what about that case, I assume
> in this case there will be no reset after resume? So then unregistering the
> OpRegion handler at suspend (and setting link_ready=false) will be wrong
> since without the reset the OpRegion handler will never get reinstalled ?

reset is called from bus after every resume. When PSE is always-on, the notification
comes quicky, where as there reset comes bit later in case of new boot.

> 
> Regards,
> 
> Hans

Thanks again for the comments. Let me know if you have further review comments. I'll
work on v2. 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/1] ishtp: Add support for Intel ishtp eclite driver
  2021-06-03 16:48       ` K Naduvalath, Sumesh
@ 2021-06-03 17:05         ` Hans de Goede
  0 siblings, 0 replies; 6+ messages in thread
From: Hans de Goede @ 2021-06-03 17:05 UTC (permalink / raw)
  To: K Naduvalath, Sumesh, mgross, srinivas.pandruvada
  Cc: Pandruvada, Srinivas, platform-driver-x86, linux-kernel, Chinnu,
	Ganapathi, Kumar, Nachiketa

Hi,

On 6/3/21 6:48 PM, K Naduvalath, Sumesh wrote:
> Thank you Hans for the scrutiny. Please find my comments inline.

<snip>

>>> ISH Firmware (the PSE subsystem) can boot up/reinitialize Every Sx
>>> based on usecase or sometimes PSE is always-on. So the resume path is
>>> asynchronous and unpredictable in this case. Re-initialization and
>>> clean up required if PSE also boot up every Sx and might take good
>>> amount of time (Host can come alive before PSE comes up). Thus using
>> asynchronous reset notification.
>>
>> Ok, so I guess we need to live with the ugly deregister + re-register OpRegion
>> dance for the devices where the PSE is shutdown during suspend.
>>
>> You also write: "sometimes PSE is always-on", what about that case, I assume
>> in this case there will be no reset after resume? So then unregistering the
>> OpRegion handler at suspend (and setting link_ready=false) will be wrong
>> since without the reset the OpRegion handler will never get reinstalled ?
> 
> reset is called from bus after every resume. When PSE is always-on, the notification
> comes quicky, where as there reset comes bit later in case of new boot.

Ok, in that case it makes sense to treat the PSE always-on scenario the same
as the scenario where it gets turned off during suspend.

Regards,

Hans


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-06-03 17:05 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-31 12:04 [PATCH 1/1] ishtp: Add support for Intel ishtp eclite driver sumesh.k.naduvalath
2021-05-31 14:54 ` Hans de Goede
2021-06-01 18:24   ` K Naduvalath, Sumesh
2021-06-02 11:19     ` Hans de Goede
2021-06-03 16:48       ` K Naduvalath, Sumesh
2021-06-03 17:05         ` Hans de Goede

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).