All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Alastair D'Silva" <alastair@d-silva.org>
To: alastair@d-silva.org
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>,
	"Benjamin Herrenschmidt" <benh@kernel.crashing.org>,
	"Paul Mackerras" <paulus@samba.org>,
	"Michael Ellerman" <mpe@ellerman.id.au>,
	"Frederic Barrat" <fbarrat@linux.ibm.com>,
	"Andrew Donnellan" <ajd@linux.ibm.com>,
	"Arnd Bergmann" <arnd@arndb.de>,
	"Greg Kroah-Hartman" <gregkh@linuxfoundation.org>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Mauro Carvalho Chehab" <mchehab+samsung@kernel.org>,
	"David S. Miller" <davem@davemloft.net>,
	"Rob Herring" <robh@kernel.org>,
	"Anton Blanchard" <anton@ozlabs.org>,
	"Krzysztof Kozlowski" <krzk@kernel.org>,
	"Mahesh Salgaonkar" <mahesh@linux.vnet.ibm.com>,
	"Madhavan Srinivasan" <maddy@linux.vnet.ibm.com>,
	"Cédric Le Goater" <clg@kaod.org>,
	"Anju T Sudhakar" <anju@linux.vnet.ibm.com>,
	"Hari Bathini" <hbathini@linux.ibm.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Greg Kurz" <groug@kaod.org>,
	"Nicholas Piggin" <npiggin@gmail.com>,
	"Masahiro Yamada" <yamada.masahiro@socionext.com>,
	"Alexey Kardashevskiy" <aik@ozlabs.ru>
Subject: [PATCH v4 17/25] nvdimm/ocxl: Add controller dump IOCTLs
Date: Fri, 27 Mar 2020 18:11:54 +1100	[thread overview]
Message-ID: <20200327071202.2159885-18-alastair@d-silva.org> (raw)
In-Reply-To: <20200327071202.2159885-1-alastair@d-silva.org>

This patch adds IOCTLs to allow userspace to request & fetch dumps
of the internal controller state.

This is useful during debugging or when a fatal error on the controller
has occurred.

The expected flow of operations is:
1. IOCTL_OCXLPMEM_CONTROLLER_DUMP to request the controller to take
   a dump. This IOCTL will complete after the dump is available for
   collection.
2. IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA called repeatedly to fetch
   chunks from the buffer
3. IOCTL_OCXLPMEM_CONTROLLER_DUMP_COMPLETE to notify the controller
   that it can free any internal resources used for the dump

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
---
 drivers/nvdimm/ocxl/main.c     | 161 +++++++++++++++++++++++++++++++++
 include/uapi/nvdimm/ocxlpmem.h |  16 ++++
 2 files changed, 177 insertions(+)

diff --git a/drivers/nvdimm/ocxl/main.c b/drivers/nvdimm/ocxl/main.c
index e6be0029f658..d0db358ded43 100644
--- a/drivers/nvdimm/ocxl/main.c
+++ b/drivers/nvdimm/ocxl/main.c
@@ -566,6 +566,153 @@ static int ioctl_error_log(struct ocxlpmem *ocxlpmem,
 	return 0;
 }
 
+/**
+ * controller_dump_header_parse() - Parse the first 64 bits of the controller dump command response
+ * @ocxlpmem: the device metadata
+ * @length: out, returns the number of bytes in the response (excluding the 64 bit header)
+ *
+ * The header is read as a little-endian 64 bit word from the start of the
+ * admin command data area. Bits 63:48 carry the data identifier, which must be
+ * 0x4344 ('CD'); bits 15:0 carry the length that is returned via @length.
+ *
+ * Return: 0 on success, -EINVAL if the identifier is not 'CD', otherwise the
+ * error returned by ocxl_global_mmio_read64(). @length is only written on
+ * success.
+ */
+static int controller_dump_header_parse(struct ocxlpmem *ocxlpmem, u16 *length)
+{
+	int rc;
+	u64 val;
+	u16 data_identifier;
+	u32 data_length;
+
+	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+				     ocxlpmem->admin_command.data_offset,
+				     OCXL_LITTLE_ENDIAN, &val);
+	if (rc)
+		return rc;
+
+	data_identifier = val >> 48;
+	data_length = val & 0xFFFF;
+
+	if (data_identifier != 0x4344) { // 'CD'
+		/*
+		 * data_identifier is a bare u16 with no NUL terminator, so the
+		 * string conversion needs a precision ("%.2s") to cap the read
+		 * at two bytes; a field width ("%2s") only pads and would let
+		 * the read run past the variable.
+		 */
+		dev_err(&ocxlpmem->dev,
+			"Bad data identifier for controller dump data, expected 'CD', got '%.2s' (%#x), data_length=%u\n",
+			(char *)&data_identifier,
+			(unsigned int)data_identifier, data_length);
+		return -EINVAL;
+	}
+
+	*length = data_length;
+	return 0;
+}
+
+/**
+ * ioctl_controller_dump_data() - Copy one chunk of a controller dump to userspace
+ * @ocxlpmem: the device metadata
+ * @uarg: userspace request; buf_ptr, buf_size & offset are inputs, and buf_size
+ *	  is rewritten with the number of bytes actually copied
+ *
+ * Issues ADMIN_COMMAND_CONTROLLER_DUMP for the requested offset & size while
+ * holding the admin command lock, then copies the response (the 64 bit header
+ * followed by the dump data) to the user buffer in 64 bit words.
+ *
+ * Return: 0 on success, -EFAULT if userspace memory cannot be accessed,
+ * -EINVAL if buf_size is not a multiple of 8 or exceeds the admin command data
+ * area, or the error/status reported by the helpers it calls.
+ */
+static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
+				      struct ioctl_ocxlpmem_controller_dump_data __user *uarg)
+{
+	struct ioctl_ocxlpmem_controller_dump_data args;
+	u64 __user *buf;
+	u16 i, buf_size;
+	u64 val;
+	int rc;
+
+	if (copy_from_user(&args, uarg, sizeof(args)))
+		return -EFAULT;
+
+	/* The response is copied out in whole 64 bit words */
+	if (args.buf_size % sizeof(u64))
+		return -EINVAL;
+
+	if (args.buf_size > ocxlpmem->admin_command.data_size)
+		return -EINVAL;
+
+	/* Go via uintptr_t so the __u64 becomes a properly annotated user pointer */
+	buf = (u64 __user *)(uintptr_t)args.buf_ptr;
+
+	mutex_lock(&ocxlpmem->admin_command.lock);
+
+	/* Request word: dump offset in the upper 32 bits, chunk size in the lower */
+	val = ((u64)args.offset) << 32;
+	val |= args.buf_size;
+	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
+				      ocxlpmem->admin_command.request_offset + 0x08,
+				      OCXL_LITTLE_ENDIAN, val);
+	if (rc)
+		goto out;
+
+	/*
+	 * Only bail out silently on a negative value; anything else is treated
+	 * as a controller status and checked below so that an unexpected
+	 * status is actually reported via warn_status().
+	 * NOTE(review): assumes admin_command_execute() returns a negative
+	 * errno on failure and the controller status otherwise - confirm
+	 * against its definition.
+	 */
+	rc = admin_command_execute(ocxlpmem, ADMIN_COMMAND_CONTROLLER_DUMP);
+	if (rc < 0)
+		goto out;
+	if (rc != STATUS_SUCCESS) {
+		/*
+		 * NOTE(review): the status is returned to the caller as-is;
+		 * confirm whether it should be mapped to an errno.
+		 */
+		warn_status(ocxlpmem,
+			    "Unexpected status from controller dump",
+			    rc);
+		goto out;
+	}
+
+	rc = controller_dump_header_parse(ocxlpmem, &buf_size);
+	if (rc)
+		goto out;
+
+	/* Include the 64 bit header, but never exceed what userspace provided */
+	buf_size = min((u16)(buf_size + sizeof(u64)), args.buf_size);
+
+	for (i = 0; i < buf_size / sizeof(u64); i++) {
+		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+					     ocxlpmem->admin_command.data_offset +
+							i * sizeof(u64),
+					     OCXL_HOST_ENDIAN, &val);
+		if (rc)
+			goto out;
+
+		if (copy_to_user(&buf[i], &val, sizeof(u64))) {
+			rc = -EFAULT;
+			goto out;
+		}
+	}
+
+	/* Report the number of bytes copied back to userspace */
+	args.buf_size = buf_size;
+
+	if (copy_to_user(uarg, &args, sizeof(args))) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rc = admin_response_handled(ocxlpmem);
+	if (rc)
+		goto out;
+
+out:
+	mutex_unlock(&ocxlpmem->admin_command.lock);
+	return rc;
+}
+
+/*
+ * Request a controller dump and wait for the request to be consumed.
+ *
+ * Sets GLOBAL_MMIO_CHI_CDA in GLOBAL_MMIO_CHIC, sets
+ * GLOBAL_MMIO_HCI_CONTROLLER_DUMP in GLOBAL_MMIO_HCI, then polls
+ * GLOBAL_MMIO_HCI until that bit reads back as clear.
+ *
+ * NOTE(review): the poll has no timeout, so it only ends when the bit clears
+ * or an MMIO access fails.
+ *
+ * Returns 0 once the bit is clear, or the error from the failing MMIO helper.
+ */
+int request_controller_dump(struct ocxlpmem *ocxlpmem)
+{
+	u64 pending = 1;
+	int rc;
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIC,
+				    OCXL_LITTLE_ENDIAN, GLOBAL_MMIO_CHI_CDA);
+	if (rc)
+		return rc;
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
+				    OCXL_LITTLE_ENDIAN,
+				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
+	if (rc)
+		return rc;
+
+	do {
+		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+					     GLOBAL_MMIO_HCI,
+					     OCXL_LITTLE_ENDIAN, &pending);
+		if (rc)
+			return rc;
+
+		/* Only the dump request bit matters for completion */
+		pending &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
+		cond_resched();
+	} while (pending);
+
+	return 0;
+}
+
+/*
+ * Notify the controller that the dump has been collected by setting
+ * GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED in GLOBAL_MMIO_HCI.
+ * Returns the result of the MMIO helper.
+ */
+static int ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem)
+{
+	int rc;
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
+				    OCXL_LITTLE_ENDIAN,
+				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED);
+	return rc;
+}
+
 static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
 {
 	struct ocxlpmem *ocxlpmem = file->private_data;
@@ -576,7 +723,21 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
 		rc = ioctl_error_log(ocxlpmem,
 				     (struct ioctl_ocxlpmem_error_log __user *)args);
 		break;
+
+	case IOCTL_OCXLPMEM_CONTROLLER_DUMP:
+		rc = request_controller_dump(ocxlpmem);
+		break;
+
+	case IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA:
+		rc = ioctl_controller_dump_data(ocxlpmem,
+						(struct ioctl_ocxlpmem_controller_dump_data __user *)args);
+		break;
+
+	case IOCTL_OCXLPMEM_CONTROLLER_DUMP_COMPLETE:
+		rc = ioctl_controller_dump_complete(ocxlpmem);
+		break;
 	}
+
 	return rc;
 }
 
diff --git a/include/uapi/nvdimm/ocxlpmem.h b/include/uapi/nvdimm/ocxlpmem.h
index 5d3a03ea1e08..05e2b3f7b27c 100644
--- a/include/uapi/nvdimm/ocxlpmem.h
+++ b/include/uapi/nvdimm/ocxlpmem.h
@@ -38,9 +38,25 @@ struct ioctl_ocxlpmem_error_log {
 	__u64 reserved2[2];
 };
 
+/* Argument block for IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA */
+struct ioctl_ocxlpmem_controller_dump_data {
+	__u64 buf_ptr; /* coerced pointer to output buffer */
+	__u16 buf_size; /* in/out, buffer size provided/required.
+			 * On input it must be a multiple of 8 bytes.
+			 * If required is greater than provided, the buffer
+			 * will be truncated to the amount provided. If it is
+			 * less, then only the required bytes will be populated.
+			 * If it is 0, then there is no more dump data available.
+			 */
+	__u16 reserved0;
+	__u32 offset; /* in, Offset within the dump */
+	__u64 reserved[8];
+};
+
 /* ioctl numbers */
 #define OCXLPMEM_MAGIC 0xCA
 /* OpenCAPI Persistent memory devices */
 #define IOCTL_OCXLPMEM_ERROR_LOG			_IOWR(OCXLPMEM_MAGIC, 0x30, struct ioctl_ocxlpmem_error_log)
+#define IOCTL_OCXLPMEM_CONTROLLER_DUMP			_IO(OCXLPMEM_MAGIC, 0x31)
+#define IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA		_IOWR(OCXLPMEM_MAGIC, 0x32, struct ioctl_ocxlpmem_controller_dump_data)
+#define IOCTL_OCXLPMEM_CONTROLLER_DUMP_COMPLETE		_IO(OCXLPMEM_MAGIC, 0x33)
 
 #endif /* _UAPI_OCXL_SCM_H */
-- 
2.24.1
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

WARNING: multiple messages have this Message-ID (diff)
From: "Alastair D'Silva" <alastair@d-silva.org>
To: alastair@d-silva.org
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>,
	"Oliver O'Halloran" <oohall@gmail.com>,
	"Benjamin Herrenschmidt" <benh@kernel.crashing.org>,
	"Paul Mackerras" <paulus@samba.org>,
	"Michael Ellerman" <mpe@ellerman.id.au>,
	"Frederic Barrat" <fbarrat@linux.ibm.com>,
	"Andrew Donnellan" <ajd@linux.ibm.com>,
	"Arnd Bergmann" <arnd@arndb.de>,
	"Greg Kroah-Hartman" <gregkh@linuxfoundation.org>,
	"Dan Williams" <dan.j.williams@intel.com>,
	"Vishal Verma" <vishal.l.verma@intel.com>,
	"Dave Jiang" <dave.jiang@intel.com>,
	"Ira Weiny" <ira.weiny@intel.com>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Mauro Carvalho Chehab" <mchehab+samsung@kernel.org>,
	"David S. Miller" <davem@davemloft.net>,
	"Rob Herring" <robh@kernel.org>,
	"Anton Blanchard" <anton@ozlabs.org>,
	"Krzysztof Kozlowski" <krzk@kernel.org>,
	"Mahesh Salgaonkar" <mahesh@linux.vnet.ibm.com>,
	"Madhavan Srinivasan" <maddy@linux.vnet.ibm.com>,
	"Cédric Le Goater" <clg@kaod.org>,
	"Anju T Sudhakar" <anju@linux.vnet.ibm.com>,
	"Hari Bathini" <hbathini@linux.ibm.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Greg Kurz" <groug@kaod.org>,
	"Nicholas Piggin" <npiggin@gmail.com>,
	"Masahiro Yamada" <yamada.masahiro@socionext.com>,
	"Alexey Kardashevskiy" <aik@ozlabs.ru>,
	linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	linux-nvdimm@lists.01.org, linux-mm@kvack.org
Subject: [PATCH v4 17/25] nvdimm/ocxl: Add controller dump IOCTLs
Date: Fri, 27 Mar 2020 18:11:54 +1100	[thread overview]
Message-ID: <20200327071202.2159885-18-alastair@d-silva.org> (raw)
In-Reply-To: <20200327071202.2159885-1-alastair@d-silva.org>

This patch adds IOCTLs to allow userspace to request & fetch dumps
of the internal controller state.

This is useful during debugging or when a fatal error on the controller
has occurred.

The expected flow of operations is:
1. IOCTL_OCXLPMEM_CONTROLLER_DUMP to request the controller to take
   a dump. This IOCTL will complete after the dump is available for
   collection.
2. IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA called repeatedly to fetch
   chunks from the buffer
3. IOCTL_OCXLPMEM_CONTROLLER_DUMP_COMPLETE to notify the controller
   that it can free any internal resources used for the dump

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
---
 drivers/nvdimm/ocxl/main.c     | 161 +++++++++++++++++++++++++++++++++
 include/uapi/nvdimm/ocxlpmem.h |  16 ++++
 2 files changed, 177 insertions(+)

diff --git a/drivers/nvdimm/ocxl/main.c b/drivers/nvdimm/ocxl/main.c
index e6be0029f658..d0db358ded43 100644
--- a/drivers/nvdimm/ocxl/main.c
+++ b/drivers/nvdimm/ocxl/main.c
@@ -566,6 +566,153 @@ static int ioctl_error_log(struct ocxlpmem *ocxlpmem,
 	return 0;
 }
 
+/**
+ * controller_dump_header_parse() - Parse the first 64 bits of the controller dump command response
+ * @ocxlpmem: the device metadata
+ * @length: out, returns the number of bytes in the response (excluding the 64 bit header)
+ *
+ * The header is read as a little-endian 64 bit word from the start of the
+ * admin command data area. Bits 63:48 carry the data identifier, which must be
+ * 0x4344 ('CD'); bits 15:0 carry the length that is returned via @length.
+ *
+ * Return: 0 on success, -EINVAL if the identifier is not 'CD', otherwise the
+ * error returned by ocxl_global_mmio_read64(). @length is only written on
+ * success.
+ */
+static int controller_dump_header_parse(struct ocxlpmem *ocxlpmem, u16 *length)
+{
+	int rc;
+	u64 val;
+	u16 data_identifier;
+	u32 data_length;
+
+	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+				     ocxlpmem->admin_command.data_offset,
+				     OCXL_LITTLE_ENDIAN, &val);
+	if (rc)
+		return rc;
+
+	data_identifier = val >> 48;
+	data_length = val & 0xFFFF;
+
+	if (data_identifier != 0x4344) { // 'CD'
+		/*
+		 * data_identifier is a bare u16 with no NUL terminator, so the
+		 * string conversion needs a precision ("%.2s") to cap the read
+		 * at two bytes; a field width ("%2s") only pads and would let
+		 * the read run past the variable.
+		 */
+		dev_err(&ocxlpmem->dev,
+			"Bad data identifier for controller dump data, expected 'CD', got '%.2s' (%#x), data_length=%u\n",
+			(char *)&data_identifier,
+			(unsigned int)data_identifier, data_length);
+		return -EINVAL;
+	}
+
+	*length = data_length;
+	return 0;
+}
+
+/**
+ * ioctl_controller_dump_data() - Copy one chunk of a controller dump to userspace
+ * @ocxlpmem: the device metadata
+ * @uarg: userspace request; buf_ptr, buf_size & offset are inputs, and buf_size
+ *	  is rewritten with the number of bytes actually copied
+ *
+ * Issues ADMIN_COMMAND_CONTROLLER_DUMP for the requested offset & size while
+ * holding the admin command lock, then copies the response (the 64 bit header
+ * followed by the dump data) to the user buffer in 64 bit words.
+ *
+ * Return: 0 on success, -EFAULT if userspace memory cannot be accessed,
+ * -EINVAL if buf_size is not a multiple of 8 or exceeds the admin command data
+ * area, or the error/status reported by the helpers it calls.
+ */
+static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
+				      struct ioctl_ocxlpmem_controller_dump_data __user *uarg)
+{
+	struct ioctl_ocxlpmem_controller_dump_data args;
+	u64 __user *buf;
+	u16 i, buf_size;
+	u64 val;
+	int rc;
+
+	if (copy_from_user(&args, uarg, sizeof(args)))
+		return -EFAULT;
+
+	/* The response is copied out in whole 64 bit words */
+	if (args.buf_size % sizeof(u64))
+		return -EINVAL;
+
+	if (args.buf_size > ocxlpmem->admin_command.data_size)
+		return -EINVAL;
+
+	/* Go via uintptr_t so the __u64 becomes a properly annotated user pointer */
+	buf = (u64 __user *)(uintptr_t)args.buf_ptr;
+
+	mutex_lock(&ocxlpmem->admin_command.lock);
+
+	/* Request word: dump offset in the upper 32 bits, chunk size in the lower */
+	val = ((u64)args.offset) << 32;
+	val |= args.buf_size;
+	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
+				      ocxlpmem->admin_command.request_offset + 0x08,
+				      OCXL_LITTLE_ENDIAN, val);
+	if (rc)
+		goto out;
+
+	/*
+	 * Only bail out silently on a negative value; anything else is treated
+	 * as a controller status and checked below so that an unexpected
+	 * status is actually reported via warn_status().
+	 * NOTE(review): assumes admin_command_execute() returns a negative
+	 * errno on failure and the controller status otherwise - confirm
+	 * against its definition.
+	 */
+	rc = admin_command_execute(ocxlpmem, ADMIN_COMMAND_CONTROLLER_DUMP);
+	if (rc < 0)
+		goto out;
+	if (rc != STATUS_SUCCESS) {
+		/*
+		 * NOTE(review): the status is returned to the caller as-is;
+		 * confirm whether it should be mapped to an errno.
+		 */
+		warn_status(ocxlpmem,
+			    "Unexpected status from controller dump",
+			    rc);
+		goto out;
+	}
+
+	rc = controller_dump_header_parse(ocxlpmem, &buf_size);
+	if (rc)
+		goto out;
+
+	/* Include the 64 bit header, but never exceed what userspace provided */
+	buf_size = min((u16)(buf_size + sizeof(u64)), args.buf_size);
+
+	for (i = 0; i < buf_size / sizeof(u64); i++) {
+		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+					     ocxlpmem->admin_command.data_offset +
+							i * sizeof(u64),
+					     OCXL_HOST_ENDIAN, &val);
+		if (rc)
+			goto out;
+
+		if (copy_to_user(&buf[i], &val, sizeof(u64))) {
+			rc = -EFAULT;
+			goto out;
+		}
+	}
+
+	/* Report the number of bytes copied back to userspace */
+	args.buf_size = buf_size;
+
+	if (copy_to_user(uarg, &args, sizeof(args))) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rc = admin_response_handled(ocxlpmem);
+	if (rc)
+		goto out;
+
+out:
+	mutex_unlock(&ocxlpmem->admin_command.lock);
+	return rc;
+}
+
+/*
+ * Request a controller dump and wait for the request to be consumed.
+ *
+ * Sets GLOBAL_MMIO_CHI_CDA in GLOBAL_MMIO_CHIC, sets
+ * GLOBAL_MMIO_HCI_CONTROLLER_DUMP in GLOBAL_MMIO_HCI, then polls
+ * GLOBAL_MMIO_HCI until that bit reads back as clear.
+ *
+ * NOTE(review): the poll has no timeout, so it only ends when the bit clears
+ * or an MMIO access fails.
+ *
+ * Returns 0 once the bit is clear, or the error from the failing MMIO helper.
+ */
+int request_controller_dump(struct ocxlpmem *ocxlpmem)
+{
+	u64 pending = 1;
+	int rc;
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIC,
+				    OCXL_LITTLE_ENDIAN, GLOBAL_MMIO_CHI_CDA);
+	if (rc)
+		return rc;
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
+				    OCXL_LITTLE_ENDIAN,
+				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
+	if (rc)
+		return rc;
+
+	do {
+		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+					     GLOBAL_MMIO_HCI,
+					     OCXL_LITTLE_ENDIAN, &pending);
+		if (rc)
+			return rc;
+
+		/* Only the dump request bit matters for completion */
+		pending &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
+		cond_resched();
+	} while (pending);
+
+	return 0;
+}
+
+/*
+ * Notify the controller that the dump has been collected by setting
+ * GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED in GLOBAL_MMIO_HCI.
+ * Returns the result of the MMIO helper.
+ */
+static int ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem)
+{
+	int rc;
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
+				    OCXL_LITTLE_ENDIAN,
+				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED);
+	return rc;
+}
+
 static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
 {
 	struct ocxlpmem *ocxlpmem = file->private_data;
@@ -576,7 +723,21 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
 		rc = ioctl_error_log(ocxlpmem,
 				     (struct ioctl_ocxlpmem_error_log __user *)args);
 		break;
+
+	case IOCTL_OCXLPMEM_CONTROLLER_DUMP:
+		rc = request_controller_dump(ocxlpmem);
+		break;
+
+	case IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA:
+		rc = ioctl_controller_dump_data(ocxlpmem,
+						(struct ioctl_ocxlpmem_controller_dump_data __user *)args);
+		break;
+
+	case IOCTL_OCXLPMEM_CONTROLLER_DUMP_COMPLETE:
+		rc = ioctl_controller_dump_complete(ocxlpmem);
+		break;
 	}
+
 	return rc;
 }
 
diff --git a/include/uapi/nvdimm/ocxlpmem.h b/include/uapi/nvdimm/ocxlpmem.h
index 5d3a03ea1e08..05e2b3f7b27c 100644
--- a/include/uapi/nvdimm/ocxlpmem.h
+++ b/include/uapi/nvdimm/ocxlpmem.h
@@ -38,9 +38,25 @@ struct ioctl_ocxlpmem_error_log {
 	__u64 reserved2[2];
 };
 
+/* Argument block for IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA */
+struct ioctl_ocxlpmem_controller_dump_data {
+	__u64 buf_ptr; /* coerced pointer to output buffer */
+	__u16 buf_size; /* in/out, buffer size provided/required.
+			 * On input it must be a multiple of 8 bytes.
+			 * If required is greater than provided, the buffer
+			 * will be truncated to the amount provided. If it is
+			 * less, then only the required bytes will be populated.
+			 * If it is 0, then there is no more dump data available.
+			 */
+	__u16 reserved0;
+	__u32 offset; /* in, Offset within the dump */
+	__u64 reserved[8];
+};
+
 /* ioctl numbers */
 #define OCXLPMEM_MAGIC 0xCA
 /* OpenCAPI Persistent memory devices */
 #define IOCTL_OCXLPMEM_ERROR_LOG			_IOWR(OCXLPMEM_MAGIC, 0x30, struct ioctl_ocxlpmem_error_log)
+#define IOCTL_OCXLPMEM_CONTROLLER_DUMP			_IO(OCXLPMEM_MAGIC, 0x31)
+#define IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA		_IOWR(OCXLPMEM_MAGIC, 0x32, struct ioctl_ocxlpmem_controller_dump_data)
+#define IOCTL_OCXLPMEM_CONTROLLER_DUMP_COMPLETE		_IO(OCXLPMEM_MAGIC, 0x33)
 
 #endif /* _UAPI_OCXL_SCM_H */
-- 
2.24.1


WARNING: multiple messages have this Message-ID (diff)
From: "Alastair D'Silva" <alastair@d-silva.org>
To: alastair@d-silva.org
Cc: "Madhavan Srinivasan" <maddy@linux.vnet.ibm.com>,
	"Alexey Kardashevskiy" <aik@ozlabs.ru>,
	"Masahiro Yamada" <yamada.masahiro@socionext.com>,
	"Oliver O'Halloran" <oohall@gmail.com>,
	"Mauro Carvalho Chehab" <mchehab+samsung@kernel.org>,
	"Ira Weiny" <ira.weiny@intel.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Rob Herring" <robh@kernel.org>,
	"Dave Jiang" <dave.jiang@intel.com>,
	linux-nvdimm@lists.01.org,
	"Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>,
	"Krzysztof Kozlowski" <krzk@kernel.org>,
	"Anju T Sudhakar" <anju@linux.vnet.ibm.com>,
	"Mahesh Salgaonkar" <mahesh@linux.vnet.ibm.com>,
	"Andrew Donnellan" <ajd@linux.ibm.com>,
	"Arnd Bergmann" <arnd@arndb.de>, "Greg Kurz" <groug@kaod.org>,
	"Nicholas Piggin" <npiggin@gmail.com>,
	"Cédric Le Goater" <clg@kaod.org>,
	"Dan Williams" <dan.j.williams@intel.com>,
	"Hari Bathini" <hbathini@linux.ibm.com>,
	linux-mm@kvack.org,
	"Greg Kroah-Hartman" <gregkh@linuxfoundation.org>,
	linux-kernel@vger.kernel.org,
	"Vishal Verma" <vishal.l.verma@intel.com>,
	"Frederic Barrat" <fbarrat@linux.ibm.com>,
	"Paul Mackerras" <paulus@samba.org>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	linuxppc-dev@lists.ozlabs.org,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH v4 17/25] nvdimm/ocxl: Add controller dump IOCTLs
Date: Fri, 27 Mar 2020 18:11:54 +1100	[thread overview]
Message-ID: <20200327071202.2159885-18-alastair@d-silva.org> (raw)
In-Reply-To: <20200327071202.2159885-1-alastair@d-silva.org>

This patch adds IOCTLs to allow userspace to request & fetch dumps
of the internal controller state.

This is useful during debugging or when a fatal error on the controller
has occurred.

The expected flow of operations is:
1. IOCTL_OCXLPMEM_CONTROLLER_DUMP to request the controller to take
   a dump. This IOCTL will complete after the dump is available for
   collection.
2. IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA called repeatedly to fetch
   chunks from the buffer
3. IOCTL_OCXLPMEM_CONTROLLER_DUMP_COMPLETE to notify the controller
   that it can free any internal resources used for the dump

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
---
 drivers/nvdimm/ocxl/main.c     | 161 +++++++++++++++++++++++++++++++++
 include/uapi/nvdimm/ocxlpmem.h |  16 ++++
 2 files changed, 177 insertions(+)

diff --git a/drivers/nvdimm/ocxl/main.c b/drivers/nvdimm/ocxl/main.c
index e6be0029f658..d0db358ded43 100644
--- a/drivers/nvdimm/ocxl/main.c
+++ b/drivers/nvdimm/ocxl/main.c
@@ -566,6 +566,153 @@ static int ioctl_error_log(struct ocxlpmem *ocxlpmem,
 	return 0;
 }
 
+/**
+ * controller_dump_header_parse() - Parse the first 64 bits of the controller dump command response
+ * @ocxlpmem: the device metadata
+ * @length: out, returns the number of bytes in the response (excluding the 64 bit header)
+ *
+ * The header is read as a little-endian 64 bit word from the start of the
+ * admin command data area. Bits 63:48 carry the data identifier, which must be
+ * 0x4344 ('CD'); bits 15:0 carry the length that is returned via @length.
+ *
+ * Return: 0 on success, -EINVAL if the identifier is not 'CD', otherwise the
+ * error returned by ocxl_global_mmio_read64(). @length is only written on
+ * success.
+ */
+static int controller_dump_header_parse(struct ocxlpmem *ocxlpmem, u16 *length)
+{
+	int rc;
+	u64 val;
+	u16 data_identifier;
+	u32 data_length;
+
+	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+				     ocxlpmem->admin_command.data_offset,
+				     OCXL_LITTLE_ENDIAN, &val);
+	if (rc)
+		return rc;
+
+	data_identifier = val >> 48;
+	data_length = val & 0xFFFF;
+
+	if (data_identifier != 0x4344) { // 'CD'
+		/*
+		 * data_identifier is a bare u16 with no NUL terminator, so the
+		 * string conversion needs a precision ("%.2s") to cap the read
+		 * at two bytes; a field width ("%2s") only pads and would let
+		 * the read run past the variable.
+		 */
+		dev_err(&ocxlpmem->dev,
+			"Bad data identifier for controller dump data, expected 'CD', got '%.2s' (%#x), data_length=%u\n",
+			(char *)&data_identifier,
+			(unsigned int)data_identifier, data_length);
+		return -EINVAL;
+	}
+
+	*length = data_length;
+	return 0;
+}
+
+/**
+ * ioctl_controller_dump_data() - Copy one chunk of a controller dump to userspace
+ * @ocxlpmem: the device metadata
+ * @uarg: userspace request; buf_ptr, buf_size & offset are inputs, and buf_size
+ *	  is rewritten with the number of bytes actually copied
+ *
+ * Issues ADMIN_COMMAND_CONTROLLER_DUMP for the requested offset & size while
+ * holding the admin command lock, then copies the response (the 64 bit header
+ * followed by the dump data) to the user buffer in 64 bit words.
+ *
+ * Return: 0 on success, -EFAULT if userspace memory cannot be accessed,
+ * -EINVAL if buf_size is not a multiple of 8 or exceeds the admin command data
+ * area, or the error/status reported by the helpers it calls.
+ */
+static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
+				      struct ioctl_ocxlpmem_controller_dump_data __user *uarg)
+{
+	struct ioctl_ocxlpmem_controller_dump_data args;
+	u64 __user *buf;
+	u16 i, buf_size;
+	u64 val;
+	int rc;
+
+	if (copy_from_user(&args, uarg, sizeof(args)))
+		return -EFAULT;
+
+	/* The response is copied out in whole 64 bit words */
+	if (args.buf_size % sizeof(u64))
+		return -EINVAL;
+
+	if (args.buf_size > ocxlpmem->admin_command.data_size)
+		return -EINVAL;
+
+	/* Go via uintptr_t so the __u64 becomes a properly annotated user pointer */
+	buf = (u64 __user *)(uintptr_t)args.buf_ptr;
+
+	mutex_lock(&ocxlpmem->admin_command.lock);
+
+	/* Request word: dump offset in the upper 32 bits, chunk size in the lower */
+	val = ((u64)args.offset) << 32;
+	val |= args.buf_size;
+	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
+				      ocxlpmem->admin_command.request_offset + 0x08,
+				      OCXL_LITTLE_ENDIAN, val);
+	if (rc)
+		goto out;
+
+	/*
+	 * Only bail out silently on a negative value; anything else is treated
+	 * as a controller status and checked below so that an unexpected
+	 * status is actually reported via warn_status().
+	 * NOTE(review): assumes admin_command_execute() returns a negative
+	 * errno on failure and the controller status otherwise - confirm
+	 * against its definition.
+	 */
+	rc = admin_command_execute(ocxlpmem, ADMIN_COMMAND_CONTROLLER_DUMP);
+	if (rc < 0)
+		goto out;
+	if (rc != STATUS_SUCCESS) {
+		/*
+		 * NOTE(review): the status is returned to the caller as-is;
+		 * confirm whether it should be mapped to an errno.
+		 */
+		warn_status(ocxlpmem,
+			    "Unexpected status from controller dump",
+			    rc);
+		goto out;
+	}
+
+	rc = controller_dump_header_parse(ocxlpmem, &buf_size);
+	if (rc)
+		goto out;
+
+	/* Include the 64 bit header, but never exceed what userspace provided */
+	buf_size = min((u16)(buf_size + sizeof(u64)), args.buf_size);
+
+	for (i = 0; i < buf_size / sizeof(u64); i++) {
+		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+					     ocxlpmem->admin_command.data_offset +
+							i * sizeof(u64),
+					     OCXL_HOST_ENDIAN, &val);
+		if (rc)
+			goto out;
+
+		if (copy_to_user(&buf[i], &val, sizeof(u64))) {
+			rc = -EFAULT;
+			goto out;
+		}
+	}
+
+	/* Report the number of bytes copied back to userspace */
+	args.buf_size = buf_size;
+
+	if (copy_to_user(uarg, &args, sizeof(args))) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rc = admin_response_handled(ocxlpmem);
+	if (rc)
+		goto out;
+
+out:
+	mutex_unlock(&ocxlpmem->admin_command.lock);
+	return rc;
+}
+
+/*
+ * Request a controller dump and wait for the request to be consumed.
+ *
+ * Sets GLOBAL_MMIO_CHI_CDA in GLOBAL_MMIO_CHIC, sets
+ * GLOBAL_MMIO_HCI_CONTROLLER_DUMP in GLOBAL_MMIO_HCI, then polls
+ * GLOBAL_MMIO_HCI until that bit reads back as clear.
+ *
+ * NOTE(review): the poll has no timeout, so it only ends when the bit clears
+ * or an MMIO access fails.
+ *
+ * Returns 0 once the bit is clear, or the error from the failing MMIO helper.
+ */
+int request_controller_dump(struct ocxlpmem *ocxlpmem)
+{
+	u64 pending = 1;
+	int rc;
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIC,
+				    OCXL_LITTLE_ENDIAN, GLOBAL_MMIO_CHI_CDA);
+	if (rc)
+		return rc;
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
+				    OCXL_LITTLE_ENDIAN,
+				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
+	if (rc)
+		return rc;
+
+	do {
+		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+					     GLOBAL_MMIO_HCI,
+					     OCXL_LITTLE_ENDIAN, &pending);
+		if (rc)
+			return rc;
+
+		/* Only the dump request bit matters for completion */
+		pending &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
+		cond_resched();
+	} while (pending);
+
+	return 0;
+}
+
+/*
+ * Notify the controller that the dump has been collected by setting
+ * GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED in GLOBAL_MMIO_HCI.
+ * Returns the result of the MMIO helper.
+ */
+static int ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem)
+{
+	int rc;
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
+				    OCXL_LITTLE_ENDIAN,
+				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED);
+	return rc;
+}
+
 static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
 {
 	struct ocxlpmem *ocxlpmem = file->private_data;
@@ -576,7 +723,21 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
 		rc = ioctl_error_log(ocxlpmem,
 				     (struct ioctl_ocxlpmem_error_log __user *)args);
 		break;
+
+	case IOCTL_OCXLPMEM_CONTROLLER_DUMP:
+		rc = request_controller_dump(ocxlpmem);
+		break;
+
+	case IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA:
+		rc = ioctl_controller_dump_data(ocxlpmem,
+						(struct ioctl_ocxlpmem_controller_dump_data __user *)args);
+		break;
+
+	case IOCTL_OCXLPMEM_CONTROLLER_DUMP_COMPLETE:
+		rc = ioctl_controller_dump_complete(ocxlpmem);
+		break;
 	}
+
 	return rc;
 }
 
diff --git a/include/uapi/nvdimm/ocxlpmem.h b/include/uapi/nvdimm/ocxlpmem.h
index 5d3a03ea1e08..05e2b3f7b27c 100644
--- a/include/uapi/nvdimm/ocxlpmem.h
+++ b/include/uapi/nvdimm/ocxlpmem.h
@@ -38,9 +38,25 @@ struct ioctl_ocxlpmem_error_log {
 	__u64 reserved2[2];
 };
 
+/* Argument block for IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA */
+struct ioctl_ocxlpmem_controller_dump_data {
+	__u64 buf_ptr; /* coerced pointer to output buffer */
+	__u16 buf_size; /* in/out, buffer size provided/required.
+			 * On input it must be a multiple of 8 bytes.
+			 * If required is greater than provided, the buffer
+			 * will be truncated to the amount provided. If it is
+			 * less, then only the required bytes will be populated.
+			 * If it is 0, then there is no more dump data available.
+			 */
+	__u16 reserved0;
+	__u32 offset; /* in, Offset within the dump */
+	__u64 reserved[8];
+};
+
 /* ioctl numbers */
 #define OCXLPMEM_MAGIC 0xCA
 /* OpenCAPI Persistent memory devices */
 #define IOCTL_OCXLPMEM_ERROR_LOG			_IOWR(OCXLPMEM_MAGIC, 0x30, struct ioctl_ocxlpmem_error_log)
+#define IOCTL_OCXLPMEM_CONTROLLER_DUMP			_IO(OCXLPMEM_MAGIC, 0x31)
+#define IOCTL_OCXLPMEM_CONTROLLER_DUMP_DATA		_IOWR(OCXLPMEM_MAGIC, 0x32, struct ioctl_ocxlpmem_controller_dump_data)
+#define IOCTL_OCXLPMEM_CONTROLLER_DUMP_COMPLETE		_IO(OCXLPMEM_MAGIC, 0x33)
 
 #endif /* _UAPI_OCXL_SCM_H */
-- 
2.24.1


  parent reply	other threads:[~2020-03-31  8:59 UTC|newest]

Thread overview: 179+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-27  7:11 [PATCH v4 00/25] Add support for OpenCAPI Persistent Memory devices Alastair D'Silva
2020-03-27  7:11 ` Alastair D'Silva
2020-03-27  7:11 ` Alastair D'Silva
2020-03-27  7:11 ` [PATCH v4 01/25] powerpc/powernv: Add OPAL calls for LPC memory alloc/release Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-01  8:48   ` Dan Williams
2020-04-01  8:48     ` Dan Williams
2020-04-01  8:48     ` Dan Williams
2020-04-01 22:51     ` Alastair D'Silva
2020-04-01 22:51       ` Alastair D'Silva
2020-04-01 22:51       ` Alastair D'Silva
2020-03-27  7:11 ` [PATCH v4 02/25] mm/memory_hotplug: Allow check_hotplug_memory_addressable to be called from drivers Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-01  8:48   ` Dan Williams
2020-04-01  8:48     ` Dan Williams
2020-04-01  8:48     ` Dan Williams
2020-04-02  4:33     ` Alastair D'Silva
2020-04-02  4:33       ` Alastair D'Silva
2020-04-02  4:33       ` Alastair D'Silva
2020-03-27  7:11 ` [PATCH v4 03/25] powerpc/powernv: Map & release OpenCAPI LPC memory Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-01  8:48   ` Dan Williams
2020-04-01  8:48     ` Dan Williams
2020-04-01  8:48     ` Dan Williams
2020-04-02  4:36     ` Alastair D'Silva
2020-04-02  4:36       ` Alastair D'Silva
2020-04-02  4:36       ` Alastair D'Silva
2020-04-02 10:41     ` Benjamin Herrenschmidt
2020-04-02 10:41       ` Benjamin Herrenschmidt
2020-04-03  4:27       ` Michael Ellerman
2020-04-03  4:27         ` Michael Ellerman
2020-04-03  4:27         ` Michael Ellerman
2020-03-27  7:11 ` [PATCH v4 04/25] ocxl: Remove unnecessary externs Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-01  8:48   ` Dan Williams
2020-04-01  8:48     ` Dan Williams
2020-04-01  8:48     ` Dan Williams
2020-03-27  7:11 ` [PATCH v4 05/25] ocxl: Address kernel doc errors & warnings Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-01  8:49   ` Dan Williams
2020-04-01  8:49     ` Dan Williams
2020-04-01  8:49     ` Dan Williams
2020-03-27  7:11 ` [PATCH v4 06/25] ocxl: Tally up the LPC memory on a link & allow it to be mapped Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-01  8:48   ` Dan Williams
2020-04-01  8:48     ` Dan Williams
2020-04-01  8:48     ` Dan Williams
2020-04-02  6:21     ` Andrew Donnellan
2020-04-02  6:21       ` Andrew Donnellan
2020-04-02  6:21       ` Andrew Donnellan
2020-03-27  7:11 ` [PATCH v4 07/25] ocxl: Add functions to map/unmap LPC memory Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-01  8:49   ` Dan Williams
2020-04-01  8:49     ` Dan Williams
2020-04-01  8:49     ` Dan Williams
2020-04-03  3:50     ` Alastair D'Silva
2020-04-03  3:50       ` Alastair D'Silva
2020-04-03  3:50       ` Alastair D'Silva
2020-03-27  7:11 ` [PATCH v4 08/25] ocxl: Emit a log message showing how much LPC memory was detected Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-01  8:49   ` Dan Williams
2020-04-01  8:49     ` Dan Williams
2020-04-01  8:49     ` Dan Williams
2020-04-02  1:29     ` Joe Perches
2020-04-02  1:29       ` Joe Perches
2020-04-02  1:29       ` Joe Perches
2020-04-03  3:52     ` Alastair D'Silva
2020-04-03  3:52       ` Alastair D'Silva
2020-04-03  3:52       ` Alastair D'Silva
2020-03-27  7:11 ` [PATCH v4 09/25] ocxl: Save the device serial number in ocxl_fn Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11 ` [PATCH v4 10/25] nvdimm: Add driver for OpenCAPI Persistent Memory Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-29  2:56   ` Matthew Wilcox
2020-03-29  2:56     ` Matthew Wilcox
2020-03-29  2:56     ` Matthew Wilcox
2020-03-29  2:59     ` Matthew Wilcox
2020-03-29  2:59       ` Matthew Wilcox
2020-03-29  2:59       ` Matthew Wilcox
2020-04-01  8:49   ` Dan Williams
2020-04-01  8:49     ` Dan Williams
2020-04-01  8:49     ` Dan Williams
2020-04-01 19:35     ` Dan Williams
2020-04-01 19:35       ` Dan Williams
2020-04-01 19:35       ` Dan Williams
2020-03-27  7:11 ` [PATCH v4 11/25] powerpc: Enable the OpenCAPI Persistent Memory driver for powernv_defconfig Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-01 20:26   ` Dan Williams
2020-04-01 20:26     ` Dan Williams
2020-04-01 20:26     ` Dan Williams
2020-03-27  7:11 ` [PATCH v4 12/25] nvdimm/ocxl: Add register addresses & status values to the header Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-01 20:27   ` Dan Williams
2020-04-01 20:27     ` Dan Williams
2020-04-01 20:27     ` Dan Williams
2020-03-27  7:11 ` [PATCH v4 13/25] nvdimm/ocxl: Read the capability registers & wait for device ready Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-02  0:20   ` Dan Williams
2020-04-02  0:20     ` Dan Williams
2020-04-02  0:20     ` Dan Williams
2020-03-27  7:11 ` [PATCH v4 14/25] nvdimm/ocxl: Add support for Admin commands Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-02  6:41   ` Dan Williams
2020-04-02  6:41     ` Dan Williams
2020-04-02  6:41     ` Dan Williams
2020-03-27  7:11 ` [PATCH v4 15/25] nvdimm/ocxl: Register a character device for userspace to interact with Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-02  0:27   ` Dan Williams
2020-04-02  0:27     ` Dan Williams
2020-04-02  0:27     ` Dan Williams
2020-03-27  7:11 ` [PATCH v4 16/25] nvdimm/ocxl: Implement the Read Error Log command Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-03  0:54   ` Dan Williams
2020-04-03  0:54     ` Dan Williams
2020-04-03  0:54     ` Dan Williams
2020-03-27  7:11 ` Alastair D'Silva [this message]
2020-03-27  7:11   ` [PATCH v4 17/25] nvdimm/ocxl: Add controller dump IOCTLs Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11 ` [PATCH v4 18/25] nvdimm/ocxl: Add an IOCTL to report controller statistics Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11 ` [PATCH v4 19/25] nvdimm/ocxl: Forward events to userspace Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-04-02  2:08   ` Dan Williams
2020-04-02  2:08     ` Dan Williams
2020-04-02  2:08     ` Dan Williams
2020-03-27  7:11 ` [PATCH v4 20/25] nvdimm/ocxl: Add an IOCTL to request controller health & perf data Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11 ` [PATCH v4 21/25] nvdimm/ocxl: Implement the heartbeat command Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11 ` [PATCH v4 22/25] nvdimm/ocxl: Add debug IOCTLs Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:11   ` Alastair D'Silva
2020-03-27  7:12 ` [PATCH v4 23/25] nvdimm/ocxl: Expose SMART data via ndctl Alastair D'Silva
2020-03-27  7:12   ` Alastair D'Silva
2020-03-27  7:12   ` Alastair D'Silva
2020-03-27  7:12 ` [PATCH v4 24/25] nvdimm/ocxl: Expose the serial number & firmware version in sysfs Alastair D'Silva
2020-03-27  7:12   ` Alastair D'Silva
2020-03-27  7:12   ` Alastair D'Silva
2020-03-27  7:12 ` [PATCH v4 25/25] MAINTAINERS: Add myself & nvdimm/ocxl to ocxl Alastair D'Silva
2020-03-27  7:12   ` Alastair D'Silva
2020-03-27  7:12   ` Alastair D'Silva
2020-04-01  8:47 ` [PATCH v4 00/25] Add support for OpenCAPI Persistent Memory devices Dan Williams
2020-04-01  8:47   ` Dan Williams
2020-04-01  8:47   ` Dan Williams
2020-04-01 22:44   ` Alastair D'Silva
2020-04-01 22:44     ` Alastair D'Silva
2020-04-01 22:44     ` Alastair D'Silva
2020-04-02  3:42     ` Michael Ellerman
2020-04-02  3:42       ` Michael Ellerman
2020-04-02  3:42       ` Michael Ellerman
2020-04-02  3:50       ` Oliver O'Halloran
2020-04-02  3:50         ` Oliver O'Halloran
2020-04-02  3:50         ` Oliver O'Halloran
2020-04-02 10:06         ` Michael Ellerman
2020-04-02 10:06           ` Michael Ellerman
2020-04-02 10:06           ` Michael Ellerman
2020-04-02 11:10           ` Greg Kurz
2020-04-02 11:10             ` Greg Kurz
2020-04-02 11:10             ` Greg Kurz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200327071202.2159885-18-alastair@d-silva.org \
    --to=alastair@d-silva.org \
    --cc=aik@ozlabs.ru \
    --cc=ajd@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=aneesh.kumar@linux.ibm.com \
    --cc=anju@linux.vnet.ibm.com \
    --cc=anton@ozlabs.org \
    --cc=arnd@arndb.de \
    --cc=benh@kernel.crashing.org \
    --cc=clg@kaod.org \
    --cc=davem@davemloft.net \
    --cc=fbarrat@linux.ibm.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=groug@kaod.org \
    --cc=hbathini@linux.ibm.com \
    --cc=krzk@kernel.org \
    --cc=maddy@linux.vnet.ibm.com \
    --cc=mahesh@linux.vnet.ibm.com \
    --cc=mchehab+samsung@kernel.org \
    --cc=mpe@ellerman.id.au \
    --cc=npiggin@gmail.com \
    --cc=paulus@samba.org \
    --cc=robh@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=yamada.masahiro@socionext.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.