All of lore.kernel.org
 help / color / mirror / Atom feed
* [v2 1/2] tests/nvdimm/ndtest: Enable smart tests
@ 2021-05-17  8:40 Santosh Sivaraj
  2021-05-17  8:40 ` [v2 2/2] nvdimm/ndtest: Add support for error injection tests Santosh Sivaraj
  0 siblings, 1 reply; 2+ messages in thread
From: Santosh Sivaraj @ 2021-05-17  8:40 UTC (permalink / raw)
  To: Linux NVDIMM
  Cc: Shivaprasad G Bhat, Harish Sriram, Aneesh Kumar K.V, Shivaprasad G Bhat

From: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com>

Add the health-related DSM command implementations that the ndctl
inject-smart and monitor tests need in order to pass.

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com>
---
 tools/testing/nvdimm/test/ndtest.c | 258 +++++++++++++++++++++++++++++
 tools/testing/nvdimm/test/ndtest.h | 129 +++++++++++++++
 2 files changed, 387 insertions(+)

diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index 6862915f1fb0..bb47b145466d 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -30,6 +30,8 @@ enum {
 	((1ul << ND_CMD_GET_CONFIG_SIZE) | \
 	 (1ul << ND_CMD_GET_CONFIG_DATA) | \
 	 (1ul << ND_CMD_SET_CONFIG_DATA) | \
+	 (1ul << ND_CMD_SMART_THRESHOLD) | \
+	 (1uL << ND_CMD_SMART)           | \
 	 (1ul << ND_CMD_CALL))
 
 #define NFIT_DIMM_HANDLE(node, socket, imc, chan, dimm)			\
@@ -41,6 +43,21 @@ static struct ndtest_priv *instances[NUM_INSTANCES];
 static struct class *ndtest_dimm_class;
 static struct gen_pool *ndtest_pool;
 
+static const struct nd_papr_pdsm_health health_defaults = {
+	.dimm_unarmed = 0,
+	.dimm_bad_shutdown = 0,
+	.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY,
+	.extension_flags = PDSM_DIMM_HEALTH_MEDIA_TEMPERATURE_VALID | PDSM_DIMM_HEALTH_ALARM_VALID |
+			   PDSM_DIMM_HEALTH_CTRL_TEMPERATURE_VALID | PDSM_DIMM_HEALTH_SPARES_VALID |
+			   PDSM_DIMM_HEALTH_RUN_GAUGE_VALID,
+	.dimm_fuel_gauge = 95,
+	.media_temperature = 23 * 16,
+	.ctrl_temperature = 25 * 16,
+	.spares = 75,
+	.alarm_flags = ND_PAPR_HEALTH_SPARE_TRIP |
+			ND_PAPR_HEALTH_TEMP_TRIP,
+};
+
 static struct ndtest_dimm dimm_group1[] = {
 	{
 		.size = DIMM_SIZE,
@@ -48,6 +65,16 @@ static struct ndtest_dimm dimm_group1[] = {
 		.uuid_str = "1e5c75d2-b618-11ea-9aa3-507b9ddc0f72",
 		.physical_id = 0,
 		.num_formats = 2,
+		.flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+		.extension_flags = health_defaults.extension_flags,
+		.dimm_fuel_gauge = health_defaults.dimm_fuel_gauge,
+		.media_temperature = health_defaults.media_temperature,
+		.ctrl_temperature = health_defaults.ctrl_temperature,
+		.spares = health_defaults.spares,
+		.alarm_flags = health_defaults.alarm_flags,
+		.media_temperature_threshold = 40 * 16,
+		.ctrl_temperature_threshold = 30 * 16,
+		.spares_threshold = 5,
 	},
 	{
 		.size = DIMM_SIZE,
@@ -55,6 +82,16 @@ static struct ndtest_dimm dimm_group1[] = {
 		.uuid_str = "1c4d43ac-b618-11ea-be80-507b9ddc0f72",
 		.physical_id = 1,
 		.num_formats = 2,
+		.flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+		.extension_flags = health_defaults.extension_flags,
+		.dimm_fuel_gauge = health_defaults.dimm_fuel_gauge,
+		.media_temperature = health_defaults.media_temperature,
+		.ctrl_temperature = health_defaults.ctrl_temperature,
+		.spares = health_defaults.spares,
+		.alarm_flags = health_defaults.alarm_flags,
+		.media_temperature_threshold = 40 * 16,
+		.ctrl_temperature_threshold = 30 * 16,
+		.spares_threshold = 5,
 	},
 	{
 		.size = DIMM_SIZE,
@@ -62,6 +99,16 @@ static struct ndtest_dimm dimm_group1[] = {
 		.uuid_str = "a9f17ffc-b618-11ea-b36d-507b9ddc0f72",
 		.physical_id = 2,
 		.num_formats = 2,
+		.flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+		.extension_flags = health_defaults.extension_flags,
+		.dimm_fuel_gauge = health_defaults.dimm_fuel_gauge,
+		.media_temperature = health_defaults.media_temperature,
+		.ctrl_temperature = health_defaults.ctrl_temperature,
+		.spares = health_defaults.spares,
+		.alarm_flags = health_defaults.alarm_flags,
+		.media_temperature_threshold = 40 * 16,
+		.ctrl_temperature_threshold = 30 * 16,
+		.spares_threshold = 5,
 	},
 	{
 		.size = DIMM_SIZE,
@@ -69,6 +116,16 @@ static struct ndtest_dimm dimm_group1[] = {
 		.uuid_str = "b6b83b22-b618-11ea-8aae-507b9ddc0f72",
 		.physical_id = 3,
 		.num_formats = 2,
+		.flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+		.extension_flags = health_defaults.extension_flags,
+		.dimm_fuel_gauge = health_defaults.dimm_fuel_gauge,
+		.media_temperature = health_defaults.media_temperature,
+		.ctrl_temperature = health_defaults.ctrl_temperature,
+		.spares = health_defaults.spares,
+		.alarm_flags = health_defaults.alarm_flags,
+		.media_temperature_threshold = 40 * 16,
+		.ctrl_temperature_threshold = 30 * 16,
+		.spares_threshold = 5,
 	},
 	{
 		.size = DIMM_SIZE,
@@ -296,6 +353,172 @@ static int ndtest_get_config_size(struct ndtest_dimm *dimm, unsigned int buf_len
 	return 0;
 }
 
+static int ndtest_pdsm_health(struct ndtest_dimm *dimm,
+			union nd_pdsm_payload *payload,
+			unsigned int buf_len)
+{
+	struct nd_papr_pdsm_health *health = &payload->health;
+
+	/* Size check must use the payload struct; sizeof(health) is a pointer. */
+	if (buf_len < sizeof(*health))
+		return -EINVAL;
+
+	health->dimm_unarmed = !!(dimm->flags & PAPR_PMEM_UNARMED_MASK);
+	health->dimm_bad_shutdown = !!(dimm->flags & PAPR_PMEM_BAD_SHUTDOWN_MASK);
+	health->dimm_bad_restore = !!(dimm->flags & PAPR_PMEM_BAD_RESTORE_MASK);
+	health->dimm_health = PAPR_PDSM_DIMM_HEALTHY;
+
+	if (dimm->flags & PAPR_PMEM_HEALTH_FATAL)
+		health->dimm_health = PAPR_PDSM_DIMM_FATAL;
+	else if (dimm->flags & PAPR_PMEM_HEALTH_CRITICAL)
+		health->dimm_health = PAPR_PDSM_DIMM_CRITICAL;
+	else if (dimm->flags & PAPR_PMEM_HEALTH_UNHEALTHY ||
+		 dimm->flags & PAPR_PMEM_HEALTH_NON_CRITICAL)
+		health->dimm_health = PAPR_PDSM_DIMM_UNHEALTHY;
+
+	health->extension_flags = 0;
+	if (dimm->extension_flags & PDSM_DIMM_HEALTH_RUN_GAUGE_VALID) {
+		health->dimm_fuel_gauge = dimm->dimm_fuel_gauge;
+		health->extension_flags |= PDSM_DIMM_HEALTH_RUN_GAUGE_VALID;
+	}
+	if (dimm->extension_flags & PDSM_DIMM_HEALTH_MEDIA_TEMPERATURE_VALID) {
+		health->media_temperature = dimm->media_temperature;
+		health->extension_flags |= PDSM_DIMM_HEALTH_MEDIA_TEMPERATURE_VALID;
+	}
+	if (dimm->extension_flags & PDSM_DIMM_HEALTH_CTRL_TEMPERATURE_VALID) {
+		health->ctrl_temperature = dimm->ctrl_temperature;
+		health->extension_flags |= PDSM_DIMM_HEALTH_CTRL_TEMPERATURE_VALID;
+	}
+	if (dimm->extension_flags & PDSM_DIMM_HEALTH_SPARES_VALID) {
+		health->spares = dimm->spares;
+		health->extension_flags |= PDSM_DIMM_HEALTH_SPARES_VALID;
+	}
+	if (dimm->extension_flags & PDSM_DIMM_HEALTH_ALARM_VALID) {
+		health->alarm_flags = dimm->alarm_flags;
+		health->extension_flags |= PDSM_DIMM_HEALTH_ALARM_VALID;
+	}
+
+	return 0;
+}
+
+static void smart_notify(struct ndtest_dimm *dimm)
+{
+	struct device *bus = dimm->dev->parent;
+
+	if (((dimm->alarm_flags & ND_PAPR_HEALTH_SPARE_TRIP) &&
+	      dimm->spares <= dimm->spares_threshold) ||
+	    ((dimm->alarm_flags & ND_PAPR_HEALTH_TEMP_TRIP) &&
+	      dimm->media_temperature >= dimm->media_temperature_threshold) ||
+	    ((dimm->alarm_flags & ND_PAPR_HEALTH_CTEMP_TRIP) &&
+	     dimm->ctrl_temperature >= dimm->ctrl_temperature_threshold) ||
+	    !(dimm->flags & PAPR_PMEM_HEALTH_NON_CRITICAL) ||
+	    (dimm->flags & PAPR_PMEM_BAD_SHUTDOWN_MASK)) {
+		device_lock(bus);
+		/* send smart notification */
+		if (dimm->notify_handle)
+			sysfs_notify_dirent(dimm->notify_handle);
+		device_unlock(bus);
+	}
+}
+
+static int ndtest_pdsm_health_inject(struct ndtest_dimm *dimm,
+				union nd_pdsm_payload *payload,
+				unsigned int buf_len)
+{
+	struct nd_papr_pdsm_health_inject *inj = &payload->inject;
+
+	if (buf_len < sizeof(*inj))	/* the struct, not the pointer */
+		return -EINVAL;
+
+	if (inj->flags & ND_PAPR_HEALTH_INJECT_MTEMP) {
+		if (inj->mtemp_enable)
+			dimm->media_temperature = inj->media_temperature;
+		else
+			dimm->media_temperature = health_defaults.media_temperature;
+	}
+	if (inj->flags & ND_PAPR_HEALTH_INJECT_SPARE) {
+		if (inj->spares_enable)
+			dimm->spares = inj->spares;
+		else
+			dimm->spares = health_defaults.spares;
+	}
+	if (inj->flags & ND_PAPR_HEALTH_INJECT_FATAL) {
+		if (inj->fatal_enable)
+			dimm->flags |= PAPR_PMEM_HEALTH_FATAL;
+		else
+			dimm->flags &= ~PAPR_PMEM_HEALTH_FATAL;
+	}
+	if (inj->flags & ND_PAPR_HEALTH_INJECT_SHUTDOWN) {
+		if (inj->unsafe_shutdown_enable)
+			dimm->flags |= PAPR_PMEM_SHUTDOWN_DIRTY;
+		else
+			dimm->flags &= ~PAPR_PMEM_SHUTDOWN_DIRTY;
+	}
+	smart_notify(dimm);
+	inj->status = 0;
+
+	return 0;
+}
+
+static int ndtest_pdsm_health_threshold(struct ndtest_dimm *dimm,
+			union nd_pdsm_payload *payload,
+			unsigned int buf_len)
+{
+	struct nd_papr_pdsm_health_threshold *threshold = &payload->threshold;
+
+	if (buf_len < sizeof(*threshold))	/* the struct, not the pointer */
+		return -EINVAL;
+
+	threshold->media_temperature = dimm->media_temperature_threshold;
+	threshold->ctrl_temperature = dimm->ctrl_temperature_threshold;
+	threshold->spares = dimm->spares_threshold;
+	threshold->alarm_control = dimm->alarm_flags;
+
+	return 0;
+}
+
+static int ndtest_pdsm_health_set_threshold(struct ndtest_dimm *dimm,
+			union nd_pdsm_payload *payload,
+			unsigned int buf_len)
+{
+	struct nd_papr_pdsm_health_threshold *threshold = &payload->threshold;
+
+	if (buf_len < sizeof(*threshold))	/* the struct, not the pointer */
+		return -EINVAL;
+
+	dimm->media_temperature_threshold = threshold->media_temperature;
+	dimm->ctrl_temperature_threshold = threshold->ctrl_temperature;
+	dimm->spares_threshold = threshold->spares;
+	dimm->alarm_flags = threshold->alarm_control;
+
+	smart_notify(dimm);
+
+	return 0;
+}
+
+static int ndtest_dimm_cmd_call(struct ndtest_dimm *dimm, unsigned int buf_len,
+			   void *buf)
+{
+	struct nd_cmd_pkg *call_pkg = buf;
+	unsigned int len = call_pkg->nd_size_in + call_pkg->nd_size_out;
+	struct nd_pkg_pdsm *pdsm = (struct nd_pkg_pdsm *) call_pkg->nd_payload;
+	union nd_pdsm_payload *payload = &(pdsm->payload);
+	unsigned int func = call_pkg->nd_command;
+
+	switch (func) {
+	case PAPR_PDSM_HEALTH:
+		return ndtest_pdsm_health(dimm, payload, len);
+	case PAPR_PDSM_HEALTH_INJECT:
+		return ndtest_pdsm_health_inject(dimm, payload, len);
+	case PAPR_PDSM_HEALTH_THRESHOLD:
+		return ndtest_pdsm_health_threshold(dimm, payload, len);
+	case PAPR_PDSM_HEALTH_THRESHOLD_SET:
+		return ndtest_pdsm_health_set_threshold(dimm, payload, len);
+	}
+
+	return 0;
+}
+
 static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc,
 		      struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		      unsigned int buf_len, int *cmd_rc)
@@ -325,6 +548,9 @@ static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc,
 	case ND_CMD_SET_CONFIG_DATA:
 		*cmd_rc = ndtest_config_set(dimm, buf_len, buf);
 		break;
+	case ND_CMD_CALL:
+		*cmd_rc = ndtest_dimm_cmd_call(dimm, buf_len, buf);
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -826,6 +1052,20 @@ static ssize_t flags_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(flags);
 
+#define PAPR_PMEM_DIMM_CMD_MASK				\
+	 ((1U << PAPR_PDSM_HEALTH)			\
+	 | (1U << PAPR_PDSM_HEALTH_INJECT)		\
+	 | (1U << PAPR_PDSM_HEALTH_THRESHOLD)		\
+	 | (1U << PAPR_PDSM_HEALTH_THRESHOLD_SET))
+
+
+static ssize_t dsm_mask_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%#x\n", PAPR_PMEM_DIMM_CMD_MASK);
+}
+static DEVICE_ATTR_RO(dsm_mask);
+
 static struct attribute *ndtest_nvdimm_attributes[] = {
 	&dev_attr_nvdimm_show_handle.attr,
 	&dev_attr_vendor.attr,
@@ -837,6 +1077,7 @@ static struct attribute *ndtest_nvdimm_attributes[] = {
 	&dev_attr_format.attr,
 	&dev_attr_format1.attr,
 	&dev_attr_flags.attr,
+	&dev_attr_dsm_mask.attr,
 	NULL,
 };
 
@@ -856,6 +1097,7 @@ static int ndtest_dimm_register(struct ndtest_priv *priv,
 {
 	struct device *dev = &priv->pdev.dev;
 	unsigned long dimm_flags = dimm->flags;
+	struct kernfs_node *papr_kernfs;
 
 	if (dimm->num_formats > 1) {
 		set_bit(NDD_ALIASING, &dimm_flags);
@@ -882,6 +1124,20 @@ static int ndtest_dimm_register(struct ndtest_priv *priv,
 		return -ENOMEM;
 	}
 
+	nd_synchronize();
+
+	papr_kernfs = sysfs_get_dirent(nvdimm_kobj(dimm->nvdimm)->sd, "papr");
+	if (!papr_kernfs) {
+		pr_err("Could not initialize the notifier handle\n");
+		return 0;
+	}
+
+	dimm->notify_handle = sysfs_get_dirent(papr_kernfs, "flags");
+	sysfs_put(papr_kernfs);
+	if (!dimm->notify_handle) {
+		pr_err("Could not initialize the notifier handle\n");
+		return 0;
+	}
 	return 0;
 }
 
@@ -953,6 +1209,8 @@ static int ndtest_bus_register(struct ndtest_priv *p)
 	p->bus_desc.provider_name = NULL;
 	p->bus_desc.attr_groups = ndtest_attribute_groups;
 
+	set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask);
+
 	p->bus = nvdimm_bus_register(&p->pdev.dev, &p->bus_desc);
 	if (!p->bus) {
 		dev_err(&p->pdev.dev, "Error creating nvdimm bus %pOF\n", p->dn);
diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h
index 2c54c9cbb90c..d29638b6a332 100644
--- a/tools/testing/nvdimm/test/ndtest.h
+++ b/tools/testing/nvdimm/test/ndtest.h
@@ -16,6 +16,8 @@
 #define PAPR_PMEM_HEALTH_FATAL              (1ULL << (63 - 5))
 /* SCM contents cannot persist due to current platform health status */
 #define PAPR_PMEM_HEALTH_UNHEALTHY          (1ULL << (63 - 6))
+/* SCM device is unable to persist memory contents in certain conditions */
+#define PAPR_PMEM_HEALTH_NON_CRITICAL       (1ULL << (63 - 7))
 
 /* Bits status indicators for health bitmap indicating unarmed dimm */
 #define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED |		\
@@ -38,6 +40,49 @@
 
 struct ndtest_config;
 
+/* DIMM Health extension flag bits */
+#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID                (1 << 0)
+#define PDSM_DIMM_HEALTH_MEDIA_TEMPERATURE_VALID        (1 << 1)
+#define PDSM_DIMM_HEALTH_CTRL_TEMPERATURE_VALID         (1 << 2)
+#define PDSM_DIMM_HEALTH_SHUTDOWN_COUNT_VALID		(1 << 3)
+#define PDSM_DIMM_HEALTH_SPARES_VALID                   (1 << 4)
+#define PDSM_DIMM_HEALTH_ALARM_VALID                    (1 << 5)
+
+#define PAPR_PDSM_DIMM_HEALTHY           0
+
+#define ND_PAPR_HEALTH_SPARE_TRIP       (1 << 0)
+#define ND_PAPR_HEALTH_TEMP_TRIP        (1 << 1)
+#define ND_PAPR_HEALTH_CTEMP_TRIP       (1 << 2)
+
+/* DIMM Health inject flag bits */
+#define ND_PAPR_HEALTH_INJECT_MTEMP     (1 << 0)
+#define ND_PAPR_HEALTH_INJECT_SPARE     (1 << 1)
+#define ND_PAPR_HEALTH_INJECT_FATAL     (1 << 2)
+#define ND_PAPR_HEALTH_INJECT_SHUTDOWN  (1 << 3)
+
+/* Various nvdimm health indicators */
+#define PAPR_PDSM_DIMM_HEALTHY           0
+#define PAPR_PDSM_DIMM_UNHEALTHY         1
+#define PAPR_PDSM_DIMM_CRITICAL          2
+#define PAPR_PDSM_DIMM_FATAL             3
+
+enum papr_pdsm {
+	PAPR_PDSM_MIN = 0x0,
+	PAPR_PDSM_HEALTH,
+	PAPR_PDSM_INJECT_SET = 11,
+	PAPR_PDSM_INJECT_CLEAR = 12,
+	PAPR_PDSM_INJECT_GET = 13,
+	PAPR_PDSM_HEALTH_INJECT = 14,
+	PAPR_PDSM_HEALTH_THRESHOLD = 15,
+	PAPR_PDSM_HEALTH_THRESHOLD_SET = 16,
+	PAPR_PDSM_MAX,
+};
+
+enum dimm_type {
+	NDTEST_REGION_TYPE_PMEM = 0x0,
+	NDTEST_REGION_TYPE_BLK = 0x1,
+};
+
 struct ndtest_priv {
 	struct platform_device pdev;
 	struct device_node *dn;
@@ -80,6 +125,21 @@ struct ndtest_dimm {
 	int id;
 	int fail_cmd_code;
 	u8 no_alias;
+
+	struct kernfs_node *notify_handle;
+
+	/* SMART Health information */
+	unsigned long long extension_flags;
+	__u16 dimm_fuel_gauge;
+	__u16 media_temperature;
+	__u16 ctrl_temperature;
+	__u8 spares;
+	__u8 alarm_flags;
+
+	/* SMART Health thresholds */
+	__u16 media_temperature_threshold;
+	__u16 ctrl_temperature_threshold;
+	__u8 spares_threshold;
 };
 
 struct ndtest_mapping {
@@ -106,4 +166,73 @@ struct ndtest_config {
 	u8 num_regions;
 };
 
+#define ND_PDSM_PAYLOAD_MAX_SIZE 184
+
+struct nd_papr_pdsm_health {
+	union {
+		struct {
+			__u32 extension_flags;
+			__u8 dimm_unarmed;
+			__u8 dimm_bad_shutdown;
+			__u8 dimm_bad_restore;
+			__u8 dimm_scrubbed;
+			__u8 dimm_locked;
+			__u8 dimm_encrypted;
+			__u16 dimm_health;
+
+			/* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
+			__u16 dimm_fuel_gauge;
+			__u16 media_temperature;
+			__u16 ctrl_temperature;
+			__u8 spares;
+			__u16 alarm_flags;
+		};
+		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+	};
+};
+
+struct nd_papr_pdsm_health_threshold {
+	union {
+		struct {
+			__u16 alarm_control;
+			__u8 spares;
+			__u16 media_temperature;
+			__u16 ctrl_temperature;
+			__u32 status;
+		};
+		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+	};
+};
+
+struct nd_papr_pdsm_health_inject {
+	union {
+		struct {
+			__u64 flags;
+			__u8 mtemp_enable;
+			__u16 media_temperature;
+			__u8 ctemp_enable;
+			__u16 ctrl_temperature;
+			__u8 spares_enable;
+			__u8 spares;
+			__u8 fatal_enable;
+			__u8 unsafe_shutdown_enable;
+			__u32 status;
+		};
+		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+	};
+};
+
+union nd_pdsm_payload {
+	struct nd_papr_pdsm_health health;
+	struct nd_papr_pdsm_health_inject inject;
+	struct nd_papr_pdsm_health_threshold threshold;
+	__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+} __packed;
+
+struct nd_pkg_pdsm {
+	__s32 cmd_status;       /* Out: Sub-cmd status returned back */
+	__u16 reserved[2];      /* Ignored and to be set as '0' */
+	union nd_pdsm_payload payload;
+} __packed;
+
 #endif /* NDTEST_H */
-- 
2.31.1
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [v2 2/2] nvdimm/ndtest: Add support for error injection tests
  2021-05-17  8:40 [v2 1/2] tests/nvdimm/ndtest: Enable smart tests Santosh Sivaraj
@ 2021-05-17  8:40 ` Santosh Sivaraj
  0 siblings, 0 replies; 2+ messages in thread
From: Santosh Sivaraj @ 2021-05-17  8:40 UTC (permalink / raw)
  To: Linux NVDIMM; +Cc: Shivaprasad G Bhat, Harish Sriram, Aneesh Kumar K.V

Add the support needed to run the error-injection family of tests on
non-ACPI platforms.

Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
 tools/testing/nvdimm/test/ndtest.c | 455 ++++++++++++++++++++++++++++-
 tools/testing/nvdimm/test/ndtest.h |  25 ++
 2 files changed, 477 insertions(+), 3 deletions(-)

diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index bb47b145466d..09d98317bf4e 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define pr_fmt(fmt) "ndtest: " fmt
 
 #include <linux/platform_device.h>
 #include <linux/device.h>
@@ -42,6 +42,7 @@ static DEFINE_SPINLOCK(ndtest_lock);
 static struct ndtest_priv *instances[NUM_INSTANCES];
 static struct class *ndtest_dimm_class;
 static struct gen_pool *ndtest_pool;
+static struct workqueue_struct *ndtest_wq;
 
 static const struct nd_papr_pdsm_health health_defaults = {
 	.dimm_unarmed = 0,
@@ -496,6 +497,139 @@ static int ndtest_pdsm_health_set_threshold(struct ndtest_dimm *dimm,
 	return 0;
 }
 
+static void ars_complete_all(struct ndtest_priv *p)
+{
+	int i;
+
+	for (i = 0; i < p->config->num_regions; i++) {
+		struct ndtest_region *region = &p->config->regions[i];
+
+		if (region->region)
+			nvdimm_region_notify(region->region,
+					     NVDIMM_REVALIDATE_POISON);
+	}
+}
+
+static void ndtest_scrub(struct work_struct *work)
+{
+	struct ndtest_priv *p = container_of(work, typeof(struct ndtest_priv),
+					     dwork.work);
+	struct badrange_entry *be;
+	int rc, i = 0;
+
+	spin_lock(&p->badrange.lock);
+	list_for_each_entry(be, &p->badrange.list, list) {
+		rc = nvdimm_bus_add_badrange(p->bus, be->start, be->length);
+		if (rc)
+			dev_err(&p->pdev.dev, "Failed to process ARS records\n");
+		else
+			i++;
+	}
+	spin_unlock(&p->badrange.lock);
+
+	if (i == 0) {
+		queue_delayed_work(ndtest_wq, &p->dwork, HZ);
+		return;
+	}
+
+	ars_complete_all(p);
+	p->scrub_count++;
+
+	mutex_lock(&p->ars_lock);
+	sysfs_notify_dirent(p->scrub_state);
+	clear_bit(ARS_BUSY, &p->scrub_flags);
+	clear_bit(ARS_POLL, &p->scrub_flags);
+	set_bit(ARS_VALID, &p->scrub_flags);
+	mutex_unlock(&p->ars_lock);
+
+}
+
+static int ndtest_scrub_notify(struct ndtest_priv *p)
+{
+	if (!test_and_set_bit(ARS_BUSY, &p->scrub_flags))
+		queue_delayed_work(ndtest_wq, &p->dwork, HZ);
+
+	return 0;
+}
+
+static int ndtest_ars_inject(struct ndtest_priv *p,
+			     struct nd_cmd_ars_err_inj *inj,
+			     unsigned int buf_len)
+{
+	int rc;
+
+	if (buf_len != sizeof(*inj)) {
+		dev_dbg(&p->bus->dev, "buflen: %u, inj size: %zu\n", buf_len, sizeof(*inj));
+		rc = -EINVAL;
+		goto err;
+	}
+	/* A range that could not be recorded must be reported as a failure. */
+	rc = badrange_add(&p->badrange, inj->err_inj_spa_range_base,
+			   inj->err_inj_spa_range_length);
+	if (rc < 0)
+		goto err;
+
+	if (inj->err_inj_options & (1 << ND_ARS_ERR_INJ_OPT_NOTIFY))
+		ndtest_scrub_notify(p);
+
+	inj->status = 0;
+	return 0;
+
+err:
+	inj->status = NFIT_ARS_INJECT_INVALID;
+	return rc;
+}
+
+static int ndtest_ars_inject_clear(struct ndtest_priv *p,
+				   struct nd_cmd_ars_err_inj_clr *inj,
+				   unsigned int buf_len)
+{
+	int rc;
+
+	if (buf_len != sizeof(*inj)) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	if (inj->err_inj_clr_spa_range_length <= 0) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	badrange_forget(&p->badrange, inj->err_inj_clr_spa_range_base,
+			inj->err_inj_clr_spa_range_length);
+
+	inj->status = 0;
+	return 0;
+
+err:
+	inj->status = NFIT_ARS_INJECT_INVALID;
+	return rc;
+}
+
+static int ndtest_ars_inject_status(struct ndtest_priv *p,
+				    struct nd_cmd_ars_err_inj_stat *stat,
+				    unsigned int buf_len)
+{
+	struct badrange_entry *be;
+	int max = SZ_4K / sizeof(struct nd_error_stat_query_record);
+	int i = 0;
+
+	stat->status = 0;
+	spin_lock(&p->badrange.lock);
+	list_for_each_entry(be, &p->badrange.list, list) {
+		stat->record[i].err_inj_stat_spa_range_base = be->start;
+		stat->record[i].err_inj_stat_spa_range_length = be->length;
+		i++;
+		if (i >= max)	/* stop at max records; '>' allowed max + 1 */
+			break;
+	}
+	spin_unlock(&p->badrange.lock);
+	stat->inj_err_rec_count = i;
+
+	return 0;
+}
+
 static int ndtest_dimm_cmd_call(struct ndtest_dimm *dimm, unsigned int buf_len,
 			   void *buf)
 {
@@ -519,6 +653,157 @@ static int ndtest_dimm_cmd_call(struct ndtest_dimm *dimm, unsigned int buf_len,
 	return 0;
 }
 
+static int ndtest_bus_cmd_call(struct nvdimm_bus_descriptor *nd_desc, void *buf,
+			       unsigned int buf_len, int *cmd_rc)
+{
+	struct nd_cmd_pkg *pkg = buf;
+	struct ndtest_priv *p = container_of(nd_desc, struct ndtest_priv,
+					     bus_desc);
+	void *payload = pkg->nd_payload;
+	unsigned int func = pkg->nd_command;
+	unsigned int len = pkg->nd_size_in + pkg->nd_size_out;
+
+	switch (func) {
+	case PAPR_PDSM_INJECT_SET:
+		return ndtest_ars_inject(p, payload, len);
+	case PAPR_PDSM_INJECT_CLEAR:
+		return ndtest_ars_inject_clear(p, payload, len);
+	case PAPR_PDSM_INJECT_GET:
+		return ndtest_ars_inject_status(p, payload, len);
+	}
+
+	return -ENOTTY;
+}
+
+static int ndtest_cmd_ars_cap(struct ndtest_priv *p, struct nd_cmd_ars_cap *cmd,
+			      unsigned int buf_len)
+{
+	int ars_recs;
+
+	if (buf_len < sizeof(*cmd))
+		return -EINVAL;
+
+	/* for testing, only store up to n records that fit within a page */
+	ars_recs = SZ_4K / sizeof(struct nd_ars_record);
+
+	cmd->max_ars_out = sizeof(struct nd_cmd_ars_status)
+		+ ars_recs * sizeof(struct nd_ars_record);
+	cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16;
+	cmd->clear_err_unit = 256;
+	p->max_ars = cmd->max_ars_out;
+
+	return 0;
+}
+
+static void post_ars_status(struct ars_state *state,
+			    struct badrange *badrange, u64 addr, u64 len)
+{
+	struct nd_cmd_ars_status *status;
+	struct nd_ars_record *record;
+	struct badrange_entry *be;
+	u64 end = addr + len - 1;
+	int i = 0;
+
+	state->deadline = jiffies + 1*HZ;
+	status = state->ars_status;
+	status->status = 0;
+	status->address = addr;
+	status->length = len;
+	status->type = ND_ARS_PERSISTENT;
+
+	spin_lock(&badrange->lock);
+	list_for_each_entry(be, &badrange->list, list) {
+		u64 be_end = be->start + be->length - 1;
+		u64 rstart, rend;
+
+		/* skip entries outside the range */
+		if (be_end < addr || be->start > end)
+			continue;
+
+		rstart = (be->start < addr) ? addr : be->start;
+		rend = (be_end < end) ? be_end : end;
+		record = &status->records[i];
+		record->handle = 0;
+		record->err_address = rstart;
+		record->length = rend - rstart + 1;
+		i++;
+	}
+	spin_unlock(&badrange->lock);
+
+	status->num_records = i;
+	status->out_length = sizeof(struct nd_cmd_ars_status)
+		+ i * sizeof(struct nd_ars_record);
+}
+
+#define NFIT_ARS_STATUS_BUSY (1 << 16)
+#define NFIT_ARS_START_BUSY 6
+
+static int ndtest_cmd_ars_start(struct ndtest_priv *priv,
+				struct nd_cmd_ars_start *start,
+				unsigned int buf_len, int *cmd_rc)
+{
+	if (buf_len < sizeof(*start))
+		return -EINVAL;
+
+	spin_lock(&priv->state.lock);
+	if (time_before(jiffies, priv->state.deadline)) {
+		start->status = NFIT_ARS_START_BUSY;
+		*cmd_rc = -EBUSY;
+	} else {
+		start->status = 0;
+		start->scrub_time = 1;
+		post_ars_status(&priv->state, &priv->badrange,
+				start->address, start->length);
+		*cmd_rc = 0;
+	}
+	spin_unlock(&priv->state.lock);
+
+	return 0;
+}
+
+static int ndtest_cmd_ars_status(struct ndtest_priv *priv,
+				 struct nd_cmd_ars_status *status,
+				 unsigned int buf_len, int *cmd_rc)
+{
+	if (buf_len < priv->state.ars_status->out_length)
+		return -EINVAL;
+
+	spin_lock(&priv->state.lock);
+	if (time_before(jiffies, priv->state.deadline)) {
+		memset(status, 0, buf_len);
+		status->status = NFIT_ARS_STATUS_BUSY;
+		status->out_length = sizeof(*status);
+		*cmd_rc = -EBUSY;
+	} else {
+		memcpy(status, priv->state.ars_status,
+		       priv->state.ars_status->out_length);
+		*cmd_rc = 0;
+	}
+	spin_unlock(&priv->state.lock);
+
+	return 0;
+}
+
+static int ndtest_cmd_clear_error(struct ndtest_priv *priv,
+				     struct nd_cmd_clear_error *inj,
+				     unsigned int buf_len, int *cmd_rc)
+{
+	const u64 mask = 255;
+
+	if (buf_len < sizeof(*inj))
+		return -EINVAL;
+
+	if ((inj->address & mask) || (inj->length & mask))
+		return -EINVAL;
+
+	badrange_forget(&priv->badrange, inj->address, inj->length);
+	inj->status = 0;
+	inj->cleared = inj->length;
+	*cmd_rc = 0;
+
+	return 0;
+}
+
 static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc,
 		      struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		      unsigned int buf_len, int *cmd_rc)
@@ -531,8 +816,32 @@ static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc,
 
 	*cmd_rc = 0;
 
-	if (!nvdimm)
-		return -EINVAL;
+	if (!nvdimm) {
+		struct ndtest_priv *priv;
+
+		if (!nd_desc)
+			return -ENOTTY;
+
+		priv = container_of(nd_desc, struct ndtest_priv, bus_desc);
+		switch (cmd) {
+		case ND_CMD_CALL:
+			return ndtest_bus_cmd_call(nd_desc, buf, buf_len,
+						   cmd_rc);
+		case ND_CMD_ARS_CAP:
+			return ndtest_cmd_ars_cap(priv, buf, buf_len);
+		case ND_CMD_ARS_START:
+			return ndtest_cmd_ars_start(priv, buf, buf_len, cmd_rc);
+		case ND_CMD_ARS_STATUS:
+			return ndtest_cmd_ars_status(priv, buf, buf_len,
+						     cmd_rc);
+		case ND_CMD_CLEAR_ERROR:
+			return ndtest_cmd_clear_error(priv, buf, buf_len,
+						      cmd_rc);
+		default:
+			dev_dbg(&priv->pdev.dev, "Invalid command\n");
+			return -ENOTTY;
+		}
+	}
 
 	dimm = nvdimm_provider_data(nvdimm);
 	if (!dimm)
@@ -683,6 +992,9 @@ static void *ndtest_alloc_resource(struct ndtest_priv *p, size_t size,
 		return NULL;
 
 	buf = vmalloc(size);
+	if (!buf)
+		return NULL;
+
 	if (size >= DIMM_SIZE)
 		__dma = gen_pool_alloc_algo(ndtest_pool, size,
 					    gen_pool_first_fit_align, &data);
@@ -1052,6 +1364,7 @@ static ssize_t flags_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(flags);
 
+
 #define PAPR_PMEM_DIMM_CMD_MASK				\
 	 ((1U << PAPR_PDSM_HEALTH)			\
 	 | (1U << PAPR_PDSM_HEALTH_INJECT)		\
@@ -1195,11 +1508,102 @@ static const struct attribute_group of_node_attribute_group = {
 	.attrs = of_node_attributes,
 };
 
+#define PAPR_PMEM_BUS_DSM_MASK				\
+	((1U << PAPR_PDSM_INJECT_SET)			\
+	 | (1U << PAPR_PDSM_INJECT_GET)			\
+	 | (1U << PAPR_PDSM_INJECT_CLEAR))
+
+static ssize_t bus_dsm_mask_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%#x\n", PAPR_PMEM_BUS_DSM_MASK);
+}
+static struct device_attribute dev_attr_bus_dsm_mask = {
+	.attr	= { .name = "dsm_mask", .mode = 0444 },
+	.show	= bus_dsm_mask_show,
+};
+
+static ssize_t scrub_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct nvdimm_bus_descriptor *nd_desc;
+	struct ndtest_priv *p;
+	ssize_t rc = -ENXIO;
+	bool busy = 0;
+
+	device_lock(dev);
+	nd_desc = dev_get_drvdata(dev);
+	if (!nd_desc) {
+		device_unlock(dev);
+		return rc;
+	}
+
+	p = container_of(nd_desc, struct ndtest_priv, bus_desc);
+
+	mutex_lock(&p->ars_lock);
+	busy = test_bit(ARS_BUSY, &p->scrub_flags) &&
+		!test_bit(ARS_CANCEL, &p->scrub_flags);
+	rc = sprintf(buf, "%d%s", p->scrub_count, busy ? "+\n" : "\n");
+	if (busy && capable(CAP_SYS_RAWIO) &&
+	    !test_and_set_bit(ARS_POLL, &p->scrub_flags))
+		mod_delayed_work(ndtest_wq, &p->dwork, HZ);
+
+	mutex_unlock(&p->ars_lock);
+
+	device_unlock(dev);
+	return rc;
+}
+
+static ssize_t scrub_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t size)
+{
+	struct nvdimm_bus_descriptor *nd_desc;
+	struct ndtest_priv *p;
+	ssize_t rc = 0;
+	long val;
+
+	rc = kstrtol(buf, 0, &val);
+	if (rc)
+		return rc;
+	if (val != 1)
+		return -EINVAL;
+	device_lock(dev);
+	nd_desc = dev_get_drvdata(dev);
+	if (nd_desc) {
+		p = container_of(nd_desc, struct ndtest_priv, bus_desc);
+
+		ndtest_scrub_notify(p);
+	}
+	device_unlock(dev);
+
+	return size;
+}
+static DEVICE_ATTR_RW(scrub);
+
+static struct attribute *ndtest_attributes[] = {
+	&dev_attr_bus_dsm_mask.attr,
+	&dev_attr_scrub.attr,
+	NULL,
+};
+
+static const struct attribute_group ndtest_attribute_group = {
+	.name = "papr",
+	.attrs = ndtest_attributes,
+};
+
 static const struct attribute_group *ndtest_attribute_groups[] = {
 	&of_node_attribute_group,
+	&ndtest_attribute_group,
 	NULL,
 };
 
+#define PAPR_PMEM_BUS_CMD_MASK				   \
+	(1UL << ND_CMD_ARS_CAP				   \
+	 | 1UL << ND_CMD_ARS_START			   \
+	 | 1UL << ND_CMD_ARS_STATUS			   \
+	 | 1UL << ND_CMD_CLEAR_ERROR			   \
+	 | 1UL << ND_CMD_CALL)
+
 static int ndtest_bus_register(struct ndtest_priv *p)
 {
 	p->config = &bus_configs[p->pdev.id];
@@ -1207,7 +1611,9 @@ static int ndtest_bus_register(struct ndtest_priv *p)
 	p->bus_desc.ndctl = ndtest_ctl;
 	p->bus_desc.module = THIS_MODULE;
 	p->bus_desc.provider_name = NULL;
+	p->bus_desc.cmd_mask = PAPR_PMEM_BUS_CMD_MASK;
 	p->bus_desc.attr_groups = ndtest_attribute_groups;
+	p->bus_desc.bus_family_mask = NVDIMM_FAMILY_PAPR;
 
 	set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask);
 
@@ -1228,6 +1634,33 @@ static int ndtest_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int ndtest_init_ars(struct ndtest_priv *p)
+{
+	struct kernfs_node *papr_node;
+	struct device *bus_dev;
+
+	p->state.ars_status = devm_kzalloc(&p->pdev.dev,
+			sizeof(struct nd_cmd_ars_status) + SZ_4K, GFP_KERNEL);
+	if (!p->state.ars_status)
+		return -ENOMEM;
+
+	bus_dev = to_nvdimm_bus_dev(p->bus);
+	papr_node = sysfs_get_dirent(bus_dev->kobj.sd, "papr");
+	if (!papr_node) {
+		dev_err(&p->pdev.dev, "sysfs_get_dirent 'papr' failed\n");
+		return -ENOENT;
+	}
+
+	p->scrub_state = sysfs_get_dirent(papr_node, "scrub");
+	sysfs_put(papr_node);	/* only the "scrub" node reference is kept */
+	if (!p->scrub_state) {
+		dev_err(&p->pdev.dev, "sysfs_get_dirent 'scrub' failed\n");
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
 static int ndtest_probe(struct platform_device *pdev)
 {
 	struct ndtest_priv *p;
@@ -1252,6 +1685,10 @@ static int ndtest_probe(struct platform_device *pdev)
 	if (rc)
 		goto err;
 
+	rc = ndtest_init_ars(p);
+	if (rc)
+		goto err;
+
 	rc = devm_add_action_or_reset(&pdev->dev, put_dimms, p);
 	if (rc)
 		goto err;
@@ -1299,6 +1736,7 @@ static void cleanup_devices(void)
 	if (ndtest_pool)
 		gen_pool_destroy(ndtest_pool);
 
+	destroy_workqueue(ndtest_wq);
 
 	if (ndtest_dimm_class)
 		class_destroy(ndtest_dimm_class);
@@ -1319,6 +1757,10 @@ static __init int ndtest_init(void)
 
 	nfit_test_setup(ndtest_resource_lookup, NULL);
 
+	ndtest_wq = create_singlethread_workqueue("nfit");
+	if (!ndtest_wq)
+		return -ENOMEM;
+
 	ndtest_dimm_class = class_create(THIS_MODULE, "nfit_test_dimm");
 	if (IS_ERR(ndtest_dimm_class)) {
 		rc = PTR_ERR(ndtest_dimm_class);
@@ -1348,6 +1790,7 @@ static __init int ndtest_init(void)
 		}
 
 		INIT_LIST_HEAD(&priv->resources);
+		badrange_init(&priv->badrange);
 		pdev = &priv->pdev;
 		pdev->name = KBUILD_MODNAME;
 		pdev->id = i;
@@ -1360,6 +1803,11 @@ static __init int ndtest_init(void)
 		get_device(&pdev->dev);
 
 		instances[i] = priv;
+
+		/* Everything about ARS here */
+		INIT_DELAYED_WORK(&priv->dwork, ndtest_scrub);
+		mutex_init(&priv->ars_lock);
+		spin_lock_init(&priv->state.lock);
 	}
 
 	rc = platform_driver_register(&ndtest_driver);
@@ -1377,6 +1825,7 @@ static __init int ndtest_init(void)
 
 static __exit void ndtest_exit(void)
 {
+	flush_workqueue(ndtest_wq);
 	cleanup_devices();
 	platform_driver_unregister(&ndtest_driver);
 }
diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h
index d29638b6a332..d92c4f3df344 100644
--- a/tools/testing/nvdimm/test/ndtest.h
+++ b/tools/testing/nvdimm/test/ndtest.h
@@ -83,17 +83,34 @@ enum dimm_type {
 	NDTEST_REGION_TYPE_BLK = 0x1,
 };
 
+struct ars_state {
+	struct nd_cmd_ars_status *ars_status;
+	unsigned long deadline;
+	spinlock_t lock;
+};
+
 struct ndtest_priv {
 	struct platform_device pdev;
 	struct device_node *dn;
 	struct list_head resources;
 	struct nvdimm_bus_descriptor bus_desc;
+	struct delayed_work dwork;
+	struct mutex ars_lock;
 	struct nvdimm_bus *bus;
 	struct ndtest_config *config;
+	struct ars_state state;
+	struct badrange badrange;
+	struct nd_cmd_ars_status *ars_status;
+	struct kernfs_node *scrub_state;
 
 	dma_addr_t *dcr_dma;
 	dma_addr_t *label_dma;
 	dma_addr_t *dimm_dma;
+
+	unsigned long scrub_flags;
+	unsigned long ars_state;
+	unsigned int max_ars;
+	int scrub_count;
 };
 
 struct ndtest_blk_mmio {
@@ -235,4 +252,12 @@ struct nd_pkg_pdsm {
 	union nd_pdsm_payload payload;
 } __packed;
 
+enum scrub_flags {
+	ARS_BUSY,
+	ARS_CANCEL,
+	ARS_VALID,
+	ARS_POLL,
+	ARS_FAILED,
+};
+
 #endif /* NDTEST_H */
-- 
2.31.1
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-05-17  8:40 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-17  8:40 [v2 1/2] tests/nvdimm/ndtest: Enable smart tests Santosh Sivaraj
2021-05-17  8:40 ` [v2 2/2] nvdimm/ndtest: Add support for error injection tests Santosh Sivaraj

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.