* [v2 1/2] tests/nvdimm/ndtest: Enable smart tests
@ 2021-05-17 8:40 Santosh Sivaraj
2021-05-17 8:40 ` [v2 2/2] nvdimm/ndtest: Add support for error injection tests Santosh Sivaraj
0 siblings, 1 reply; 2+ messages in thread
From: Santosh Sivaraj @ 2021-05-17 8:40 UTC (permalink / raw)
To: Linux NVDIMM
Cc: Shivaprasad G Bhat, Harish Sriram, Aneesh Kumar K.V, Shivaprasad G Bhat
From: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com>
Add the health-related DSM command implementations that the ndctl
inject-smart and monitor tests need in order to pass.
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com>
---
tools/testing/nvdimm/test/ndtest.c | 258 +++++++++++++++++++++++++++++
tools/testing/nvdimm/test/ndtest.h | 129 +++++++++++++++
2 files changed, 387 insertions(+)
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index 6862915f1fb0..bb47b145466d 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -30,6 +30,8 @@ enum {
((1ul << ND_CMD_GET_CONFIG_SIZE) | \
(1ul << ND_CMD_GET_CONFIG_DATA) | \
(1ul << ND_CMD_SET_CONFIG_DATA) | \
+ (1ul << ND_CMD_SMART_THRESHOLD) | \
+ (1uL << ND_CMD_SMART) | \
(1ul << ND_CMD_CALL))
#define NFIT_DIMM_HANDLE(node, socket, imc, chan, dimm) \
@@ -41,6 +43,21 @@ static struct ndtest_priv *instances[NUM_INSTANCES];
static struct class *ndtest_dimm_class;
static struct gen_pool *ndtest_pool;
+/*
+ * Baseline SMART health values for the emulated DIMMs: the dimm_group1
+ * initializers copy several fields from here, and
+ * ndtest_pdsm_health_inject() falls back to the media_temperature and
+ * spares values when an injection is switched off.
+ */
+static const struct nd_papr_pdsm_health health_defaults = {
+ .dimm_unarmed = 0,
+ .dimm_bad_shutdown = 0,
+ .dimm_health = PAPR_PDSM_DIMM_UNHEALTHY,
+ /* Every extension field except the shutdown count is marked valid. */
+ .extension_flags = PDSM_DIMM_HEALTH_MEDIA_TEMPERATURE_VALID | PDSM_DIMM_HEALTH_ALARM_VALID |
+ PDSM_DIMM_HEALTH_CTRL_TEMPERATURE_VALID | PDSM_DIMM_HEALTH_SPARES_VALID |
+ PDSM_DIMM_HEALTH_RUN_GAUGE_VALID,
+ .dimm_fuel_gauge = 95,
+ /* NOTE(review): the "* 16" scaling suggests 1/16 degree units - confirm */
+ .media_temperature = 23 * 16,
+ .ctrl_temperature = 25 * 16,
+ .spares = 75,
+ /* Spare and media-temperature alarms armed; controller alarm is not. */
+ .alarm_flags = ND_PAPR_HEALTH_SPARE_TRIP |
+ ND_PAPR_HEALTH_TEMP_TRIP,
+};
+
static struct ndtest_dimm dimm_group1[] = {
{
.size = DIMM_SIZE,
@@ -48,6 +65,16 @@ static struct ndtest_dimm dimm_group1[] = {
.uuid_str = "1e5c75d2-b618-11ea-9aa3-507b9ddc0f72",
.physical_id = 0,
.num_formats = 2,
+ .flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+ .extension_flags = health_defaults.extension_flags,
+ .dimm_fuel_gauge = health_defaults.dimm_fuel_gauge,
+ .media_temperature = health_defaults.media_temperature,
+ .ctrl_temperature = health_defaults.ctrl_temperature,
+ .spares = health_defaults.spares,
+ .alarm_flags = health_defaults.alarm_flags,
+ .media_temperature_threshold = 40 * 16,
+ .ctrl_temperature_threshold = 30 * 16,
+ .spares_threshold = 5,
},
{
.size = DIMM_SIZE,
@@ -55,6 +82,16 @@ static struct ndtest_dimm dimm_group1[] = {
.uuid_str = "1c4d43ac-b618-11ea-be80-507b9ddc0f72",
.physical_id = 1,
.num_formats = 2,
+ .flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+ .extension_flags = health_defaults.extension_flags,
+ .dimm_fuel_gauge = health_defaults.dimm_fuel_gauge,
+ .media_temperature = health_defaults.media_temperature,
+ .ctrl_temperature = health_defaults.ctrl_temperature,
+ .spares = health_defaults.spares,
+ .alarm_flags = health_defaults.alarm_flags,
+ .media_temperature_threshold = 40 * 16,
+ .ctrl_temperature_threshold = 30 * 16,
+ .spares_threshold = 5,
},
{
.size = DIMM_SIZE,
@@ -62,6 +99,16 @@ static struct ndtest_dimm dimm_group1[] = {
.uuid_str = "a9f17ffc-b618-11ea-b36d-507b9ddc0f72",
.physical_id = 2,
.num_formats = 2,
+ .flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+ .extension_flags = health_defaults.extension_flags,
+ .dimm_fuel_gauge = health_defaults.dimm_fuel_gauge,
+ .media_temperature = health_defaults.media_temperature,
+ .ctrl_temperature = health_defaults.ctrl_temperature,
+ .spares = health_defaults.spares,
+ .alarm_flags = health_defaults.alarm_flags,
+ .media_temperature_threshold = 40 * 16,
+ .ctrl_temperature_threshold = 30 * 16,
+ .spares_threshold = 5,
},
{
.size = DIMM_SIZE,
@@ -69,6 +116,16 @@ static struct ndtest_dimm dimm_group1[] = {
.uuid_str = "b6b83b22-b618-11ea-8aae-507b9ddc0f72",
.physical_id = 3,
.num_formats = 2,
+ .flags = PAPR_PMEM_HEALTH_NON_CRITICAL,
+ .extension_flags = health_defaults.extension_flags,
+ .dimm_fuel_gauge = health_defaults.dimm_fuel_gauge,
+ .media_temperature = health_defaults.media_temperature,
+ .ctrl_temperature = health_defaults.ctrl_temperature,
+ .spares = health_defaults.spares,
+ .alarm_flags = health_defaults.alarm_flags,
+ .media_temperature_threshold = 40 * 16,
+ .ctrl_temperature_threshold = 30 * 16,
+ .spares_threshold = 5,
},
{
.size = DIMM_SIZE,
@@ -296,6 +353,172 @@ static int ndtest_get_config_size(struct ndtest_dimm *dimm, unsigned int buf_len
return 0;
}
+/*
+ * ndtest_pdsm_health() - service PAPR_PDSM_HEALTH for an emulated DIMM.
+ * @dimm: DIMM whose flags and SMART fields are reported
+ * @payload: PDSM payload; its health member is filled in
+ * @buf_len: size supplied by the caller (ndtest_dimm_cmd_call() passes
+ *           nd_size_in + nd_size_out)
+ *
+ * Return: 0 on success, -EINVAL when @buf_len cannot hold the health payload.
+ */
+static int ndtest_pdsm_health(struct ndtest_dimm *dimm,
+			union nd_pdsm_payload *payload,
+			unsigned int buf_len)
+{
+	struct nd_papr_pdsm_health *health = &payload->health;
+
+	/* Compare against the structure, not the size of the pointer. */
+	if (buf_len < sizeof(*health))
+		return -EINVAL;
+
+	health->dimm_unarmed = !!(dimm->flags & PAPR_PMEM_UNARMED_MASK);
+	health->dimm_bad_shutdown = !!(dimm->flags & PAPR_PMEM_BAD_SHUTDOWN_MASK);
+	health->dimm_bad_restore = !!(dimm->flags & PAPR_PMEM_BAD_RESTORE_MASK);
+
+	/* The most severe health bit present in dimm->flags wins. */
+	health->dimm_health = PAPR_PDSM_DIMM_HEALTHY;
+	if (dimm->flags & PAPR_PMEM_HEALTH_FATAL)
+		health->dimm_health = PAPR_PDSM_DIMM_FATAL;
+	else if (dimm->flags & PAPR_PMEM_HEALTH_CRITICAL)
+		health->dimm_health = PAPR_PDSM_DIMM_CRITICAL;
+	else if (dimm->flags & PAPR_PMEM_HEALTH_UNHEALTHY ||
+		 dimm->flags & PAPR_PMEM_HEALTH_NON_CRITICAL)
+		health->dimm_health = PAPR_PDSM_DIMM_UNHEALTHY;
+
+	/* Copy only the fields the DIMM marks valid and advertise each one. */
+	health->extension_flags = 0;
+	if (dimm->extension_flags & PDSM_DIMM_HEALTH_RUN_GAUGE_VALID) {
+		health->dimm_fuel_gauge = dimm->dimm_fuel_gauge;
+		health->extension_flags |= PDSM_DIMM_HEALTH_RUN_GAUGE_VALID;
+	}
+	if (dimm->extension_flags & PDSM_DIMM_HEALTH_MEDIA_TEMPERATURE_VALID) {
+		health->media_temperature = dimm->media_temperature;
+		health->extension_flags |= PDSM_DIMM_HEALTH_MEDIA_TEMPERATURE_VALID;
+	}
+	if (dimm->extension_flags & PDSM_DIMM_HEALTH_CTRL_TEMPERATURE_VALID) {
+		health->ctrl_temperature = dimm->ctrl_temperature;
+		health->extension_flags |= PDSM_DIMM_HEALTH_CTRL_TEMPERATURE_VALID;
+	}
+	if (dimm->extension_flags & PDSM_DIMM_HEALTH_SPARES_VALID) {
+		health->spares = dimm->spares;
+		health->extension_flags |= PDSM_DIMM_HEALTH_SPARES_VALID;
+	}
+	if (dimm->extension_flags & PDSM_DIMM_HEALTH_ALARM_VALID) {
+		health->alarm_flags = dimm->alarm_flags;
+		health->extension_flags |= PDSM_DIMM_HEALTH_ALARM_VALID;
+	}
+
+	return 0;
+}
+
+/*
+ * smart_notify() - signal the DIMM's sysfs notifier when its state calls for it.
+ * @dimm: DIMM whose alarm and health state is examined
+ *
+ * A notification is sent when an armed alarm has crossed its threshold, when
+ * PAPR_PMEM_HEALTH_NON_CRITICAL is clear in dimm->flags, or when a
+ * bad-shutdown bit is set. The notifier is poked under the parent device lock
+ * and only if a notify handle was obtained.
+ */
+static void smart_notify(struct ndtest_dimm *dimm)
+{
+	struct device *bus = dimm->dev->parent;
+	int trip = 0;
+
+	if ((dimm->alarm_flags & ND_PAPR_HEALTH_SPARE_TRIP) &&
+	    dimm->spares <= dimm->spares_threshold)
+		trip = 1;
+	else if ((dimm->alarm_flags & ND_PAPR_HEALTH_TEMP_TRIP) &&
+		 dimm->media_temperature >= dimm->media_temperature_threshold)
+		trip = 1;
+	else if ((dimm->alarm_flags & ND_PAPR_HEALTH_CTEMP_TRIP) &&
+		 dimm->ctrl_temperature >= dimm->ctrl_temperature_threshold)
+		trip = 1;
+	else if (!(dimm->flags & PAPR_PMEM_HEALTH_NON_CRITICAL))
+		trip = 1;
+	else if (dimm->flags & PAPR_PMEM_BAD_SHUTDOWN_MASK)
+		trip = 1;
+
+	if (!trip)
+		return;
+
+	device_lock(bus);
+	/* send smart notification */
+	if (dimm->notify_handle)
+		sysfs_notify_dirent(dimm->notify_handle);
+	device_unlock(bus);
+}
+
+/*
+ * ndtest_pdsm_health_inject() - service PAPR_PDSM_HEALTH_INJECT.
+ * @dimm: DIMM whose emulated health state is altered
+ * @payload: PDSM payload; its inject member selects what to change
+ * @buf_len: size supplied by the caller
+ *
+ * Each bit in inj->flags selects one item; the matching *_enable field decides
+ * whether the injected value is applied or the default state is restored.
+ * A SMART notification is evaluated afterwards.
+ *
+ * Return: 0 on success, -EINVAL when @buf_len cannot hold the inject payload.
+ */
+static int ndtest_pdsm_health_inject(struct ndtest_dimm *dimm,
+				     union nd_pdsm_payload *payload,
+				     unsigned int buf_len)
+{
+	struct nd_papr_pdsm_health_inject *inj = &payload->inject;
+
+	/* Compare against the structure, not the size of the pointer. */
+	if (buf_len < sizeof(*inj))
+		return -EINVAL;
+
+	if (inj->flags & ND_PAPR_HEALTH_INJECT_MTEMP) {
+		if (inj->mtemp_enable)
+			dimm->media_temperature = inj->media_temperature;
+		else
+			dimm->media_temperature = health_defaults.media_temperature;
+	}
+	if (inj->flags & ND_PAPR_HEALTH_INJECT_SPARE) {
+		if (inj->spares_enable)
+			dimm->spares = inj->spares;
+		else
+			dimm->spares = health_defaults.spares;
+	}
+	if (inj->flags & ND_PAPR_HEALTH_INJECT_FATAL) {
+		if (inj->fatal_enable)
+			dimm->flags |= PAPR_PMEM_HEALTH_FATAL;
+		else
+			dimm->flags &= ~PAPR_PMEM_HEALTH_FATAL;
+	}
+	if (inj->flags & ND_PAPR_HEALTH_INJECT_SHUTDOWN) {
+		if (inj->unsafe_shutdown_enable)
+			dimm->flags |= PAPR_PMEM_SHUTDOWN_DIRTY;
+		else
+			dimm->flags &= ~PAPR_PMEM_SHUTDOWN_DIRTY;
+	}
+	smart_notify(dimm);
+	inj->status = 0;
+
+	return 0;
+}
+
+/*
+ * ndtest_pdsm_health_threshold() - service PAPR_PDSM_HEALTH_THRESHOLD.
+ * @dimm: DIMM whose thresholds are reported
+ * @payload: PDSM payload; its threshold member is filled in
+ * @buf_len: size supplied by the caller
+ *
+ * Return: 0 on success, -EINVAL when @buf_len cannot hold the threshold
+ * payload.
+ */
+static int ndtest_pdsm_health_threshold(struct ndtest_dimm *dimm,
+					union nd_pdsm_payload *payload,
+					unsigned int buf_len)
+{
+	struct nd_papr_pdsm_health_threshold *threshold = &payload->threshold;
+
+	/* Compare against the structure, not the size of the pointer. */
+	if (buf_len < sizeof(*threshold))
+		return -EINVAL;
+
+	threshold->media_temperature = dimm->media_temperature_threshold;
+	threshold->ctrl_temperature = dimm->ctrl_temperature_threshold;
+	threshold->spares = dimm->spares_threshold;
+	/* The alarm control word mirrors the DIMM's alarm_flags. */
+	threshold->alarm_control = dimm->alarm_flags;
+
+	return 0;
+}
+
+/*
+ * ndtest_pdsm_health_set_threshold() - service PAPR_PDSM_HEALTH_THRESHOLD_SET.
+ * @dimm: DIMM whose thresholds and alarm flags are replaced
+ * @payload: PDSM payload; its threshold member carries the new values
+ * @buf_len: size supplied by the caller
+ *
+ * After the update a SMART notification is evaluated, since the new
+ * thresholds may already be crossed.
+ *
+ * Return: 0 on success, -EINVAL when @buf_len cannot hold the threshold
+ * payload.
+ */
+static int ndtest_pdsm_health_set_threshold(struct ndtest_dimm *dimm,
+					    union nd_pdsm_payload *payload,
+					    unsigned int buf_len)
+{
+	struct nd_papr_pdsm_health_threshold *threshold = &payload->threshold;
+
+	/* Compare against the structure, not the size of the pointer. */
+	if (buf_len < sizeof(*threshold))
+		return -EINVAL;
+
+	dimm->media_temperature_threshold = threshold->media_temperature;
+	dimm->ctrl_temperature_threshold = threshold->ctrl_temperature;
+	dimm->spares_threshold = threshold->spares;
+	dimm->alarm_flags = threshold->alarm_control;
+
+	smart_notify(dimm);
+
+	return 0;
+}
+
+/*
+ * ndtest_dimm_cmd_call() - dispatch an ND_CMD_CALL package aimed at a DIMM.
+ * @dimm: target DIMM
+ * @buf_len: length of @buf (not consulted here)
+ * @buf: struct nd_cmd_pkg whose payload is a struct nd_pkg_pdsm
+ *
+ * The handlers receive nd_size_in + nd_size_out as their length argument.
+ *
+ * Return: the handler's result, or 0 for a function number that is not
+ * handled.
+ */
+static int ndtest_dimm_cmd_call(struct ndtest_dimm *dimm, unsigned int buf_len,
+				void *buf)
+{
+	struct nd_cmd_pkg *pkg = buf;
+	struct nd_pkg_pdsm *pdsm = (struct nd_pkg_pdsm *) pkg->nd_payload;
+	union nd_pdsm_payload *body = &(pdsm->payload);
+	unsigned int total = pkg->nd_size_in + pkg->nd_size_out;
+	unsigned int func = pkg->nd_command;
+	int rc = 0;
+
+	switch (func) {
+	case PAPR_PDSM_HEALTH:
+		rc = ndtest_pdsm_health(dimm, body, total);
+		break;
+	case PAPR_PDSM_HEALTH_INJECT:
+		rc = ndtest_pdsm_health_inject(dimm, body, total);
+		break;
+	case PAPR_PDSM_HEALTH_THRESHOLD:
+		rc = ndtest_pdsm_health_threshold(dimm, body, total);
+		break;
+	case PAPR_PDSM_HEALTH_THRESHOLD_SET:
+		rc = ndtest_pdsm_health_set_threshold(dimm, body, total);
+		break;
+	}
+
+	return rc;
+}
+
static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
unsigned int buf_len, int *cmd_rc)
@@ -325,6 +548,9 @@ static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc,
case ND_CMD_SET_CONFIG_DATA:
*cmd_rc = ndtest_config_set(dimm, buf_len, buf);
break;
+ case ND_CMD_CALL:
+ *cmd_rc = ndtest_dimm_cmd_call(dimm, buf_len, buf);
+ break;
default:
return -EINVAL;
}
@@ -826,6 +1052,20 @@ static ssize_t flags_show(struct device *dev,
}
static DEVICE_ATTR_RO(flags);
+/* One bit per PDSM function that ndtest_dimm_cmd_call() handles. */
+#define PAPR_PMEM_DIMM_CMD_MASK				\
+	((1U << PAPR_PDSM_HEALTH) |			\
+	 (1U << PAPR_PDSM_HEALTH_INJECT) |		\
+	 (1U << PAPR_PDSM_HEALTH_THRESHOLD) |		\
+	 (1U << PAPR_PDSM_HEALTH_THRESHOLD_SET))
+
+
+/* sysfs read handler: print PAPR_PMEM_DIMM_CMD_MASK as "%#x". */
+static ssize_t dsm_mask_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	const unsigned int mask = PAPR_PMEM_DIMM_CMD_MASK;
+
+	return sprintf(buf, "%#x\n", mask);
+}
+static DEVICE_ATTR_RO(dsm_mask);
+
static struct attribute *ndtest_nvdimm_attributes[] = {
&dev_attr_nvdimm_show_handle.attr,
&dev_attr_vendor.attr,
@@ -837,6 +1077,7 @@ static struct attribute *ndtest_nvdimm_attributes[] = {
&dev_attr_format.attr,
&dev_attr_format1.attr,
&dev_attr_flags.attr,
+ &dev_attr_dsm_mask.attr,
NULL,
};
@@ -856,6 +1097,7 @@ static int ndtest_dimm_register(struct ndtest_priv *priv,
{
struct device *dev = &priv->pdev.dev;
unsigned long dimm_flags = dimm->flags;
+ struct kernfs_node *papr_kernfs;
if (dimm->num_formats > 1) {
set_bit(NDD_ALIASING, &dimm_flags);
@@ -882,6 +1124,20 @@ static int ndtest_dimm_register(struct ndtest_priv *priv,
return -ENOMEM;
}
+ nd_synchronize();
+
+ papr_kernfs = sysfs_get_dirent(nvdimm_kobj(dimm->nvdimm)->sd, "papr");
+ if (!papr_kernfs) {
+ pr_err("Could not initialize the notifier handle\n");
+ return 0;
+ }
+
+ dimm->notify_handle = sysfs_get_dirent(papr_kernfs, "flags");
+ sysfs_put(papr_kernfs);
+ if (!dimm->notify_handle) {
+ pr_err("Could not initialize the notifier handle\n");
+ return 0;
+ }
return 0;
}
@@ -953,6 +1209,8 @@ static int ndtest_bus_register(struct ndtest_priv *p)
p->bus_desc.provider_name = NULL;
p->bus_desc.attr_groups = ndtest_attribute_groups;
+ set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask);
+
p->bus = nvdimm_bus_register(&p->pdev.dev, &p->bus_desc);
if (!p->bus) {
dev_err(&p->pdev.dev, "Error creating nvdimm bus %pOF\n", p->dn);
diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h
index 2c54c9cbb90c..d29638b6a332 100644
--- a/tools/testing/nvdimm/test/ndtest.h
+++ b/tools/testing/nvdimm/test/ndtest.h
@@ -16,6 +16,8 @@
#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5))
/* SCM contents cannot persist due to current platform health status */
#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6))
+/* SCM device is unable to persist memory contents in certain conditions */
+#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7))
/* Bits status indicators for health bitmap indicating unarmed dimm */
#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \
@@ -38,6 +40,49 @@
struct ndtest_config;
+/* DIMM Health extension flag bits */
+#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID (1 << 0)
+#define PDSM_DIMM_HEALTH_MEDIA_TEMPERATURE_VALID (1 << 1)
+#define PDSM_DIMM_HEALTH_CTRL_TEMPERATURE_VALID (1 << 2)
+#define PDSM_DIMM_HEALTH_SHUTDOWN_COUNT_VALID (1 << 3)
+#define PDSM_DIMM_HEALTH_SPARES_VALID (1 << 4)
+#define PDSM_DIMM_HEALTH_ALARM_VALID (1 << 5)
+
+/* Alarm trip bits (stored in the alarm_flags / alarm_control fields) */
+#define ND_PAPR_HEALTH_SPARE_TRIP (1 << 0)
+#define ND_PAPR_HEALTH_TEMP_TRIP (1 << 1)
+#define ND_PAPR_HEALTH_CTEMP_TRIP (1 << 2)
+
+/* DIMM Health inject flag bits */
+#define ND_PAPR_HEALTH_INJECT_MTEMP (1 << 0)
+#define ND_PAPR_HEALTH_INJECT_SPARE (1 << 1)
+#define ND_PAPR_HEALTH_INJECT_FATAL (1 << 2)
+#define ND_PAPR_HEALTH_INJECT_SHUTDOWN (1 << 3)
+
+/* Various nvdimm health indicators (defined once; listed by rising severity) */
+#define PAPR_PDSM_DIMM_HEALTHY 0
+#define PAPR_PDSM_DIMM_UNHEALTHY 1
+#define PAPR_PDSM_DIMM_CRITICAL 2
+#define PAPR_PDSM_DIMM_FATAL 3
+
+/*
+ * PDSM function numbers, matched against nd_cmd_pkg.nd_command.
+ * ndtest_dimm_cmd_call() dispatches on the PAPR_PDSM_HEALTH* values.
+ */
+enum papr_pdsm {
+ PAPR_PDSM_MIN = 0x0,
+ PAPR_PDSM_HEALTH, /* implicitly 1 */
+ PAPR_PDSM_INJECT_SET = 11,
+ PAPR_PDSM_INJECT_CLEAR = 12,
+ PAPR_PDSM_INJECT_GET = 13,
+ PAPR_PDSM_HEALTH_INJECT = 14,
+ PAPR_PDSM_HEALTH_THRESHOLD = 15,
+ PAPR_PDSM_HEALTH_THRESHOLD_SET = 16,
+ PAPR_PDSM_MAX, /* implicitly 17 */
+};
+
+/* NOTE(review): the tag says dimm_type but the enumerators name region types. */
+enum dimm_type {
+ NDTEST_REGION_TYPE_PMEM = 0x0,
+ NDTEST_REGION_TYPE_BLK = 0x1,
+};
+
struct ndtest_priv {
struct platform_device pdev;
struct device_node *dn;
@@ -80,6 +125,21 @@ struct ndtest_dimm {
int id;
int fail_cmd_code;
u8 no_alias;
+
+ struct kernfs_node *notify_handle;
+
+ /* SMART Health information */
+ unsigned long long extension_flags;
+ __u16 dimm_fuel_gauge;
+ __u16 media_temperature;
+ __u16 ctrl_temperature;
+ __u8 spares;
+ __u8 alarm_flags;
+
+ /* SMART Health thresholds */
+ __u16 media_temperature_threshold;
+ __u16 ctrl_temperature_threshold;
+ __u8 spares_threshold;
};
struct ndtest_mapping {
@@ -106,4 +166,73 @@ struct ndtest_config {
u8 num_regions;
};
+/* Size in bytes of the buf[] member that pads every PDSM payload. */
+#define ND_PDSM_PAYLOAD_MAX_SIZE 184
+
+/*
+ * Payload for PAPR_PDSM_HEALTH; ndtest_pdsm_health() fills it in.
+ * The union with buf[] makes the structure at least
+ * ND_PDSM_PAYLOAD_MAX_SIZE bytes long.
+ */
+struct nd_papr_pdsm_health {
+ union {
+ struct {
+ /* PDSM_DIMM_HEALTH_*_VALID bits for the optional fields below */
+ __u32 extension_flags;
+ __u8 dimm_unarmed;
+ __u8 dimm_bad_shutdown;
+ __u8 dimm_bad_restore;
+ __u8 dimm_scrubbed;
+ __u8 dimm_locked;
+ __u8 dimm_encrypted;
+ /* one of the PAPR_PDSM_DIMM_* health indicators */
+ __u16 dimm_health;
+
+ /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
+ __u16 dimm_fuel_gauge;
+ /* Extension flag PDSM_DIMM_HEALTH_MEDIA_TEMPERATURE_VALID */
+ __u16 media_temperature;
+ /* Extension flag PDSM_DIMM_HEALTH_CTRL_TEMPERATURE_VALID */
+ __u16 ctrl_temperature;
+ /* Extension flag PDSM_DIMM_HEALTH_SPARES_VALID */
+ __u8 spares;
+ /* Extension flag PDSM_DIMM_HEALTH_ALARM_VALID */
+ __u16 alarm_flags;
+ };
+ __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+ };
+};
+
+/*
+ * Payload shared by PAPR_PDSM_HEALTH_THRESHOLD (values returned) and
+ * PAPR_PDSM_HEALTH_THRESHOLD_SET (values supplied).
+ */
+struct nd_papr_pdsm_health_threshold {
+ union {
+ struct {
+ /* copied to/from ndtest_dimm.alarm_flags by the handlers */
+ __u16 alarm_control;
+ __u8 spares;
+ __u16 media_temperature;
+ __u16 ctrl_temperature;
+ /* not written by either threshold handler in this file */
+ __u32 status;
+ };
+ __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+ };
+};
+
+/*
+ * Payload for PAPR_PDSM_HEALTH_INJECT. Each ND_PAPR_HEALTH_INJECT_* bit in
+ * flags selects an item; the matching *_enable byte chooses between applying
+ * the injected value and restoring the default.
+ */
+struct nd_papr_pdsm_health_inject {
+ union {
+ struct {
+ __u64 flags;
+ __u8 mtemp_enable;
+ __u16 media_temperature;
+ /* ctemp_* are not consumed by ndtest_pdsm_health_inject() */
+ __u8 ctemp_enable;
+ __u16 ctrl_temperature;
+ __u8 spares_enable;
+ __u8 spares;
+ __u8 fatal_enable;
+ __u8 unsafe_shutdown_enable;
+ /* set to 0 by ndtest_pdsm_health_inject() on success */
+ __u32 status;
+ };
+ __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+ };
+};
+
+/* All PDSM payload layouts overlaid on one ND_PDSM_PAYLOAD_MAX_SIZE buffer. */
+union nd_pdsm_payload {
+ struct nd_papr_pdsm_health health;
+ struct nd_papr_pdsm_health_inject inject;
+ struct nd_papr_pdsm_health_threshold threshold;
+ __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+} __packed;
+
+/*
+ * Envelope found at nd_cmd_pkg.nd_payload: a small header followed by the
+ * function-specific payload.
+ */
+struct nd_pkg_pdsm {
+ __s32 cmd_status; /* Out: Sub-cmd status returned back */
+ __u16 reserved[2]; /* Ignored and to be set as '0' */
+ union nd_pdsm_payload payload;
+} __packed;
+
#endif /* NDTEST_H */
--
2.31.1
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [v2 2/2] nvdimm/ndtest: Add support for error injection tests
2021-05-17 8:40 [v2 1/2] tests/nvdimm/ndtest: Enable smart tests Santosh Sivaraj
@ 2021-05-17 8:40 ` Santosh Sivaraj
0 siblings, 0 replies; 2+ messages in thread
From: Santosh Sivaraj @ 2021-05-17 8:40 UTC (permalink / raw)
To: Linux NVDIMM; +Cc: Shivaprasad G Bhat, Harish Sriram, Aneesh Kumar K.V
Add the support needed to run the error-injection family of tests on
non-ACPI platforms.
Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
tools/testing/nvdimm/test/ndtest.c | 455 ++++++++++++++++++++++++++++-
tools/testing/nvdimm/test/ndtest.h | 25 ++
2 files changed, 477 insertions(+), 3 deletions(-)
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index bb47b145466d..09d98317bf4e 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+/* Prefix every pr_*() message from this file with "ndtest: ". */
+#define pr_fmt(fmt) "ndtest: " fmt
#include <linux/platform_device.h>
#include <linux/device.h>
@@ -42,6 +42,7 @@ static DEFINE_SPINLOCK(ndtest_lock);
static struct ndtest_priv *instances[NUM_INSTANCES];
static struct class *ndtest_dimm_class;
static struct gen_pool *ndtest_pool;
+static struct workqueue_struct *ndtest_wq;
static const struct nd_papr_pdsm_health health_defaults = {
.dimm_unarmed = 0,
@@ -496,6 +497,139 @@ static int ndtest_pdsm_health_set_threshold(struct ndtest_dimm *dimm,
return 0;
}
+/*
+ * ars_complete_all() - ask every registered region of @p to revalidate its
+ * poison list. Config slots without a region object are skipped.
+ */
+static void ars_complete_all(struct ndtest_priv *p)
+{
+	int idx;
+
+	for (idx = 0; idx < p->config->num_regions; idx++) {
+		struct ndtest_region *slot = &p->config->regions[idx];
+
+		if (!slot->region)
+			continue;
+
+		nvdimm_region_notify(slot->region, NVDIMM_REVALIDATE_POISON);
+	}
+}
+
+/*
+ * ndtest_scrub() - delayed-work handler that publishes injected bad ranges.
+ * @work: the work member of ndtest_priv.dwork
+ *
+ * Every entry on p->badrange.list is handed to nvdimm_bus_add_badrange().
+ * If no entry was added (including an empty list) the work re-queues itself
+ * one second (HZ) later and leaves the scrub flags untouched. Otherwise the
+ * regions are told to revalidate poison, scrub_count is bumped, the "scrub"
+ * sysfs node is notified and the flags go from busy/poll to valid.
+ */
+static void ndtest_scrub(struct work_struct *work)
+{
+ struct ndtest_priv *p = container_of(work, typeof(struct ndtest_priv),
+ dwork.work);
+ struct badrange_entry *be;
+ int rc, i = 0;
+
+ /*
+ * NOTE(review): nvdimm_bus_add_badrange() is called with a spinlock
+ * held - confirm it neither sleeps nor takes a conflicting lock.
+ */
+ spin_lock(&p->badrange.lock);
+ list_for_each_entry(be, &p->badrange.list, list) {
+ rc = nvdimm_bus_add_badrange(p->bus, be->start, be->length);
+ if (rc)
+ dev_err(&p->pdev.dev, "Failed to process ARS records\n");
+ else
+ i++; /* count only the ranges that were accepted */
+ }
+ spin_unlock(&p->badrange.lock);
+
+ /* Nothing published yet: try again later, ARS_BUSY stays set. */
+ if (i == 0) {
+ queue_delayed_work(ndtest_wq, &p->dwork, HZ);
+ return;
+ }
+
+ ars_complete_all(p);
+ p->scrub_count++;
+
+ mutex_lock(&p->ars_lock);
+ sysfs_notify_dirent(p->scrub_state);
+ clear_bit(ARS_BUSY, &p->scrub_flags);
+ clear_bit(ARS_POLL, &p->scrub_flags);
+ set_bit(ARS_VALID, &p->scrub_flags);
+ mutex_unlock(&p->ars_lock);
+
+}
+
+/*
+ * ndtest_scrub_notify() - start a scrub pass unless one is already marked
+ * busy. Always returns 0.
+ */
+static int ndtest_scrub_notify(struct ndtest_priv *p)
+{
+	/* ARS_BUSY was already set: a pass is in flight, nothing to queue. */
+	if (test_and_set_bit(ARS_BUSY, &p->scrub_flags))
+		return 0;
+
+	queue_delayed_work(ndtest_wq, &p->dwork, HZ);
+	return 0;
+}
+
+/*
+ * ndtest_ars_inject() - record an injected error range.
+ * @p: bus instance whose badrange list receives the range
+ * @inj: inject request; inj->status is written back
+ * @buf_len: length supplied by the caller, must equal sizeof(*inj)
+ *
+ * When the NOTIFY option is set a scrub pass is kicked off as well.
+ *
+ * Return: 0 on success; -EINVAL for a wrong length or the badrange_add()
+ * error otherwise. On failure inj->status is NFIT_ARS_INJECT_INVALID.
+ */
+static int ndtest_ars_inject(struct ndtest_priv *p,
+			     struct nd_cmd_ars_err_inj *inj,
+			     unsigned int buf_len)
+{
+	int rc;
+
+	if (buf_len != sizeof(*inj)) {
+		dev_dbg(&p->bus->dev, "buflen: %u, inj size: %zu\n",
+			buf_len, sizeof(*inj));
+		rc = -EINVAL;
+		goto err;
+	}
+
+	rc = badrange_add(&p->badrange, inj->err_inj_spa_range_base,
+			  inj->err_inj_spa_range_length);
+	/* Do not report success (or scrub) for a range that was not stored. */
+	if (rc)
+		goto err;
+
+	if (inj->err_inj_options & (1 << ND_ARS_ERR_INJ_OPT_NOTIFY))
+		ndtest_scrub_notify(p);
+
+	inj->status = 0;
+
+	return 0;
+
+err:
+	inj->status = NFIT_ARS_INJECT_INVALID;
+	return rc;
+}
+
+/*
+ * ndtest_ars_inject_clear() - forget a previously injected error range.
+ * @p: bus instance whose badrange list is trimmed
+ * @inj: clear request; inj->status is written back
+ * @buf_len: length supplied by the caller, must equal sizeof(*inj)
+ *
+ * Return: 0 on success, -EINVAL (with status NFIT_ARS_INJECT_INVALID) for a
+ * wrong length or a non-positive range length.
+ */
+static int ndtest_ars_inject_clear(struct ndtest_priv *p,
+				   struct nd_cmd_ars_err_inj_clr *inj,
+				   unsigned int buf_len)
+{
+	if (buf_len != sizeof(*inj) ||
+	    inj->err_inj_clr_spa_range_length <= 0) {
+		inj->status = NFIT_ARS_INJECT_INVALID;
+		return -EINVAL;
+	}
+
+	badrange_forget(&p->badrange, inj->err_inj_clr_spa_range_base,
+			inj->err_inj_clr_spa_range_length);
+	inj->status = 0;
+
+	return 0;
+}
+
+/*
+ * ndtest_ars_inject_status() - report the currently injected error ranges.
+ * @p: bus instance whose badrange list is walked
+ * @stat: output; record[] and inj_err_rec_count are filled in
+ * @buf_len: length supplied by the caller (not consulted here)
+ *
+ * At most SZ_4K / sizeof(struct nd_error_stat_query_record) records are
+ * written; further entries are dropped silently.
+ *
+ * Return: always 0.
+ */
+static int ndtest_ars_inject_status(struct ndtest_priv *p,
+				    struct nd_cmd_ars_err_inj_stat *stat,
+				    unsigned int buf_len)
+{
+	struct badrange_entry *be;
+	int max = SZ_4K / sizeof(struct nd_error_stat_query_record);
+	int i = 0;
+
+	stat->status = 0;
+	spin_lock(&p->badrange.lock);
+	list_for_each_entry(be, &p->badrange.list, list) {
+		/* Check before writing so record[max] is never touched. */
+		if (i >= max)
+			break;
+		stat->record[i].err_inj_stat_spa_range_base = be->start;
+		stat->record[i].err_inj_stat_spa_range_length = be->length;
+		i++;
+	}
+	spin_unlock(&p->badrange.lock);
+	stat->inj_err_rec_count = i;
+
+	return 0;
+}
+
static int ndtest_dimm_cmd_call(struct ndtest_dimm *dimm, unsigned int buf_len,
void *buf)
{
@@ -519,6 +653,157 @@ static int ndtest_dimm_cmd_call(struct ndtest_dimm *dimm, unsigned int buf_len,
return 0;
}
+/*
+ * ndtest_bus_cmd_call() - dispatch a bus-level ND_CMD_CALL package.
+ * @nd_desc: bus descriptor embedded in the owning ndtest_priv
+ * @buf: struct nd_cmd_pkg carrying the request
+ * @buf_len: length of @buf (not consulted here)
+ * @cmd_rc: not written by this function
+ *
+ * The handlers receive nd_size_in + nd_size_out as their length argument.
+ *
+ * Return: the handler's result, or -ENOTTY for an unknown function number.
+ */
+static int ndtest_bus_cmd_call(struct nvdimm_bus_descriptor *nd_desc, void *buf,
+			       unsigned int buf_len, int *cmd_rc)
+{
+	struct ndtest_priv *priv = container_of(nd_desc, struct ndtest_priv,
+						bus_desc);
+	struct nd_cmd_pkg *call = buf;
+	void *body = call->nd_payload;
+	unsigned int total = call->nd_size_in + call->nd_size_out;
+	unsigned int func = call->nd_command;
+	int rc;
+
+	switch (func) {
+	case PAPR_PDSM_INJECT_SET:
+		rc = ndtest_ars_inject(priv, body, total);
+		break;
+	case PAPR_PDSM_INJECT_CLEAR:
+		rc = ndtest_ars_inject_clear(priv, body, total);
+		break;
+	case PAPR_PDSM_INJECT_GET:
+		rc = ndtest_ars_inject_status(priv, body, total);
+		break;
+	default:
+		rc = -ENOTTY;
+		break;
+	}
+
+	return rc;
+}
+
+/*
+ * ndtest_cmd_ars_cap() - answer ND_CMD_ARS_CAP.
+ * @p: bus instance; max_ars is updated to the advertised output size
+ * @cmd: capability reply to fill in
+ * @buf_len: length of @cmd as supplied by the caller
+ *
+ * Return: 0 on success, -EINVAL when @buf_len is smaller than *@cmd.
+ */
+static int ndtest_cmd_ars_cap(struct ndtest_priv *p, struct nd_cmd_ars_cap *cmd,
+			      unsigned int buf_len)
+{
+	/* for testing, only store up to n records that fit within a page */
+	const int nr_records = SZ_4K / sizeof(struct nd_ars_record);
+
+	if (buf_len < sizeof(*cmd))
+		return -EINVAL;
+
+	cmd->clear_err_unit = 256;
+	cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16;
+	cmd->max_ars_out = sizeof(struct nd_cmd_ars_status) +
+			   nr_records * sizeof(struct nd_ars_record);
+	p->max_ars = cmd->max_ars_out;
+
+	return 0;
+}
+
+/*
+ * post_ars_status() - build the ARS status reply for [addr, addr + len).
+ * @state: ARS state; its ars_status buffer is overwritten and its deadline
+ *         is pushed one second into the future
+ * @badrange: list of injected bad ranges to report
+ * @addr: start of the scrubbed range
+ * @len: length of the scrubbed range
+ *
+ * Each bad range overlapping the request is clipped to it and stored as one
+ * record. The status buffer is allocated with room for one page of records
+ * (see ndtest_init_ars()), so at most SZ_4K / sizeof(struct nd_ars_record)
+ * records are written; further overlapping entries are dropped.
+ */
+static void post_ars_status(struct ars_state *state,
+			    struct badrange *badrange, u64 addr, u64 len)
+{
+	const int max_records = SZ_4K / sizeof(struct nd_ars_record);
+	struct nd_cmd_ars_status *status;
+	struct nd_ars_record *record;
+	struct badrange_entry *be;
+	u64 end = addr + len - 1;
+	int i = 0;
+
+	state->deadline = jiffies + 1*HZ;
+	status = state->ars_status;
+	status->status = 0;
+	status->address = addr;
+	status->length = len;
+	status->type = ND_ARS_PERSISTENT;
+
+	spin_lock(&badrange->lock);
+	list_for_each_entry(be, &badrange->list, list) {
+		u64 be_end = be->start + be->length - 1;
+		u64 rstart, rend;
+
+		/* skip entries outside the range */
+		if (be_end < addr || be->start > end)
+			continue;
+
+		/* never write past the records that fit in the buffer */
+		if (i >= max_records)
+			break;
+
+		rstart = (be->start < addr) ? addr : be->start;
+		rend = (be_end < end) ? be_end : end;
+		record = &status->records[i];
+		record->handle = 0;
+		record->err_address = rstart;
+		record->length = rend - rstart + 1;
+		i++;
+	}
+	spin_unlock(&badrange->lock);
+
+	status->num_records = i;
+	status->out_length = sizeof(struct nd_cmd_ars_status)
+		+ i * sizeof(struct nd_ars_record);
+}
+
+/* Status codes reported while a previous ARS request is still pending. */
+#define NFIT_ARS_STATUS_BUSY (1 << 16)
+#define NFIT_ARS_START_BUSY 6
+
+/*
+ * ndtest_cmd_ars_start() - answer ND_CMD_ARS_START.
+ * @priv: bus instance
+ * @start: request; status and scrub_time are written back
+ * @buf_len: length of @start as supplied by the caller
+ * @cmd_rc: receives 0 when the scrub was accepted, -EBUSY before the
+ *          deadline of the previous one
+ *
+ * Return: 0, or -EINVAL when @buf_len is smaller than *@start.
+ */
+static int ndtest_cmd_ars_start(struct ndtest_priv *priv,
+				struct nd_cmd_ars_start *start,
+				unsigned int buf_len, int *cmd_rc)
+{
+	if (buf_len < sizeof(*start))
+		return -EINVAL;
+
+	spin_lock(&priv->state.lock);
+	if (!time_before(jiffies, priv->state.deadline)) {
+		/* Previous deadline has passed: accept and build the result. */
+		start->status = 0;
+		start->scrub_time = 1;
+		post_ars_status(&priv->state, &priv->badrange,
+				start->address, start->length);
+		*cmd_rc = 0;
+	} else {
+		start->status = NFIT_ARS_START_BUSY;
+		*cmd_rc = -EBUSY;
+	}
+	spin_unlock(&priv->state.lock);
+
+	return 0;
+}
+
+/*
+ * ndtest_cmd_ars_status() - answer ND_CMD_ARS_STATUS.
+ * @priv: bus instance holding the prepared status buffer
+ * @status: caller buffer receiving the reply
+ * @buf_len: length of @status as supplied by the caller
+ * @cmd_rc: receives 0 with the stored result, or -EBUSY (with a zeroed,
+ *          header-only reply) while the deadline has not passed
+ *
+ * Return: 0, or -EINVAL when @buf_len is smaller than the stored result.
+ */
+static int ndtest_cmd_ars_status(struct ndtest_priv *priv,
+				 struct nd_cmd_ars_status *status,
+				 unsigned int buf_len, int *cmd_rc)
+{
+	if (buf_len < priv->state.ars_status->out_length)
+		return -EINVAL;
+
+	spin_lock(&priv->state.lock);
+	if (!time_before(jiffies, priv->state.deadline)) {
+		memcpy(status, priv->state.ars_status,
+		       priv->state.ars_status->out_length);
+		*cmd_rc = 0;
+	} else {
+		memset(status, 0, buf_len);
+		status->status = NFIT_ARS_STATUS_BUSY;
+		status->out_length = sizeof(*status);
+		*cmd_rc = -EBUSY;
+	}
+	spin_unlock(&priv->state.lock);
+
+	return 0;
+}
+
+/*
+ * ndtest_cmd_clear_error() - answer ND_CMD_CLEAR_ERROR.
+ * @priv: bus instance whose badrange list is trimmed
+ * @inj: request; status and cleared are written back
+ * @buf_len: length of @inj as supplied by the caller
+ * @cmd_rc: set to 0 on success
+ *
+ * Return: 0 on success, -EINVAL for a short buffer or when address or length
+ * is not a multiple of 256.
+ */
+static int ndtest_cmd_clear_error(struct ndtest_priv *priv,
+				  struct nd_cmd_clear_error *inj,
+				  unsigned int buf_len, int *cmd_rc)
+{
+	const u64 align_mask = 255;
+
+	if (buf_len < sizeof(*inj))
+		return -EINVAL;
+
+	/* Any low bit set in either value means it is misaligned. */
+	if ((inj->address | inj->length) & align_mask)
+		return -EINVAL;
+
+	badrange_forget(&priv->badrange, inj->address, inj->length);
+	inj->cleared = inj->length;
+	inj->status = 0;
+	*cmd_rc = 0;
+
+	return 0;
+}
+
static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
unsigned int buf_len, int *cmd_rc)
@@ -531,8 +816,32 @@ static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc,
*cmd_rc = 0;
- if (!nvdimm)
- return -EINVAL;
+ if (!nvdimm) {
+ struct ndtest_priv *priv;
+
+ if (!nd_desc)
+ return -ENOTTY;
+
+ priv = container_of(nd_desc, struct ndtest_priv, bus_desc);
+ switch (cmd) {
+ case ND_CMD_CALL:
+ return ndtest_bus_cmd_call(nd_desc, buf, buf_len,
+ cmd_rc);
+ case ND_CMD_ARS_CAP:
+ return ndtest_cmd_ars_cap(priv, buf, buf_len);
+ case ND_CMD_ARS_START:
+ return ndtest_cmd_ars_start(priv, buf, buf_len, cmd_rc);
+ case ND_CMD_ARS_STATUS:
+ return ndtest_cmd_ars_status(priv, buf, buf_len,
+ cmd_rc);
+ case ND_CMD_CLEAR_ERROR:
+ return ndtest_cmd_clear_error(priv, buf, buf_len,
+ cmd_rc);
+ default:
+ dev_dbg(&priv->pdev.dev, "Invalid command\n");
+ return -ENOTTY;
+ }
+ }
dimm = nvdimm_provider_data(nvdimm);
if (!dimm)
@@ -683,6 +992,9 @@ static void *ndtest_alloc_resource(struct ndtest_priv *p, size_t size,
return NULL;
buf = vmalloc(size);
+ if (!buf)
+ return NULL;
+
if (size >= DIMM_SIZE)
__dma = gen_pool_alloc_algo(ndtest_pool, size,
gen_pool_first_fit_align, &data);
@@ -1052,6 +1364,7 @@ static ssize_t flags_show(struct device *dev,
}
static DEVICE_ATTR_RO(flags);
+
#define PAPR_PMEM_DIMM_CMD_MASK \
((1U << PAPR_PDSM_HEALTH) \
| (1U << PAPR_PDSM_HEALTH_INJECT) \
@@ -1195,11 +1508,102 @@ static const struct attribute_group of_node_attribute_group = {
.attrs = of_node_attributes,
};
+/* One bit per bus-level PDSM function that ndtest_bus_cmd_call() handles. */
+#define PAPR_PMEM_BUS_DSM_MASK				\
+	((1U << PAPR_PDSM_INJECT_SET) |			\
+	 (1U << PAPR_PDSM_INJECT_GET) |			\
+	 (1U << PAPR_PDSM_INJECT_CLEAR))
+
+/* sysfs read handler: print PAPR_PMEM_BUS_DSM_MASK as "%#x". */
+static ssize_t bus_dsm_mask_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	const unsigned int mask = PAPR_PMEM_BUS_DSM_MASK;
+
+	return sprintf(buf, "%#x\n", mask);
+}
+
+/* Declared by hand so the file is named "dsm_mask" rather than after the C symbol. */
+static struct device_attribute dev_attr_bus_dsm_mask = {
+	.attr = {
+		.name = "dsm_mask",
+		.mode = 0444,
+	},
+	.show = bus_dsm_mask_show,
+};
+
+/*
+ * scrub_show() - sysfs read handler for the "scrub" attribute.
+ *
+ * Prints the scrub count, followed by '+' while a scrub is busy and not
+ * cancelled. A busy read by a CAP_SYS_RAWIO caller also sets ARS_POLL and,
+ * if it was not already set, re-arms the scrub work.
+ *
+ * Return: number of bytes written, or -ENXIO when the device has no driver
+ * data.
+ */
+static ssize_t scrub_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct nvdimm_bus_descriptor *nd_desc;
+	struct ndtest_priv *p;
+	ssize_t rc;
+	bool busy;
+
+	device_lock(dev);
+	nd_desc = dev_get_drvdata(dev);
+	if (!nd_desc) {
+		rc = -ENXIO;
+		goto out_unlock;
+	}
+
+	p = container_of(nd_desc, struct ndtest_priv, bus_desc);
+
+	mutex_lock(&p->ars_lock);
+	busy = test_bit(ARS_BUSY, &p->scrub_flags) &&
+	       !test_bit(ARS_CANCEL, &p->scrub_flags);
+	rc = sprintf(buf, "%d%s", p->scrub_count, busy ? "+\n" : "\n");
+	/* Keep this evaluation order: test_and_set_bit() has a side effect. */
+	if (busy && capable(CAP_SYS_RAWIO)) {
+		if (!test_and_set_bit(ARS_POLL, &p->scrub_flags))
+			mod_delayed_work(ndtest_wq, &p->dwork, HZ);
+	}
+	mutex_unlock(&p->ars_lock);
+
+out_unlock:
+	device_unlock(dev);
+	return rc;
+}
+
+/*
+ * scrub_store() - sysfs write handler for the "scrub" attribute.
+ *
+ * Only the value 1 is accepted; it requests a scrub pass when driver data is
+ * present.
+ *
+ * Return: @size on success, the kstrtol() error for unparsable input, or
+ * -EINVAL for any value other than 1.
+ */
+static ssize_t scrub_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t size)
+{
+	struct nvdimm_bus_descriptor *nd_desc;
+	long val;
+	int err;
+
+	err = kstrtol(buf, 0, &val);
+	if (err)
+		return err;
+	if (val != 1)
+		return -EINVAL;
+
+	device_lock(dev);
+	nd_desc = dev_get_drvdata(dev);
+	if (nd_desc)
+		ndtest_scrub_notify(container_of(nd_desc, struct ndtest_priv,
+						 bus_desc));
+	device_unlock(dev);
+
+	return size;
+}
+static DEVICE_ATTR_RW(scrub);
+
+/* Bus-level attributes exposed by this test provider. */
+static struct attribute *ndtest_attributes[] = {
+ &dev_attr_bus_dsm_mask.attr,
+ &dev_attr_scrub.attr,
+ NULL,
+};
+
+/*
+ * The group name places the attributes in a "papr" sysfs directory;
+ * ndtest_init_ars() looks up "papr" and then "scrub" by these names.
+ */
+static const struct attribute_group ndtest_attribute_group = {
+ .name = "papr",
+ .attrs = ndtest_attributes,
+};
+
+/* NULL-terminated list handed to the bus descriptor as attr_groups. */
static const struct attribute_group *ndtest_attribute_groups[] = {
&of_node_attribute_group,
+ &ndtest_attribute_group,
NULL,
};
+/* Bus commands that ndtest_ctl() services when no DIMM is given. */
+#define PAPR_PMEM_BUS_CMD_MASK			\
+	((1UL << ND_CMD_ARS_CAP) |		\
+	 (1UL << ND_CMD_ARS_START) |		\
+	 (1UL << ND_CMD_ARS_STATUS) |		\
+	 (1UL << ND_CMD_CLEAR_ERROR) |		\
+	 (1UL << ND_CMD_CALL))
+
static int ndtest_bus_register(struct ndtest_priv *p)
{
p->config = &bus_configs[p->pdev.id];
@@ -1207,7 +1611,9 @@ static int ndtest_bus_register(struct ndtest_priv *p)
p->bus_desc.ndctl = ndtest_ctl;
p->bus_desc.module = THIS_MODULE;
p->bus_desc.provider_name = NULL;
+ p->bus_desc.cmd_mask = PAPR_PMEM_BUS_CMD_MASK;
p->bus_desc.attr_groups = ndtest_attribute_groups;
+ p->bus_desc.bus_family_mask = NVDIMM_FAMILY_PAPR;
set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask);
@@ -1228,6 +1634,33 @@ static int ndtest_remove(struct platform_device *pdev)
return 0;
}
+/*
+ * ndtest_init_ars() - allocate the ARS status buffer and look up the sysfs
+ * node used for scrub notifications.
+ * @p: bus instance; p->bus must already be registered
+ *
+ * The status buffer is device-managed and sized for the header plus SZ_4K of
+ * records. p->scrub_state keeps a reference to papr/scrub under the bus
+ * device.
+ *
+ * Return: 0 on success, -ENOMEM if the buffer cannot be allocated, -ENOENT
+ * if either sysfs node is missing.
+ */
+static int ndtest_init_ars(struct ndtest_priv *p)
+{
+	struct kernfs_node *papr_node;
+	struct device *bus_dev;
+
+	p->state.ars_status = devm_kzalloc(
+		&p->pdev.dev, sizeof(struct nd_cmd_ars_status) + SZ_4K,
+		GFP_KERNEL);
+	if (!p->state.ars_status)
+		return -ENOMEM;
+
+	bus_dev = to_nvdimm_bus_dev(p->bus);
+	papr_node = sysfs_get_dirent(bus_dev->kobj.sd, "papr");
+	if (!papr_node) {
+		dev_err(&p->pdev.dev, "sysfs_get_dirent 'papr' failed\n");
+		return -ENOENT;
+	}
+
+	p->scrub_state = sysfs_get_dirent(papr_node, "scrub");
+	/* The directory reference was only needed for the lookup above. */
+	sysfs_put(papr_node);
+	if (!p->scrub_state) {
+		dev_err(&p->pdev.dev, "sysfs_get_dirent 'scrub' failed\n");
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
static int ndtest_probe(struct platform_device *pdev)
{
struct ndtest_priv *p;
@@ -1252,6 +1685,10 @@ static int ndtest_probe(struct platform_device *pdev)
if (rc)
goto err;
+ rc = ndtest_init_ars(p);
+ if (rc)
+ goto err;
+
rc = devm_add_action_or_reset(&pdev->dev, put_dimms, p);
if (rc)
goto err;
@@ -1299,6 +1736,7 @@ static void cleanup_devices(void)
if (ndtest_pool)
gen_pool_destroy(ndtest_pool);
+ destroy_workqueue(ndtest_wq);
if (ndtest_dimm_class)
class_destroy(ndtest_dimm_class);
@@ -1319,6 +1757,10 @@ static __init int ndtest_init(void)
nfit_test_setup(ndtest_resource_lookup, NULL);
+ ndtest_wq = create_singlethread_workqueue("nfit");
+ if (!ndtest_wq)
+ return -ENOMEM;
+
ndtest_dimm_class = class_create(THIS_MODULE, "nfit_test_dimm");
if (IS_ERR(ndtest_dimm_class)) {
rc = PTR_ERR(ndtest_dimm_class);
@@ -1348,6 +1790,7 @@ static __init int ndtest_init(void)
}
INIT_LIST_HEAD(&priv->resources);
+ badrange_init(&priv->badrange);
pdev = &priv->pdev;
pdev->name = KBUILD_MODNAME;
pdev->id = i;
@@ -1360,6 +1803,11 @@ static __init int ndtest_init(void)
get_device(&pdev->dev);
instances[i] = priv;
+
+ /* Everything about ARS here */
+ INIT_DELAYED_WORK(&priv->dwork, ndtest_scrub);
+ mutex_init(&priv->ars_lock);
+ spin_lock_init(&priv->state.lock);
}
rc = platform_driver_register(&ndtest_driver);
@@ -1377,6 +1825,7 @@ static __init int ndtest_init(void)
+/* Module exit: drain the scrub workqueue, then tear down devices and driver. */
static __exit void ndtest_exit(void)
{
+ /*
+ * NOTE(review): confirm that flush_workqueue() also covers a dwork whose
+ * delay timer has not fired yet; the workqueue is destroyed in
+ * cleanup_devices().
+ */
+ flush_workqueue(ndtest_wq);
cleanup_devices();
platform_driver_unregister(&ndtest_driver);
}
diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h
index d29638b6a332..d92c4f3df344 100644
--- a/tools/testing/nvdimm/test/ndtest.h
+++ b/tools/testing/nvdimm/test/ndtest.h
@@ -83,17 +83,34 @@ enum dimm_type {
NDTEST_REGION_TYPE_BLK = 0x1,
};
+/* Emulated Address Range Scrub state for one bus instance. */
+struct ars_state {
+ /* result buffer built by post_ars_status(), copied out on status reads */
+ struct nd_cmd_ars_status *ars_status;
+ /* jiffies value before which ARS start/status report busy */
+ unsigned long deadline;
+ /* taken around the deadline check and buffer access in start/status */
+ spinlock_t lock;
+};
+
+/* Per-bus private data of the ndtest provider. */
struct ndtest_priv {
struct platform_device pdev;
struct device_node *dn;
struct list_head resources;
struct nvdimm_bus_descriptor bus_desc;
+ /* scrub work item, handled by ndtest_scrub() */
+ struct delayed_work dwork;
+ /* held while scrub_flags are reported or updated at scrub completion */
+ struct mutex ars_lock;
struct nvdimm_bus *bus;
struct ndtest_config *config;
+ struct ars_state state;
+ /* injected bad ranges */
+ struct badrange badrange;
+ /* NOTE(review): not referenced in the visible code; state.ars_status is */
+ struct nd_cmd_ars_status *ars_status;
+ /* sysfs node of papr/scrub, notified when a scrub pass completes */
+ struct kernfs_node *scrub_state;
dma_addr_t *dcr_dma;
dma_addr_t *label_dma;
dma_addr_t *dimm_dma;
+
+ /* bit numbers come from enum scrub_flags */
+ unsigned long scrub_flags;
+ /* NOTE(review): not referenced in the visible code */
+ unsigned long ars_state;
+ /* last max_ars_out advertised by ndtest_cmd_ars_cap() */
+ unsigned int max_ars;
+ /* number of completed scrub passes, shown by the scrub attribute */
+ int scrub_count;
};
struct ndtest_blk_mmio {
@@ -235,4 +252,12 @@ struct nd_pkg_pdsm {
union nd_pdsm_payload payload;
} __packed;
+/* Bit numbers used with set_bit()/test_bit() on ndtest_priv.scrub_flags. */
+enum scrub_flags {
+ ARS_BUSY, /* a scrub pass has been requested and is not finished */
+ ARS_CANCEL, /* only tested (in scrub_show()) in the visible code */
+ ARS_VALID, /* set when a scrub pass completes */
+ ARS_POLL, /* set by a privileged scrub_show() reader while busy */
+ ARS_FAILED, /* not referenced in the visible code */
+};
+
#endif /* NDTEST_H */
--
2.31.1
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2021-05-17 8:40 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-17 8:40 [v2 1/2] tests/nvdimm/ndtest: Enable smart tests Santosh Sivaraj
2021-05-17 8:40 ` [v2 2/2] nvdimm/ndtest: Add support for error injection tests Santosh Sivaraj
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).