* [PATCH v2 1/2] libndctl, intel: Indicate supported smart-inject types
2021-09-07 13:58 [PATCH v2 0/2] papr: Implement initial support for injecting smart errors Shivaprasad G Bhat
@ 2021-09-07 13:59 ` Shivaprasad G Bhat
2021-09-07 13:59 ` [PATCH v2 2/2] libndctl/papr: Add limited support for inject-smart Shivaprasad G Bhat
1 sibling, 0 replies; 3+ messages in thread
From: Shivaprasad G Bhat @ 2021-09-07 13:59 UTC (permalink / raw)
To: nvdimm
Cc: aneesh.kumar, sbhat, vaibhav, dan.j.williams, ira.weiny, vishal.l.verma
From: Vaibhav Jain <vaibhav@linux.ibm.com>
Presently the inject-smart code assumes support for injecting all
smart-errors namely media-temperature, controller-temperature,
spares-remaining, fatal-health and unsafe-shutdown. This assumption
may break in case of other non-Intel NVDIMM types namely PAPR NVDIMMs
which presently only have support for injecting unsafe-shutdown and
fatal health events.
Trying to inject-smart errors on PAPR NVDIMMs causes problems as
smart_inject() prematurely exits when trying to inject
media-temperature smart-error errors out.
To fix this issue the patch proposes extending the definition of
dimm_op 'smart_inject_supported' to return bitmap of flags indicating
the type of smart-error injections supported by an NVDIMM. These types
are indicated by the newly introduced defines ND_SMART_INJECT_* . A
dimm-ops provide can return an bitmap composed of these flags back
from its implementation of 'smart_inject_supported' to indicate to
dimm_inject_smart() which type of smart-error injection it
supports. In case of an error the dimm-op is still expected to return
a negative error code back to the caller.
The patch updates intel_dimm_smart_inject_supported() to return a
bitmap composed of all ND_SMART_INJECT_* flags to indicate support for
all smart-error types.
Finally the patch also updates smart_inject() to test for specific
ND_START_INJECT_* flags before sending a smart-inject command via
dimm-provider.
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
Changelog:
Since v1:
Link: https://lore.kernel.org/nvdimm/20210712173132.1205192-2-vaibhav@linux.ibm.com
* Minor update to patch description
ndctl/inject-smart.c | 33 ++++++++++++++++++++++++++-------
ndctl/lib/intel.c | 7 ++++++-
ndctl/libndctl.h | 8 ++++++++
3 files changed, 40 insertions(+), 8 deletions(-)
diff --git a/ndctl/inject-smart.c b/ndctl/inject-smart.c
index 9077bca2..ef0620f5 100644
--- a/ndctl/inject-smart.c
+++ b/ndctl/inject-smart.c
@@ -393,18 +393,26 @@ out:
} \
}
-static int smart_inject(struct ndctl_dimm *dimm)
+static int smart_inject(struct ndctl_dimm *dimm, unsigned int inject_types)
{
const char *name = ndctl_dimm_get_devname(dimm);
struct ndctl_cmd *si_cmd = NULL;
int rc = -EOPNOTSUPP;
- send_inject_val(media_temperature)
- send_inject_val(ctrl_temperature)
- send_inject_val(spares)
- send_inject_bool(fatal)
- send_inject_bool(unsafe_shutdown)
+ if (inject_types & ND_SMART_INJECT_MEDIA_TEMPERATURE)
+ send_inject_val(media_temperature);
+ if (inject_types & ND_SMART_INJECT_CTRL_TEMPERATURE)
+ send_inject_val(ctrl_temperature);
+
+ if (inject_types & ND_SMART_INJECT_SPARES_REMAINING)
+ send_inject_val(spares);
+
+ if (inject_types & ND_SMART_INJECT_HEALTH_STATE)
+ send_inject_bool(fatal);
+
+ if (inject_types & ND_SMART_INJECT_UNCLEAN_SHUTDOWN)
+ send_inject_bool(unsafe_shutdown);
out:
ndctl_cmd_unref(si_cmd);
return rc;
@@ -415,8 +423,10 @@ static int dimm_inject_smart(struct ndctl_dimm *dimm)
struct json_object *jhealth;
struct json_object *jdimms;
struct json_object *jdimm;
+ unsigned int supported_types;
int rc;
+ /* Get supported smart injection types */
rc = ndctl_dimm_smart_inject_supported(dimm);
switch (rc) {
case -ENOTTY:
@@ -431,6 +441,15 @@ static int dimm_inject_smart(struct ndctl_dimm *dimm)
error("%s: smart injection not supported by either platform firmware or the kernel.",
ndctl_dimm_get_devname(dimm));
return rc;
+ default:
+ if (rc < 0) {
+ error("%s: Unknown error %d while checking for smart injection support",
+ ndctl_dimm_get_devname(dimm), rc);
+ return rc;
+ }
+ /* Assigning to an unsigned type since rc < 0 */
+ supported_types = rc;
+ break;
}
if (sctx.op_mask & (1 << OP_SET)) {
@@ -439,7 +458,7 @@ static int dimm_inject_smart(struct ndctl_dimm *dimm)
goto out;
}
if (sctx.op_mask & (1 << OP_INJECT)) {
- rc = smart_inject(dimm);
+ rc = smart_inject(dimm, supported_types);
if (rc)
goto out;
}
diff --git a/ndctl/lib/intel.c b/ndctl/lib/intel.c
index a3df26e6..13148545 100644
--- a/ndctl/lib/intel.c
+++ b/ndctl/lib/intel.c
@@ -455,7 +455,12 @@ static int intel_dimm_smart_inject_supported(struct ndctl_dimm *dimm)
return -EIO;
}
- return 0;
+ /* Indicate all smart injection types are supported */
+ return ND_SMART_INJECT_SPARES_REMAINING |
+ ND_SMART_INJECT_MEDIA_TEMPERATURE |
+ ND_SMART_INJECT_CTRL_TEMPERATURE |
+ ND_SMART_INJECT_HEALTH_STATE |
+ ND_SMART_INJECT_UNCLEAN_SHUTDOWN;
}
static const char *intel_cmd_desc(int fn)
diff --git a/ndctl/libndctl.h b/ndctl/libndctl.h
index cdadd5fd..54539ac7 100644
--- a/ndctl/libndctl.h
+++ b/ndctl/libndctl.h
@@ -69,6 +69,13 @@ extern "C" {
#define ND_EVENT_HEALTH_STATE (1 << 3)
#define ND_EVENT_UNCLEAN_SHUTDOWN (1 << 4)
+/* Flags indicating support for various smart injection types */
+#define ND_SMART_INJECT_SPARES_REMAINING (1 << 0)
+#define ND_SMART_INJECT_MEDIA_TEMPERATURE (1 << 1)
+#define ND_SMART_INJECT_CTRL_TEMPERATURE (1 << 2)
+#define ND_SMART_INJECT_HEALTH_STATE (1 << 3)
+#define ND_SMART_INJECT_UNCLEAN_SHUTDOWN (1 << 4)
+
size_t ndctl_min_namespace_size(void);
size_t ndctl_sizeof_namespace_index(void);
size_t ndctl_sizeof_namespace_label(void);
@@ -310,6 +317,7 @@ int ndctl_cmd_smart_inject_spares(struct ndctl_cmd *cmd, bool enable,
unsigned int spares);
int ndctl_cmd_smart_inject_fatal(struct ndctl_cmd *cmd, bool enable);
int ndctl_cmd_smart_inject_unsafe_shutdown(struct ndctl_cmd *cmd, bool enable);
+/* Returns a bitmap of ND_SMART_INJECT_* supported */
int ndctl_dimm_smart_inject_supported(struct ndctl_dimm *dimm);
struct ndctl_cmd *ndctl_dimm_cmd_new_vendor_specific(struct ndctl_dimm *dimm,
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH v2 2/2] libndctl/papr: Add limited support for inject-smart
2021-09-07 13:58 [PATCH v2 0/2] papr: Implement initial support for injecting smart errors Shivaprasad G Bhat
2021-09-07 13:59 ` [PATCH v2 1/2] libndctl, intel: Indicate supported smart-inject types Shivaprasad G Bhat
@ 2021-09-07 13:59 ` Shivaprasad G Bhat
1 sibling, 0 replies; 3+ messages in thread
From: Shivaprasad G Bhat @ 2021-09-07 13:59 UTC (permalink / raw)
To: nvdimm
Cc: aneesh.kumar, sbhat, vaibhav, dan.j.williams, ira.weiny, vishal.l.verma
From: Vaibhav Jain <vaibhav@linux.ibm.com>
Implements support for ndctl inject-smart command by providing an
implementation of 'smart_inject*' dimm-ops callbacks. Presently only
support for injecting unsafe-shutdown and fatal-health states is
available.
The patch also introduce various PAPR PDSM structures that are used to
communicate the inject-smart errors to the papr_scm kernel
module. This is done via SMART_INJECT PDSM which sends a payload of
type 'struct nd_papr_pdsm_smart_inject'.
With the patch following output from ndctl inject-smart command is
expected for PAPR NVDIMMs:
$ sudo ndctl inject-smart -fU nmem0
[
{
"dev":"nmem0",
"flag_failed_flush":true,
"flag_smart_event":true,
"health":{
"health_state":"fatal",
"shutdown_state":"dirty",
"shutdown_count":0
}
}
]
$ sudo ndctl inject-smart -N nmem0
[
{
"dev":"nmem0",
"health":{
"health_state":"ok",
"shutdown_state":"clean",
"shutdown_count":0
}
}
]
The patch depends on the kernel PAPR PDSM implementation for
PDSM_SMART_INJECT posted at [1].
[1] : https://patchwork.kernel.org/project/linux-nvdimm/patch/163091917031.334.16212158243308361834.stgit@82313cf9f602/
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
Changelog:
Since v1:
Link: https://lore.kernel.org/nvdimm/20210712173132.1205192-3-vaibhav@linux.ibm.com/
* Updates to patch description.
ndctl/lib/papr.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++
ndctl/lib/papr_pdsm.h | 17 ++++++++++++++
2 files changed, 78 insertions(+)
diff --git a/ndctl/lib/papr.c b/ndctl/lib/papr.c
index 42ff200d..b797e1e5 100644
--- a/ndctl/lib/papr.c
+++ b/ndctl/lib/papr.c
@@ -221,6 +221,41 @@ static unsigned int papr_smart_get_shutdown_state(struct ndctl_cmd *cmd)
return health.dimm_bad_shutdown;
}
+static int papr_smart_inject_supported(struct ndctl_dimm *dimm)
+{
+ if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_CALL))
+ return -EOPNOTSUPP;
+
+ if (!test_dimm_dsm(dimm, PAPR_PDSM_SMART_INJECT))
+ return -EIO;
+
+ return ND_SMART_INJECT_HEALTH_STATE | ND_SMART_INJECT_UNCLEAN_SHUTDOWN;
+}
+
+static int papr_smart_inject_valid(struct ndctl_cmd *cmd)
+{
+ if (cmd->type != ND_CMD_CALL ||
+ to_pdsm(cmd)->cmd_status != 0 ||
+ to_pdsm_cmd(cmd) != PAPR_PDSM_SMART_INJECT)
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct ndctl_cmd *papr_new_smart_inject(struct ndctl_dimm *dimm)
+{
+ struct ndctl_cmd *cmd;
+
+ cmd = allocate_cmd(dimm, PAPR_PDSM_SMART_INJECT,
+ sizeof(struct nd_papr_pdsm_smart_inject));
+ if (!cmd)
+ return NULL;
+ /* Set the input payload size */
+ to_ndcmd(cmd)->nd_size_in = ND_PDSM_HDR_SIZE +
+ sizeof(struct nd_papr_pdsm_smart_inject);
+ return cmd;
+}
+
static unsigned int papr_smart_get_life_used(struct ndctl_cmd *cmd)
{
struct nd_papr_pdsm_health health;
@@ -255,11 +290,37 @@ static unsigned int papr_smart_get_shutdown_count(struct ndctl_cmd *cmd)
return (health.extension_flags & PDSM_DIMM_DSC_VALID) ?
(health.dimm_dsc) : 0;
+}
+
+static int papr_cmd_smart_inject_fatal(struct ndctl_cmd *cmd, bool enable)
+{
+ if (papr_smart_inject_valid(cmd) < 0)
+ return -EINVAL;
+
+ to_payload(cmd)->inject.flags |= PDSM_SMART_INJECT_HEALTH_FATAL;
+ to_payload(cmd)->inject.fatal_enable = enable;
+ return 0;
+}
+
+static int papr_cmd_smart_inject_unsafe_shutdown(struct ndctl_cmd *cmd,
+ bool enable)
+{
+ if (papr_smart_inject_valid(cmd) < 0)
+ return -EINVAL;
+
+ to_payload(cmd)->inject.flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN;
+ to_payload(cmd)->inject.unsafe_shutdown_enable = enable;
+
+ return 0;
}
struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) {
.cmd_is_supported = papr_cmd_is_supported,
+ .new_smart_inject = papr_new_smart_inject,
+ .smart_inject_supported = papr_smart_inject_supported,
+ .smart_inject_fatal = papr_cmd_smart_inject_fatal,
+ .smart_inject_unsafe_shutdown = papr_cmd_smart_inject_unsafe_shutdown,
.smart_get_flags = papr_smart_get_flags,
.get_firmware_status = papr_get_firmware_status,
.xlat_firmware_status = papr_xlat_firmware_status,
diff --git a/ndctl/lib/papr_pdsm.h b/ndctl/lib/papr_pdsm.h
index f45b1e40..20ac20f8 100644
--- a/ndctl/lib/papr_pdsm.h
+++ b/ndctl/lib/papr_pdsm.h
@@ -121,12 +121,29 @@ struct nd_papr_pdsm_health {
enum papr_pdsm {
PAPR_PDSM_MIN = 0x0,
PAPR_PDSM_HEALTH,
+ PAPR_PDSM_SMART_INJECT,
PAPR_PDSM_MAX,
};
+/* Flags for injecting specific smart errors */
+#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0)
+#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1)
+
+struct nd_papr_pdsm_smart_inject {
+ union {
+ struct {
+ /* One or more of PDSM_SMART_INJECT_ */
+ __u32 flags;
+ __u8 fatal_enable;
+ __u8 unsafe_shutdown_enable;
+ };
+ __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+ };
+};
/* Maximal union that can hold all possible payload types */
union nd_pdsm_payload {
struct nd_papr_pdsm_health health;
+ struct nd_papr_pdsm_smart_inject inject;
__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
} __attribute__((packed));
^ permalink raw reply related [flat|nested] 3+ messages in thread