[PATCH v2 0/2] nvme: Support for FW activation without reset

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v2 0/2] nvme: Support for FW activation without reset
       [not found] <CGME20170529141634epcas5p3913fde49b40fcce3cc15971688820834@epcas5p3.samsung.com>
@ 2017-05-29 14:16 ` Arnav Dawn
       [not found]   ` <CGME20170529141731epcas5p39667840c01555c7cf49527167535f82c@epcas5p3.samsung.com>
       [not found]   ` <CGME20170529141807epcas1p259e2e9b04327285d18f2d62bb6ccbce1@epcas1p2.samsung.com>
  0 siblings, 2 replies; 11+ messages in thread
From: Arnav Dawn @ 2017-05-29 14:16 UTC (permalink / raw)


This series of two patches adds support for FW activation without reset.

Arnav Dawn (2):
  nvme: Adding support for FW Slot info log
  nvme: Add support for FW activation without reset

 drivers/nvme/host/core.c | 109 ++++++++++++++++++++++++++++++++++++++++-------
 drivers/nvme/host/nvme.h |   5 ++-
 drivers/nvme/host/scsi.c |  26 ++++++++---
 include/linux/nvme.h     |  10 +++++
 4 files changed, 127 insertions(+), 23 deletions(-)

-- 
1.9.1

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2 1/2] nvme: Adding support for FW Slot info log
       [not found]   ` <CGME20170529141731epcas5p39667840c01555c7cf49527167535f82c@epcas5p3.samsung.com>
@ 2017-05-29 14:17     ` Arnav Dawn
  2017-05-29 17:54       ` Christoph Hellwig
  0 siblings, 1 reply; 11+ messages in thread
From: Arnav Dawn @ 2017-05-29 14:17 UTC (permalink / raw)


This adds support for FW slot info log to nvme_get_log_page.
The nvme_get_log_page takes log type as parameter. This will also
be used to clear FW slot info log for Fw activation AER.

Signed-off-by: Arnav Dawn <a.dawn at samsung.com>
---
 drivers/nvme/host/core.c | 38 +++++++++++++++++++++++---------------
 drivers/nvme/host/nvme.h |  2 +-
 drivers/nvme/host/scsi.c | 26 ++++++++++++++++++++------
 include/linux/nvme.h     |  7 +++++++
 4 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a609264..01d622e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -713,26 +713,34 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 	return ret;
 }
 
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
+int nvme_get_log_page(struct nvme_ctrl *dev, int log_type, void *log)
 {
 	struct nvme_command c = { };
-	int error;
+	int log_size;
 
-	c.common.opcode = nvme_admin_get_log_page,
-	c.common.nsid = cpu_to_le32(0xFFFFFFFF),
-	c.common.cdw10[0] = cpu_to_le32(
-			(((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
-			 NVME_LOG_SMART),
+	if (!log)
+		return -EINVAL;
 
-	*log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
-	if (!*log)
-		return -ENOMEM;
+	c.common.opcode = nvme_admin_get_log_page;
 
-	error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
-			sizeof(struct nvme_smart_log));
-	if (error)
-		kfree(*log);
-	return error;
+	switch (log_type) {
+	case NVME_LOG_SMART:
+		log_size = sizeof(struct nvme_smart_log);
+		c.common.nsid = cpu_to_le32(0xFFFFFFFF);
+		c.common.cdw10[0] = cpu_to_le32(
+			(((log_size / 4) - 1) << 16) | NVME_LOG_SMART);
+		break;
+	case NVME_LOG_FW_SLOT:
+		log_size = sizeof(struct nvme_fw_slot_info_log);
+		c.common.nsid = cpu_to_le32(0xFFFFFFFF);
+		c.common.cdw10[0] = cpu_to_le32(
+			(((log_size / 4) - 1) << 16) | NVME_LOG_FW_SLOT);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return nvme_submit_sync_cmd(dev->admin_q, &c, log, log_size);
 }
 
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9d6a070..e78143a 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -313,7 +313,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
 int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
 		struct nvme_id_ns **id);
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
+int nvme_get_log_page(struct nvme_ctrl *dev, int log_type, void *log);
 int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
 		      void *buffer, size_t buflen, u32 *result);
 int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
index 1f7671e..8118677 100644
--- a/drivers/nvme/host/scsi.c
+++ b/drivers/nvme/host/scsi.c
@@ -846,9 +846,15 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 	if (log_response == NULL)
 		return -ENOMEM;
 
-	res = nvme_get_log_page(ns->ctrl, &smart_log);
-	if (res < 0)
+	smart_log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
+	if (smart_log == NULL) {
+		res = -ENOMEM;
 		goto out_free_response;
+	}
+
+	res = nvme_get_log_page(ns->ctrl, NVME_LOG_SMART, smart_log);
+	if (res < 0)
+		goto out_free_log;
 
 	if (res != NVME_SC_SUCCESS) {
 		temp_c = LOG_TEMP_UNKNOWN;
@@ -857,7 +863,6 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 				(smart_log->temperature[0]);
 		temp_c = temp_k - KELVIN_TEMP_FACTOR;
 	}
-	kfree(smart_log);
 
 	log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
 	/* Subpage=0x00, Page Length MSB=0 */
@@ -873,6 +878,8 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 	xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH);
 	res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
 
+ out_free_log:
+	kfree(smart_log);
  out_free_response:
 	kfree(log_response);
 	return res;
@@ -893,9 +900,15 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	if (log_response == NULL)
 		return -ENOMEM;
 
-	res = nvme_get_log_page(ns->ctrl, &smart_log);
-	if (res < 0)
+	smart_log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
+	if (smart_log == NULL) {
+		res = -ENOMEM;
 		goto out_free_response;
+	}
+
+	res = nvme_get_log_page(ns->ctrl, NVME_LOG_SMART, smart_log);
+	if (res < 0)
+		goto out_free_log;
 
 	if (res != NVME_SC_SUCCESS) {
 		temp_c_cur = LOG_TEMP_UNKNOWN;
@@ -904,7 +917,6 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 				(smart_log->temperature[0]);
 		temp_c_cur = temp_k - KELVIN_TEMP_FACTOR;
 	}
-	kfree(smart_log);
 
 	/* Get Features for Temp Threshold */
 	res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, NULL, 0,
@@ -933,6 +945,8 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH);
 	res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
 
+ out_free_log:
+	kfree(smart_log);
  out_free_response:
 	kfree(log_response);
 	return res;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b625bac..282fc69 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -337,6 +337,13 @@ struct nvme_smart_log {
 	__u8			rsvd216[296];
 };
 
+struct nvme_fw_slot_info_log {
+	__u8			afi;
+	__u8			rsvd1[7];
+	__le64			frs[7];
+	__u8			rsvd64[448];
+};
+
 enum {
 	NVME_SMART_CRIT_SPARE		= 1 << 0,
 	NVME_SMART_CRIT_TEMPERATURE	= 1 << 1,
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2 2/2] nvme: Add support for FW activation without reset
       [not found]   ` <CGME20170529141807epcas1p259e2e9b04327285d18f2d62bb6ccbce1@epcas1p2.samsung.com>
@ 2017-05-29 14:18     ` Arnav Dawn
  2017-05-29 17:57       ` Christoph Hellwig
  2017-05-30 11:38       ` Sagi Grimberg
  0 siblings, 2 replies; 11+ messages in thread
From: Arnav Dawn @ 2017-05-29 14:18 UTC (permalink / raw)


This patch adds support for handling FW activation without reset.
On completion of FW-activation-starting AER, all queues are
paused till CSTS.PP is cleared or timed out (exceeds max time for
fw activtion MTFA). If device fails to clear CSTS.PP within MTFA,
driver issues reset controller.

Signed-off-by: Arnav Dawn <a.dawn at samsung.com>
---
 drivers/nvme/host/core.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++-
 drivers/nvme/host/nvme.h |  3 ++
 include/linux/nvme.h     |  3 ++
 3 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 01d622e..4537346 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1610,7 +1610,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	nvme_set_queue_limits(ctrl, ctrl->admin_q);
 	ctrl->sgls = le32_to_cpu(id->sgls);
 	ctrl->kas = le16_to_cpu(id->kas);
-
+	ctrl->mtfa = le16_to_cpu(id->mtfa);
 	ctrl->npss = id->npss;
 	prev_apsta = ctrl->apsta;
 	if (ctrl->quirks & NVME_QUIRK_NO_APST) {
@@ -2279,6 +2279,45 @@ static void nvme_async_event_work(struct work_struct *work)
 	spin_unlock_irq(&ctrl->lock);
 }
 
+static void nvme_fw_act_work(struct work_struct *work)
+{
+	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
+				struct nvme_ctrl, fw_act_work);
+	struct nvme_fw_slot_info_log *log;
+	u32 csts;
+
+	nvme_stop_queues(ctrl);
+	while (1) {
+		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
+			break;
+
+		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CC,
+				&ctrl->ctrl_config))
+			break;
+
+		if (csts == ~0 || ((ctrl->ctrl_config & NVME_CC_ENABLE)
+				&& !(csts & NVME_CSTS_PP)))
+			break;
+
+		if (time_after(jiffies, ctrl->fw_act_timeout)) {
+			dev_warn(ctrl->device,
+				"Fw activation timeout, reset controller\n");
+			ctrl->ops->reset_ctrl(ctrl);
+			break;
+		}
+		msleep(100);
+	}
+	nvme_start_queues(ctrl);
+	ctrl->fw_act_timeout = 0;
+	log =  kmalloc(sizeof(struct nvme_fw_slot_info_log), GFP_KERNEL);
+	if (!log)
+		return;
+	if (nvme_get_log_page(ctrl, NVME_LOG_FW_SLOT, log))
+		dev_warn(ctrl->device,
+				"Get FW SLOT INFO log error\n");
+	kfree(log);
+}
+
 void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		union nvme_result *res)
 {
@@ -2305,6 +2344,34 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		dev_info(ctrl->device, "rescanning\n");
 		nvme_queue_scan(ctrl);
 		break;
+	case NVME_AER_NOTICE_FW_ACT_STARTING:
+	{
+		u32 csts;
+
+		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
+			return;
+		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CC,
+					&ctrl->ctrl_config))
+			return;
+
+		if ((ctrl->ctrl_config & NVME_CC_ENABLE)
+					&& (csts & NVME_CSTS_PP)) {
+			if (ctrl->mtfa)
+				ctrl->fw_act_timeout = jiffies +
+					msecs_to_jiffies(ctrl->mtfa * 100);
+			else
+				ctrl->fw_act_timeout = jiffies +
+					msecs_to_jiffies(admin_timeout * 1000);
+
+			schedule_delayed_work(&ctrl->fw_act_work, 0);
+		}
+		break;
+	}
+	case NVME_AER_ERR_FW_IMG_LOAD:
+		dev_warn(ctrl->device, "FW image load error\n");
+		cancel_delayed_work(&ctrl->fw_act_work);
+		ctrl->fw_act_timeout = 0;
+		break;
 	default:
 		dev_warn(ctrl->device, "async event result %08x\n", result);
 	}
@@ -2351,6 +2418,7 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
 {
 	flush_work(&ctrl->async_event_work);
 	flush_work(&ctrl->scan_work);
+	cancel_delayed_work(&ctrl->fw_act_work);
 	nvme_remove_namespaces(ctrl);
 
 	device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
@@ -2398,6 +2466,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	ctrl->quirks = quirks;
 	INIT_WORK(&ctrl->scan_work, nvme_scan_work);
 	INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
+	INIT_DELAYED_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
 
 	ret = nvme_set_instance(ctrl);
 	if (ret)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index e78143a..082e4a2 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -141,6 +141,8 @@ struct nvme_ctrl {
 	u16 cntlid;
 
 	u32 ctrl_config;
+	u16 mtfa;
+	unsigned long fw_act_timeout;
 
 	u32 page_size;
 	u32 max_hw_sectors;
@@ -162,6 +164,7 @@ struct nvme_ctrl {
 	struct work_struct scan_work;
 	struct work_struct async_event_work;
 	struct delayed_work ka_work;
+	struct delayed_work fw_act_work;
 
 	/* Power saving configuration */
 	u64 ps_max_latency_us;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 282fc69..53b7e73 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -144,6 +144,7 @@ enum {
 	NVME_CSTS_RDY		= 1 << 0,
 	NVME_CSTS_CFS		= 1 << 1,
 	NVME_CSTS_NSSRO		= 1 << 4,
+	NVME_CSTS_PP		= 1 << 5,
 	NVME_CSTS_SHST_NORMAL	= 0 << 2,
 	NVME_CSTS_SHST_OCCUR	= 1 << 2,
 	NVME_CSTS_SHST_CMPLT	= 2 << 2,
@@ -354,6 +355,8 @@ enum {
 
 enum {
 	NVME_AER_NOTICE_NS_CHANGED	= 0x0002,
+	NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102,
+	NVME_AER_ERR_FW_IMG_LOAD	= 0x0500,
 };
 
 struct nvme_lba_range_type {
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2 1/2] nvme: Adding support for FW Slot info log
  2017-05-29 14:17     ` [PATCH v2 1/2] nvme: Adding support for FW Slot info log Arnav Dawn
@ 2017-05-29 17:54       ` Christoph Hellwig
  2017-05-30 11:30         ` Sagi Grimberg
       [not found]         ` <CGME20170530140219epcas1p31163ac83162e9fa69328f5484e4a1daa@epcas1p3.samsung.com>
  0 siblings, 2 replies; 11+ messages in thread
From: Christoph Hellwig @ 2017-05-29 17:54 UTC (permalink / raw)


On Mon, May 29, 2017@07:47:38PM +0530, Arnav Dawn wrote:
> +int nvme_get_log_page(struct nvme_ctrl *dev, int log_type, void *log)
>  {
>  	struct nvme_command c = { };
> +	int log_size;
>  
> +	if (!log)
> +		return -EINVAL;
>  
> +	c.common.opcode = nvme_admin_get_log_page;
>  
> +	switch (log_type) {
> +	case NVME_LOG_SMART:
> +		log_size = sizeof(struct nvme_smart_log);
> +		c.common.nsid = cpu_to_le32(0xFFFFFFFF);
> +		c.common.cdw10[0] = cpu_to_le32(
> +			(((log_size / 4) - 1) << 16) | NVME_LOG_SMART);
> +		break;
> +	case NVME_LOG_FW_SLOT:
> +		log_size = sizeof(struct nvme_fw_slot_info_log);
> +		c.common.nsid = cpu_to_le32(0xFFFFFFFF);
> +		c.common.cdw10[0] = cpu_to_le32(
> +			(((log_size / 4) - 1) << 16) | NVME_LOG_FW_SLOT);
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return nvme_submit_sync_cmd(dev->admin_q, &c, log, log_size);

This functioin has basically no common code.  I think the current
nvme_get_log_page should be renamed to nvme_get_smart_log and moved
to scsi.c, and you'll just add a new helper for your log page or
even just open code it in the caller.  A little inline helper for
setting up dword10 would be nice, though.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2 2/2] nvme: Add support for FW activation without reset
  2017-05-29 14:18     ` [PATCH v2 2/2] nvme: Add support for FW activation without reset Arnav Dawn
@ 2017-05-29 17:57       ` Christoph Hellwig
       [not found]         ` <CGME20170530140329epcas1p464afeb48d692b2108592a39a7593f6b3@epcas1p4.samsung.com>
  2017-05-30 11:38       ` Sagi Grimberg
  1 sibling, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2017-05-29 17:57 UTC (permalink / raw)


On Mon, May 29, 2017@07:48:18PM +0530, Arnav Dawn wrote:
> This patch adds support for handling FW activation without reset.
> On completion of FW-activation-starting AER, all queues are
> paused till CSTS.PP is cleared or timed out (exceeds max time for
> fw activtion MTFA). If device fails to clear CSTS.PP within MTFA,
> driver issues reset controller.
> 
> Signed-off-by: Arnav Dawn <a.dawn at samsung.com>
> ---
>  drivers/nvme/host/core.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++-
>  drivers/nvme/host/nvme.h |  3 ++
>  include/linux/nvme.h     |  3 ++
>  3 files changed, 76 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 01d622e..4537346 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -1610,7 +1610,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
>  	nvme_set_queue_limits(ctrl, ctrl->admin_q);
>  	ctrl->sgls = le32_to_cpu(id->sgls);
>  	ctrl->kas = le16_to_cpu(id->kas);
> -
> +	ctrl->mtfa = le16_to_cpu(id->mtfa);
>  	ctrl->npss = id->npss;
>  	prev_apsta = ctrl->apsta;
>  	if (ctrl->quirks & NVME_QUIRK_NO_APST) {
> @@ -2279,6 +2279,45 @@ static void nvme_async_event_work(struct work_struct *work)
>  	spin_unlock_irq(&ctrl->lock);
>  }
>  
> +static void nvme_fw_act_work(struct work_struct *work)
> +{
> +	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
> +				struct nvme_ctrl, fw_act_work);
> +	struct nvme_fw_slot_info_log *log;
> +	u32 csts;
> +
> +	nvme_stop_queues(ctrl);
> +	while (1) {
> +		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
> +			break;
> +
> +		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CC,
> +				&ctrl->ctrl_config))
> +			break;
> +
> +		if (csts == ~0 || ((ctrl->ctrl_config & NVME_CC_ENABLE)
> +				&& !(csts & NVME_CSTS_PP)))
> +			break;

Can you factor these checks into a helper that your loop checks in
the while statement?

> +	nvme_start_queues(ctrl);
> +	ctrl->fw_act_timeout = 0;
> +	log =  kmalloc(sizeof(struct nvme_fw_slot_info_log), GFP_KERNEL);
> +	if (!log)
> +		return;
> +	if (nvme_get_log_page(ctrl, NVME_LOG_FW_SLOT, log))
> +		dev_warn(ctrl->device,
> +				"Get FW SLOT INFO log error\n");
> +	kfree(log);

So we don't do anything with the log.  It might be worth having a comment
like:

	/* read the firmware activation log to clear the AER */

for readers of the code that don't know the NVMe spec inside out.

> +	case NVME_AER_NOTICE_FW_ACT_STARTING:
> +	{
> +		u32 csts;
> +
> +		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
> +			return;
> +		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CC,
> +					&ctrl->ctrl_config))
> +			return;
> +
> +		if ((ctrl->ctrl_config & NVME_CC_ENABLE)
> +					&& (csts & NVME_CSTS_PP)) {
> +			if (ctrl->mtfa)
> +				ctrl->fw_act_timeout = jiffies +
> +					msecs_to_jiffies(ctrl->mtfa * 100);
> +			else
> +				ctrl->fw_act_timeout = jiffies +
> +					msecs_to_jiffies(admin_timeout * 1000);
> +
> +			schedule_delayed_work(&ctrl->fw_act_work, 0);
> +		}
> +		break;

Please split this code into a helper.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2 1/2] nvme: Adding support for FW Slot info log
  2017-05-29 17:54       ` Christoph Hellwig
@ 2017-05-30 11:30         ` Sagi Grimberg
       [not found]         ` <CGME20170530140219epcas1p31163ac83162e9fa69328f5484e4a1daa@epcas1p3.samsung.com>
  1 sibling, 0 replies; 11+ messages in thread
From: Sagi Grimberg @ 2017-05-30 11:30 UTC (permalink / raw)



> This functioin has basically no common code.  I think the current
> nvme_get_log_page should be renamed to nvme_get_smart_log and moved
> to scsi.c, and you'll just add a new helper for your log page or
> even just open code it in the caller.  A little inline helper for
> setting up dword10 would be nice, though.

Agree, that'd be cleaner.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2 2/2] nvme: Add support for FW activation without reset
  2017-05-29 14:18     ` [PATCH v2 2/2] nvme: Add support for FW activation without reset Arnav Dawn
  2017-05-29 17:57       ` Christoph Hellwig
@ 2017-05-30 11:38       ` Sagi Grimberg
  2017-06-07 10:47         ` Arnav Dawn
  1 sibling, 1 reply; 11+ messages in thread
From: Sagi Grimberg @ 2017-05-30 11:38 UTC (permalink / raw)




On 29/05/17 17:18, Arnav Dawn wrote:
> This patch adds support for handling FW activation without reset.
> On completion of FW-activation-starting AER, all queues are
> paused till CSTS.PP is cleared or timed out (exceeds max time for
> fw activtion MTFA). If device fails to clear CSTS.PP within MTFA,
> driver issues reset controller.
>
> Signed-off-by: Arnav Dawn <a.dawn at samsung.com>
> ---
>  drivers/nvme/host/core.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++-
>  drivers/nvme/host/nvme.h |  3 ++
>  include/linux/nvme.h     |  3 ++
>  3 files changed, 76 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 01d622e..4537346 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -1610,7 +1610,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
>  	nvme_set_queue_limits(ctrl, ctrl->admin_q);
>  	ctrl->sgls = le32_to_cpu(id->sgls);
>  	ctrl->kas = le16_to_cpu(id->kas);
> -
> +	ctrl->mtfa = le16_to_cpu(id->mtfa);
>  	ctrl->npss = id->npss;
>  	prev_apsta = ctrl->apsta;
>  	if (ctrl->quirks & NVME_QUIRK_NO_APST) {
> @@ -2279,6 +2279,45 @@ static void nvme_async_event_work(struct work_struct *work)
>  	spin_unlock_irq(&ctrl->lock);
>  }
>
> +static void nvme_fw_act_work(struct work_struct *work)
> +{
> +	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
> +				struct nvme_ctrl, fw_act_work);
> +	struct nvme_fw_slot_info_log *log;
> +	u32 csts;
> +
> +	nvme_stop_queues(ctrl);
> +	while (1) {
> +		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
> +			break;
> +
> +		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CC,
> +				&ctrl->ctrl_config))
> +			break;
> +
> +		if (csts == ~0 || ((ctrl->ctrl_config & NVME_CC_ENABLE)
> +				&& !(csts & NVME_CSTS_PP)))
> +			break;
> +
> +		if (time_after(jiffies, ctrl->fw_act_timeout)) {
> +			dev_warn(ctrl->device,
> +				"Fw activation timeout, reset controller\n");
> +			ctrl->ops->reset_ctrl(ctrl);
> +			break;

this triggers controller reset with the request queues stopped, which is
different than all the rest of the call sites. I didn't find anything
wrong with stopping the queues twice although I vaguely remember having
issues with these sort of flows in the past, but things changed since then.

> +		}
> +		msleep(100);
> +	}
> +	nvme_start_queues(ctrl);
> +	ctrl->fw_act_timeout = 0;

Why is this needed?

> +	log =  kmalloc(sizeof(struct nvme_fw_slot_info_log), GFP_KERNEL);
> +	if (!log)
> +		return;
> +	if (nvme_get_log_page(ctrl, NVME_LOG_FW_SLOT, log))
> +		dev_warn(ctrl->device,
> +				"Get FW SLOT INFO log error\n");
> +	kfree(log);
> +}
> +
>  void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
>  		union nvme_result *res)
>  {
> @@ -2305,6 +2344,34 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
>  		dev_info(ctrl->device, "rescanning\n");
>  		nvme_queue_scan(ctrl);
>  		break;
> +	case NVME_AER_NOTICE_FW_ACT_STARTING:
> +	{
> +		u32 csts;
> +
> +		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
> +			return;
> +		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CC,
> +					&ctrl->ctrl_config))
> +			return;
> +
> +		if ((ctrl->ctrl_config & NVME_CC_ENABLE)
> +					&& (csts & NVME_CSTS_PP)) {
> +			if (ctrl->mtfa)
> +				ctrl->fw_act_timeout = jiffies +
> +					msecs_to_jiffies(ctrl->mtfa * 100);
> +			else
> +				ctrl->fw_act_timeout = jiffies +
> +					msecs_to_jiffies(admin_timeout * 1000);
> +
> +			schedule_delayed_work(&ctrl->fw_act_work, 0);
> +		}
> +		break;
> +	}
> +	case NVME_AER_ERR_FW_IMG_LOAD:
> +		dev_warn(ctrl->device, "FW image load error\n");
> +		cancel_delayed_work(&ctrl->fw_act_work);
> +		ctrl->fw_act_timeout = 0;

Why is this needed?

> +		break;
>  	default:
>  		dev_warn(ctrl->device, "async event result %08x\n", result);
>  	}
> @@ -2351,6 +2418,7 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
>  {
>  	flush_work(&ctrl->async_event_work);
>  	flush_work(&ctrl->scan_work);
> +	cancel_delayed_work(&ctrl->fw_act_work);

Shouldn't this be cancel_delayed_work_sync?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2 1/2] nvme: Adding support for FW Slot info log
       [not found]         ` <CGME20170530140219epcas1p31163ac83162e9fa69328f5484e4a1daa@epcas1p3.samsung.com>
@ 2017-05-30 14:02           ` Arnav Dawn
  0 siblings, 0 replies; 11+ messages in thread
From: Arnav Dawn @ 2017-05-30 14:02 UTC (permalink / raw)



> 
> This functioin has basically no common code.  I think the current
> nvme_get_log_page should be renamed to nvme_get_smart_log and
> moved to scsi.c, and you'll just add a new helper for your log page or even
> just open code it in the caller.  A little inline helper for setting up dword10
> would be nice, though.

Agree, i'll change it in next version.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2 2/2] nvme: Add support for FW activation without reset
       [not found]         ` <CGME20170530140329epcas1p464afeb48d692b2108592a39a7593f6b3@epcas1p4.samsung.com>
@ 2017-05-30 14:03           ` Arnav Dawn
  0 siblings, 0 replies; 11+ messages in thread
From: Arnav Dawn @ 2017-05-30 14:03 UTC (permalink / raw)


> > +	nvme_stop_queues(ctrl);
> > +	while (1) {
> > +		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
> > +			break;
> > +
> > +		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CC,
> > +				&ctrl->ctrl_config))
> > +			break;
> > +
> > +		if (csts == ~0 || ((ctrl->ctrl_config & NVME_CC_ENABLE)
> > +				&& !(csts & NVME_CSTS_PP)))
> > +			break;
> 
> Can you factor these checks into a helper that your loop checks in the while
> statement?

Sure, i'll take this in next version.

> > +	nvme_start_queues(ctrl);
> > +	ctrl->fw_act_timeout = 0;
> > +	log =  kmalloc(sizeof(struct nvme_fw_slot_info_log), GFP_KERNEL);
> > +	if (!log)
> > +		return;
> > +	if (nvme_get_log_page(ctrl, NVME_LOG_FW_SLOT, log))
> > +		dev_warn(ctrl->device,
> > +				"Get FW SLOT INFO log error\n");
> > +	kfree(log);
> 
> So we don't do anything with the log.  It might be worth having a comment
> like:
> 
> 	/* read the firmware activation log to clear the AER */
> 
> for readers of the code that don't know the NVMe spec inside out.

Agree, that'll be helpful. I'll add.


> > +	case NVME_AER_NOTICE_FW_ACT_STARTING:
> > +	{
> > +		u32 csts;
> > +
> > +		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
> > +			return;
> > +		if (ctrl->ops->reg_read32(ctrl, NVME_REG_CC,
> > +					&ctrl->ctrl_config))
> > +			return;
> > +
> > +		if ((ctrl->ctrl_config & NVME_CC_ENABLE)
> > +					&& (csts & NVME_CSTS_PP)) {
> > +			if (ctrl->mtfa)
> > +				ctrl->fw_act_timeout = jiffies +
> > +					msecs_to_jiffies(ctrl->mtfa * 100);
> > +			else
> > +				ctrl->fw_act_timeout = jiffies +
> > +					msecs_to_jiffies(admin_timeout *
> 1000);
> > +
> > +			schedule_delayed_work(&ctrl->fw_act_work, 0);
> > +		}
> > +		break;
> 
> Please split this code into a helper.

Agree, that'll be lot cleaner.  

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2 2/2] nvme: Add support for FW activation without reset
  2017-05-30 11:38       ` Sagi Grimberg
@ 2017-06-07 10:47         ` Arnav Dawn
  2017-06-07 11:15           ` Sagi Grimberg
  0 siblings, 1 reply; 11+ messages in thread
From: Arnav Dawn @ 2017-06-07 10:47 UTC (permalink / raw)




On Tuesday 30 May 2017 05:08 PM, Sagi Grimberg wrote:
>
>
> On 29/05/17 17:18, Arnav Dawn wrote:
>>
>  

>> +        }
>> +        msleep(100);
>> +    }
>> +    nvme_start_queues(ctrl);
>> +    ctrl->fw_act_timeout = 0;
>
> Why is this needed?
>
Fw_act_timeout is reset to initial state once done.

>> +    log =  kmalloc(sizeof(struct nvme_fw_slot_info_log), GFP_KERNEL);
>> +    if (!log)
>> +        return;
>> +    if (nvme_get_log_page(ctrl, NVME_LOG_FW_SLOT, log))
>> +        dev_warn(ctrl->device,
>> +                "Get FW SLOT INFO log error\n");
>> +    kfree(log);
>> +}
>> +
>>  void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
>>          union nvme_result *res)
>>  {
>> @@ -2305,6 +2344,34 @@ void nvme_complete_async_event(struct
>> nvme_ctrl *ctrl, __le16 status,
>>          dev_info(ctrl->device, "rescanning\n");
>>          nvme_queue_scan(ctrl);
>>          break;
>> +    case NVME_AER_NOTICE_FW_ACT_STARTING:
>> +    {
>> +        u32 csts;
>> +
>> +        if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
>> +            return;
>> +        if (ctrl->ops->reg_read32(ctrl, NVME_REG_CC,
>> +                    &ctrl->ctrl_config))
>> +            return;
>> +
>> +        if ((ctrl->ctrl_config & NVME_CC_ENABLE)
>> +                    && (csts & NVME_CSTS_PP)) {
>> +            if (ctrl->mtfa)
>> +                ctrl->fw_act_timeout = jiffies +
>> +                    msecs_to_jiffies(ctrl->mtfa * 100);
>> +            else
>> +                ctrl->fw_act_timeout = jiffies +
>> +                    msecs_to_jiffies(admin_timeout * 1000);
>> +
>> +            schedule_delayed_work(&ctrl->fw_act_work, 0);
>> +        }
>> +        break;
>> +    }
>> +    case NVME_AER_ERR_FW_IMG_LOAD:
>> +        dev_warn(ctrl->device, "FW image load error\n");
>> +        cancel_delayed_work(&ctrl->fw_act_work);
>> +        ctrl->fw_act_timeout = 0;
>
> Why is this needed?
In case of  fw load fail AER, fw_act_timeout is reset to initial state.

>> +        break;
>>      default:
>>          dev_warn(ctrl->device, "async event result %08x\n", result);
>>      }
>> @@ -2351,6 +2418,7 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
>>  {
>>      flush_work(&ctrl->async_event_work);
>>      flush_work(&ctrl->scan_work);
>> +    cancel_delayed_work(&ctrl->fw_act_work);
>
> Shouldn't this be cancel_delayed_work_sync?
>
yes it should be. Thank you.
>
>
Regards
Arnav Dawn

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2 2/2] nvme: Add support for FW activation without reset
  2017-06-07 10:47         ` Arnav Dawn
@ 2017-06-07 11:15           ` Sagi Grimberg
  0 siblings, 0 replies; 11+ messages in thread
From: Sagi Grimberg @ 2017-06-07 11:15 UTC (permalink / raw)



> Fw_act_timeout is reset to initial state once done.

But you set it before scheduling the delayed work, you either
set it always or you don't (AFACIT)

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-06-07 11:15 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CGME20170529141634epcas5p3913fde49b40fcce3cc15971688820834@epcas5p3.samsung.com>
2017-05-29 14:16 ` [PATCH v2 0/2] nvme: Support for FW activation without reset Arnav Dawn
     [not found]   ` <CGME20170529141731epcas5p39667840c01555c7cf49527167535f82c@epcas5p3.samsung.com>
2017-05-29 14:17     ` [PATCH v2 1/2] nvme: Adding support for FW Slot info log Arnav Dawn
2017-05-29 17:54       ` Christoph Hellwig
2017-05-30 11:30         ` Sagi Grimberg
     [not found]         ` <CGME20170530140219epcas1p31163ac83162e9fa69328f5484e4a1daa@epcas1p3.samsung.com>
2017-05-30 14:02           ` Arnav Dawn
     [not found]   ` <CGME20170529141807epcas1p259e2e9b04327285d18f2d62bb6ccbce1@epcas1p2.samsung.com>
2017-05-29 14:18     ` [PATCH v2 2/2] nvme: Add support for FW activation without reset Arnav Dawn
2017-05-29 17:57       ` Christoph Hellwig
     [not found]         ` <CGME20170530140329epcas1p464afeb48d692b2108592a39a7593f6b3@epcas1p4.samsung.com>
2017-05-30 14:03           ` Arnav Dawn
2017-05-30 11:38       ` Sagi Grimberg
2017-06-07 10:47         ` Arnav Dawn
2017-06-07 11:15           ` Sagi Grimberg

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.