* [PATCH 0/7] NVMe Async Event Notification updates
@ 2017-07-07 16:22 Keith Busch
  2017-07-07 16:22 ` [PATCH 1/7] nvme/fc: There is only one AEN request Keith Busch
                   ` (7 more replies)
  0 siblings, 8 replies; 37+ messages in thread
From: Keith Busch @ 2017-07-07 16:22 UTC (permalink / raw)


This series culminates in udev handling for AENs, which was much easier
to implement without all the unreachable handling for multiple AEN
requests.

Some background on why a single AEN is used today: we can't tell blk-mq
about those reserved tags lest we deadlock a freeze. These requests are
not worth the trouble of implementing the tag accounting required for
multiple outstanding requests, and one AEN request is sufficient for
all uses anyway.

I don't have a setup to try the nvme/fc parts, so while I think this
looks correct, it's only compile tested.

Keith Busch (7):
  nvme/fc: There is only one AEN request
  nvme/fc: Fix admin queue depth setup
  nvme/fc: remove unused "queue_size" field
  nvme: Centralize blk-mq tag and AEN counts
  nvme: Only one AEN request
  nvme: Unexport starting async event work
  nvme: Send change uevent when AEN completes

 drivers/nvme/host/core.c   |  46 ++++++------
 drivers/nvme/host/fc.c     | 175 ++++++++++++++++++---------------------------
 drivers/nvme/host/nvme.h   |   6 +-
 drivers/nvme/host/pci.c    |  10 +--
 drivers/nvme/host/rdma.c   |  19 ++---
 drivers/nvme/target/loop.c |  16 ++---
 include/linux/nvme.h       |   7 ++
 7 files changed, 107 insertions(+), 172 deletions(-)

-- 
2.5.5

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 1/7] nvme/fc: There is only one AEN request
  2017-07-07 16:22 [PATCH 0/7] NVMe Async Event Notification updates Keith Busch
@ 2017-07-07 16:22 ` Keith Busch
  2017-07-10 23:12   ` James Smart
  2017-07-07 16:22 ` [PATCH 2/7] nvme/fc: Fix admin queue depth setup Keith Busch
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 37+ messages in thread
From: Keith Busch @ 2017-07-07 16:22 UTC (permalink / raw)


This patch removes the multiple AEN request handling. There can be
only one.

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/fc.c | 145 +++++++++++++++++++++----------------------------
 1 file changed, 61 insertions(+), 84 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index d666ada..8355937 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -165,7 +165,7 @@ struct nvme_fc_ctrl {
 	u32			iocnt;
 	wait_queue_head_t	ioabort_wait;
 
-	struct nvme_fc_fcp_op	aen_ops[NVME_FC_NR_AEN_COMMANDS];
+	struct nvme_fc_fcp_op	aen_op;
 
 	struct nvme_ctrl	ctrl;
 };
@@ -1192,39 +1192,33 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
 }
 
 static void
-nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
+nvme_fc_abort_aen_op(struct nvme_fc_ctrl *ctrl)
 {
-	struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
 	unsigned long flags;
-	int i, ret;
 
-	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
-		if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
-			continue;
+	if (atomic_read(&ctrl->aen_op.state) != FCPOP_STATE_ACTIVE)
+		return;
 
+	spin_lock_irqsave(&ctrl->lock, flags);
+	if (ctrl->flags & FCCTRL_TERMIO) {
+		ctrl->iocnt++;
+		ctrl->aen_op.flags |= FCOP_FLAGS_TERMIO;
+	}
+	spin_unlock_irqrestore(&ctrl->lock, flags);
+
+	if (__nvme_fc_abort_op(ctrl, &ctrl->aen_op)) {
+		/*
+		 * if __nvme_fc_abort_op failed the io wasn't
+		 * active. Thus this call path is running in
+		 * parallel to the io complete. Treat as non-error.
+		 */
+
+		/* back out the flags/counters */
 		spin_lock_irqsave(&ctrl->lock, flags);
-		if (ctrl->flags & FCCTRL_TERMIO) {
-			ctrl->iocnt++;
-			aen_op->flags |= FCOP_FLAGS_TERMIO;
-		}
+		if (ctrl->flags & FCCTRL_TERMIO)
+			ctrl->iocnt--;
+		ctrl->aen_op.flags &= ~FCOP_FLAGS_TERMIO;
 		spin_unlock_irqrestore(&ctrl->lock, flags);
-
-		ret = __nvme_fc_abort_op(ctrl, aen_op);
-		if (ret) {
-			/*
-			 * if __nvme_fc_abort_op failed the io wasn't
-			 * active. Thus this call path is running in
-			 * parallel to the io complete. Treat as non-error.
-			 */
-
-			/* back out the flags/counters */
-			spin_lock_irqsave(&ctrl->lock, flags);
-			if (ctrl->flags & FCCTRL_TERMIO)
-				ctrl->iocnt--;
-			aen_op->flags &= ~FCOP_FLAGS_TERMIO;
-			spin_unlock_irqrestore(&ctrl->lock, flags);
-			return;
-		}
 	}
 }
 
@@ -1454,59 +1448,49 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
 }
 
 static int
-nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
+nvme_fc_init_aen_op(struct nvme_fc_ctrl *ctrl)
 {
-	struct nvme_fc_fcp_op *aen_op;
 	struct nvme_fc_cmd_iu *cmdiu;
 	struct nvme_command *sqe;
 	void *private;
-	int i, ret;
-
-	aen_op = ctrl->aen_ops;
-	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
-		private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
-						GFP_KERNEL);
-		if (!private)
-			return -ENOMEM;
-
-		cmdiu = &aen_op->cmd_iu;
-		sqe = &cmdiu->sqe;
-		ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0],
-				aen_op, (struct request *)NULL,
-				(AEN_CMDID_BASE + i));
-		if (ret) {
-			kfree(private);
-			return ret;
-		}
+	int ret;
 
-		aen_op->flags = FCOP_FLAGS_AEN;
-		aen_op->fcp_req.first_sgl = NULL; /* no sg list */
-		aen_op->fcp_req.private = private;
+	private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
+					GFP_KERNEL);
+	if (!private)
+		return -ENOMEM;
 
-		memset(sqe, 0, sizeof(*sqe));
-		sqe->common.opcode = nvme_admin_async_event;
-		/* Note: core layer may overwrite the sqe.command_id value */
-		sqe->common.command_id = AEN_CMDID_BASE + i;
+	cmdiu = &ctrl->aen_op.cmd_iu;
+	sqe = &cmdiu->sqe;
+	ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0],
+			&ctrl->aen_op, (struct request *)NULL,
+			AEN_CMDID_BASE);
+	if (ret) {
+		kfree(private);
+		return ret;
 	}
+
+	ctrl->aen_op.flags = FCOP_FLAGS_AEN;
+	ctrl->aen_op.fcp_req.first_sgl = NULL; /* no sg list */
+	ctrl->aen_op.fcp_req.private = private;
+
+	memset(sqe, 0, sizeof(*sqe));
+	sqe->common.opcode = nvme_admin_async_event;
+	sqe->common.command_id = AEN_CMDID_BASE;
+
 	return 0;
 }
 
 static void
-nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl)
+nvme_fc_term_aen_op(struct nvme_fc_ctrl *ctrl)
 {
-	struct nvme_fc_fcp_op *aen_op;
-	int i;
-
-	aen_op = ctrl->aen_ops;
-	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
-		if (!aen_op->fcp_req.private)
-			continue;
+	if (!ctrl->aen_op.fcp_req.private)
+		return;
 
-		__nvme_fc_exit_request(ctrl, aen_op);
+	__nvme_fc_exit_request(ctrl, &ctrl->aen_op);
 
-		kfree(aen_op->fcp_req.private);
-		aen_op->fcp_req.private = NULL;
-	}
+	kfree(ctrl->aen_op.fcp_req.private);
+	ctrl->aen_op.fcp_req.private = NULL;
 }
 
 static inline void
@@ -2041,10 +2025,8 @@ static void
 nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 {
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
-	struct nvme_fc_fcp_op *aen_op;
 	unsigned long flags;
 	bool terminating = false;
-	blk_status_t ret;
 
 	if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
 		return;
@@ -2056,14 +2038,9 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 
 	if (terminating)
 		return;
-
-	aen_op = &ctrl->aen_ops[aer_idx];
-
-	ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0,
-					NVMEFC_FCP_NODATA);
-	if (ret)
-		dev_err(ctrl->ctrl.device,
-			"failed async event work [%d]\n", aer_idx);
+	if (nvme_fc_start_fcp_op(ctrl, &ctrl->queues[0], &ctrl->aen_op, 0,
+						NVMEFC_FCP_NODATA))
+		dev_err(ctrl->ctrl.device, "failed async event work\n");
 }
 
 static void
@@ -2376,9 +2353,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 		opts->queue_size = ctrl->ctrl.maxcmd;
 	}
 
-	ret = nvme_fc_init_aen_ops(ctrl);
+	ret = nvme_fc_init_aen_op(ctrl);
 	if (ret)
-		goto out_term_aen_ops;
+		goto out_term_aen_op;
 
 	/*
 	 * Create the io queues
@@ -2390,7 +2367,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 		else
 			ret = nvme_fc_reinit_io_queues(ctrl);
 		if (ret)
-			goto out_term_aen_ops;
+			goto out_term_aen_op;
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -2402,8 +2379,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 
 	return 0;	/* Success */
 
-out_term_aen_ops:
-	nvme_fc_term_aen_ops(ctrl);
+out_term_aen_op:
+	nvme_fc_term_aen_op(ctrl);
 out_disconnect_admin_queue:
 	/* send a Disconnect(association) LS to fc-nvme target */
 	nvme_fc_xmt_disconnect_assoc(ctrl);
@@ -2471,7 +2448,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
 
 	/* kill the aens as they are a separate path */
-	nvme_fc_abort_aen_ops(ctrl);
+	nvme_fc_abort_aen_op(ctrl);
 
 	/* wait for all io that had to be aborted */
 	spin_lock_irqsave(&ctrl->lock, flags);
@@ -2479,7 +2456,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 	ctrl->flags &= ~FCCTRL_TERMIO;
 	spin_unlock_irqrestore(&ctrl->lock, flags);
 
-	nvme_fc_term_aen_ops(ctrl);
+	nvme_fc_term_aen_op(ctrl);
 
 	/*
 	 * send a Disconnect(association) LS to fc-nvme target
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 2/7] nvme/fc: Fix admin queue depth setup
  2017-07-07 16:22 [PATCH 0/7] NVMe Async Event Notification updates Keith Busch
  2017-07-07 16:22 ` [PATCH 1/7] nvme/fc: There is only one AEN request Keith Busch
@ 2017-07-07 16:22 ` Keith Busch
  2017-07-10  5:59   ` Sagi Grimberg
  2017-07-10 23:40   ` James Smart
  2017-07-07 16:22 ` [PATCH 3/7] nvme/fc: remove unused "queue_size" field Keith Busch
                   ` (5 subsequent siblings)
  7 siblings, 2 replies; 37+ messages in thread
From: Keith Busch @ 2017-07-07 16:22 UTC (permalink / raw)


NVME_FC_AQ_BLKMQ_DEPTH is one less than what we actually want the admin
queue's depth to be, since it subtracts the AEN entry.

We also need to subtract 1 from the blk-mq depth to always leave an
empty queue entry, and to avoid colliding command ids with the AEN
request.
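
For reference, here is the arithmetic this works out to, using the
defines already in this driver (NVME_AQ_DEPTH is 32); the numbers in
the comments are purely illustrative:

  #define NVME_AQ_DEPTH           32
  #define NVME_FC_NR_AEN_COMMANDS 1
  #define NVME_FC_AQ_BLKMQ_DEPTH  (NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)  /* 31 */
  #define AEN_CMDID_BASE          (NVME_FC_AQ_BLKMQ_DEPTH + 1)               /* 32 */

  /*
   * hardware admin queue: created with the full NVME_AQ_DEPTH (32 entries)
   * blk-mq admin tag set: NVME_FC_AQ_BLKMQ_DEPTH - 1 == 30 tags (ids 0..29),
   * which always leaves one queue entry empty and keeps the blk-mq command
   * ids clear of AEN_CMDID_BASE.
   */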

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/fc.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 8355937..6328158 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2284,16 +2284,15 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	 * Create the admin queue
 	 */
 
-	nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH);
+	nvme_fc_init_queue(ctrl, 0, NVME_AQ_DEPTH);
 
 	ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
-				NVME_FC_AQ_BLKMQ_DEPTH);
+				NVME_AQ_DEPTH);
 	if (ret)
 		goto out_free_queue;
 
 	ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
-				NVME_FC_AQ_BLKMQ_DEPTH,
-				(NVME_FC_AQ_BLKMQ_DEPTH / 4));
+				NVME_AQ_DEPTH, (NVME_AQ_DEPTH / 4));
 	if (ret)
 		goto out_delete_hw_queue;
 
@@ -2693,7 +2692,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
 	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
-	ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH;
+	ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH - 1;
 	ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
 	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
 	ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 3/7] nvme/fc: remove unused "queue_size" field
  2017-07-07 16:22 [PATCH 0/7] NVMe Async Event Notification updates Keith Busch
  2017-07-07 16:22 ` [PATCH 1/7] nvme/fc: There is only one AEN request Keith Busch
  2017-07-07 16:22 ` [PATCH 2/7] nvme/fc: Fix admin queue depth setup Keith Busch
@ 2017-07-07 16:22 ` Keith Busch
  2017-07-10  6:00   ` Sagi Grimberg
  2017-07-10 23:44   ` James Smart
  2017-07-07 16:22 ` [PATCH 4/7] nvme: Centralize blk-mq tag and AEN counts Keith Busch
                   ` (4 subsequent siblings)
  7 siblings, 2 replies; 37+ messages in thread
From: Keith Busch @ 2017-07-07 16:22 UTC (permalink / raw)


The queue size was being saved in a structure, but never used anywhere.
The size is obtained through other means, so there's no reason to
duplicate it without a user.

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/fc.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 6328158..26a3c5d 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -50,7 +50,6 @@ struct nvme_fc_queue {
 	struct device		*dev;
 	struct blk_mq_hw_ctx	*hctx;
 	void			*lldd_handle;
-	int			queue_size;
 	size_t			cmnd_capsule_len;
 	u32			qnum;
 	u32			rqcnt;
@@ -1526,7 +1525,7 @@ nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 }
 
 static void
-nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size)
+nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx)
 {
 	struct nvme_fc_queue *queue;
 
@@ -1542,8 +1541,6 @@ nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size)
 	else
 		queue->cmnd_capsule_len = sizeof(struct nvme_command);
 
-	queue->queue_size = queue_size;
-
 	/*
 	 * Considered whether we should allocate buffers for all SQEs
 	 * and CQEs and dma map them - mapping their respective entries
@@ -1667,7 +1664,7 @@ nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl)
 	int i;
 
 	for (i = 1; i < ctrl->ctrl.queue_count; i++)
-		nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize);
+		nvme_fc_init_queue(ctrl, i);
 }
 
 static void
@@ -2284,7 +2281,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	 * Create the admin queue
 	 */
 
-	nvme_fc_init_queue(ctrl, 0, NVME_AQ_DEPTH);
+	nvme_fc_init_queue(ctrl, 0);
 
 	ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
 				NVME_AQ_DEPTH);
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 4/7] nvme: Centralize blk-mq tag and AEN counts
  2017-07-07 16:22 [PATCH 0/7] NVMe Async Event Notification updates Keith Busch
                   ` (2 preceding siblings ...)
  2017-07-07 16:22 ` [PATCH 3/7] nvme/fc: remove unused "queue_size" field Keith Busch
@ 2017-07-07 16:22 ` Keith Busch
  2017-07-10  6:01   ` Sagi Grimberg
  2017-07-10 23:51   ` James Smart
  2017-07-07 16:22 ` [PATCH 5/7] nvme: Only one AEN request Keith Busch
                   ` (3 subsequent siblings)
  7 siblings, 2 replies; 37+ messages in thread
From: Keith Busch @ 2017-07-07 16:22 UTC (permalink / raw)


All the drivers were duplicating the AEN accounting, yet they all depend
on the core using the exact same setting of a single AEN deducted from
the tag set depth. This patch moves the admin queue command accounting
into the core.
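
Condensed, the shared accounting everything converges on looks like the
following (taken from the hunks below; the numeric comments assume
NVME_AQ_DEPTH's existing value of 32):

  /* include/linux/nvme.h */
  #define NVME_NR_AERS        1
  #define NVME_AQ_BLKMQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AERS)  /* 31 */

  /*
   * Each transport then sizes its admin blk-mq tag set as
   * NVME_AQ_BLKMQ_DEPTH - 1 (30 tags, ids 0..29) and submits its single
   * AEN with command id NVME_AQ_BLKMQ_DEPTH (31), just above that range.
   */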

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/fc.c     | 17 ++++-------------
 drivers/nvme/host/nvme.h   |  1 -
 drivers/nvme/host/pci.c    |  6 ------
 drivers/nvme/host/rdma.c   | 14 +++-----------
 drivers/nvme/target/loop.c | 14 +++-----------
 include/linux/nvme.h       |  7 +++++++
 6 files changed, 17 insertions(+), 42 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 26a3c5d..753761c 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -30,15 +30,6 @@
 /* *************************** Data Structures/Defines ****************** */
 
 
-/*
- * We handle AEN commands ourselves and don't even let the
- * block layer know about them.
- */
-#define NVME_FC_NR_AEN_COMMANDS	1
-#define NVME_FC_AQ_BLKMQ_DEPTH	\
-	(NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
-#define AEN_CMDID_BASE		(NVME_FC_AQ_BLKMQ_DEPTH + 1)
-
 enum nvme_fc_queue_flags {
 	NVME_FC_Q_CONNECTED = (1 << 0),
 };
@@ -1463,7 +1454,7 @@ nvme_fc_init_aen_op(struct nvme_fc_ctrl *ctrl)
 	sqe = &cmdiu->sqe;
 	ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0],
 			&ctrl->aen_op, (struct request *)NULL,
-			AEN_CMDID_BASE);
+			NVME_AQ_BLKMQ_DEPTH);
 	if (ret) {
 		kfree(private);
 		return ret;
@@ -1475,7 +1466,7 @@ nvme_fc_init_aen_op(struct nvme_fc_ctrl *ctrl)
 
 	memset(sqe, 0, sizeof(*sqe));
 	sqe->common.opcode = nvme_admin_async_event;
-	sqe->common.command_id = AEN_CMDID_BASE;
+	sqe->common.command_id = NVME_AQ_BLKMQ_DEPTH;
 
 	return 0;
 }
@@ -2025,7 +2016,7 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 	unsigned long flags;
 	bool terminating = false;
 
-	if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
+	if (aer_idx > NVME_NR_AERS)
 		return;
 
 	spin_lock_irqsave(&ctrl->lock, flags);
@@ -2689,7 +2680,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
 	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
-	ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH - 1;
+	ctrl->admin_tag_set.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1;
 	ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
 	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
 	ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 8f2a168..0510a33 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -291,7 +291,6 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
 int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
 		bool send);
 
-#define NVME_NR_AERS	1
 void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		union nvme_result *res);
 void nvme_queue_async_events(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 882ed36..6bd49b2 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -39,12 +39,6 @@
 #define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
 
-/*
- * We handle AEN commands ourselves and don't even let the
- * block layer know about them.
- */
-#define NVME_AQ_BLKMQ_DEPTH	(NVME_AQ_DEPTH - NVME_NR_AERS)
-
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0);
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index da04df1..8d811f4 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -42,14 +42,6 @@
 
 #define NVME_RDMA_MAX_INLINE_SEGMENTS	1
 
-/*
- * We handle AEN commands ourselves and don't even let the
- * block layer know about them.
- */
-#define NVME_RDMA_NR_AEN_COMMANDS      1
-#define NVME_RDMA_AQ_BLKMQ_DEPTH       \
-	(NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
-
 struct nvme_rdma_device {
 	struct ib_device       *dev;
 	struct ib_pd	       *pd;
@@ -1116,7 +1108,7 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 
 	memset(cmd, 0, sizeof(*cmd));
 	cmd->common.opcode = nvme_admin_async_event;
-	cmd->common.command_id = NVME_RDMA_AQ_BLKMQ_DEPTH;
+	cmd->common.command_id = NVME_AQ_BLKMQ_DEPTH;
 	cmd->common.flags |= NVME_CMD_SGL_METABUF;
 	nvme_rdma_set_sg_null(cmd);
 
@@ -1178,7 +1170,7 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
 	 * for them but rather special case them here.
 	 */
 	if (unlikely(nvme_rdma_queue_idx(queue) == 0 &&
-			cqe->command_id >= NVME_RDMA_AQ_BLKMQ_DEPTH))
+			cqe->command_id == NVME_AQ_BLKMQ_DEPTH))
 		nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
 				&cqe->result);
 	else
@@ -1542,7 +1534,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 
 	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
 	ctrl->admin_tag_set.ops = &nvme_rdma_admin_mq_ops;
-	ctrl->admin_tag_set.queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH;
+	ctrl->admin_tag_set.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1;
 	ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
 	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
 	ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_rdma_request) +
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 717ed7d..336e46e 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -23,14 +23,6 @@
 
 #define NVME_LOOP_MAX_SEGMENTS		256
 
-/*
- * We handle AEN commands ourselves and don't even let the
- * block layer know about them.
- */
-#define NVME_LOOP_NR_AEN_COMMANDS	1
-#define NVME_LOOP_AQ_BLKMQ_DEPTH	\
-	(NVME_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
-
 struct nvme_loop_iod {
 	struct nvme_request	nvme_req;
 	struct nvme_command	cmd;
@@ -113,7 +105,7 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
 	 * for them but rather special case them here.
 	 */
 	if (unlikely(nvme_loop_queue_idx(queue) == 0 &&
-			cqe->command_id >= NVME_LOOP_AQ_BLKMQ_DEPTH)) {
+			cqe->command_id >= NVME_AQ_BLKMQ_DEPTH)) {
 		nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
 				&cqe->result);
 	} else {
@@ -201,7 +193,7 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 
 	memset(&iod->cmd, 0, sizeof(iod->cmd));
 	iod->cmd.common.opcode = nvme_admin_async_event;
-	iod->cmd.common.command_id = NVME_LOOP_AQ_BLKMQ_DEPTH;
+	iod->cmd.common.command_id = NVME_AQ_BLKMQ_DEPTH;
 	iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
 
 	if (!nvmet_req_init(&iod->req, &queue->nvme_cq, &queue->nvme_sq,
@@ -357,7 +349,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 
 	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
 	ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
-	ctrl->admin_tag_set.queue_depth = NVME_LOOP_AQ_BLKMQ_DEPTH;
+	ctrl->admin_tag_set.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1;
 	ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
 	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
 	ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 6b8ee9e..a253977 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -89,6 +89,13 @@ enum {
 
 #define NVME_AQ_DEPTH		32
 
+/*
+ * We handle AEN commands ourselves and don't even let the
+ * block layer know about them.
+ */
+#define NVME_NR_AERS	1
+#define NVME_AQ_BLKMQ_DEPTH	(NVME_AQ_DEPTH - NVME_NR_AERS)
+
 enum {
 	NVME_REG_CAP	= 0x0000,	/* Controller Capabilities */
 	NVME_REG_VS	= 0x0008,	/* Version */
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 5/7] nvme: Only one AEN request
  2017-07-07 16:22 [PATCH 0/7] NVMe Async Event Notification updates Keith Busch
                   ` (3 preceding siblings ...)
  2017-07-07 16:22 ` [PATCH 4/7] nvme: Centralize blk-mq tag and AEN counts Keith Busch
@ 2017-07-07 16:22 ` Keith Busch
  2017-07-10  6:06   ` Sagi Grimberg
  2017-07-10 23:53   ` James Smart
  2017-07-07 16:23 ` [PATCH 6/7] nvme: Unexport starting async event work Keith Busch
                   ` (2 subsequent siblings)
  7 siblings, 2 replies; 37+ messages in thread
From: Keith Busch @ 2017-07-07 16:22 UTC (permalink / raw)


This driver issues only one AEN request, so all the logic to handle
multiple ones is unnecessary. Even if we did allow multiple ones, the
implementation is broken as it will create command id collisions. For
those two reasons, this patch deletes all the aer index usage, assuming
there's only one.

As a consequence of removing the event limit, the core's callback for
these events is simplified to only handle the successful case. The
"abort" status handling for AER has been broken for some time anyway:
it was meant to reclaim the aer count from a driver-cancelled command
after a reset, but that was relying on blk-mq to manage the reserved
tags. We don't use blk-mq tags for AER, though, so we will never see a
synthesized abort status.

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/core.c   | 27 +++------------------------
 drivers/nvme/host/fc.c     |  5 +----
 drivers/nvme/host/nvme.h   |  3 +--
 drivers/nvme/host/pci.c    |  4 ++--
 drivers/nvme/host/rdma.c   |  5 +----
 drivers/nvme/target/loop.c |  2 +-
 6 files changed, 9 insertions(+), 37 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index cb96f4a..bfd0045 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2512,36 +2512,15 @@ static void nvme_async_event_work(struct work_struct *work)
 	struct nvme_ctrl *ctrl =
 		container_of(work, struct nvme_ctrl, async_event_work);
 
-	spin_lock_irq(&ctrl->lock);
-	while (ctrl->event_limit > 0) {
-		int aer_idx = --ctrl->event_limit;
-
-		spin_unlock_irq(&ctrl->lock);
-		ctrl->ops->submit_async_event(ctrl, aer_idx);
-		spin_lock_irq(&ctrl->lock);
-	}
-	spin_unlock_irq(&ctrl->lock);
+	ctrl->ops->submit_async_event(ctrl);
 }
 
 void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		union nvme_result *res)
 {
 	u32 result = le32_to_cpu(res->u32);
-	bool done = true;
 
-	switch (le16_to_cpu(status) >> 1) {
-	case NVME_SC_SUCCESS:
-		done = false;
-		/*FALLTHRU*/
-	case NVME_SC_ABORT_REQ:
-		++ctrl->event_limit;
-		queue_work(nvme_wq, &ctrl->async_event_work);
-		break;
-	default:
-		break;
-	}
-
-	if (done)
+	if ((le16_to_cpu(status) >> 1) != NVME_SC_SUCCESS)
 		return;
 
 	switch (result & 0xff07) {
@@ -2552,12 +2531,12 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 	default:
 		dev_warn(ctrl->device, "async event result %08x\n", result);
 	}
+	queue_work(nvme_wq, &ctrl->async_event_work);
 }
 EXPORT_SYMBOL_GPL(nvme_complete_async_event);
 
 void nvme_queue_async_events(struct nvme_ctrl *ctrl)
 {
-	ctrl->event_limit = NVME_NR_AERS;
 	queue_work(nvme_wq, &ctrl->async_event_work);
 }
 EXPORT_SYMBOL_GPL(nvme_queue_async_events);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 753761c..f1ac8c0 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2010,15 +2010,12 @@ nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 }
 
 static void
-nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
+nvme_fc_submit_async_event(struct nvme_ctrl *arg)
 {
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
 	unsigned long flags;
 	bool terminating = false;
 
-	if (aer_idx > NVME_NR_AERS)
-		return;
-
 	spin_lock_irqsave(&ctrl->lock, flags);
 	if (ctrl->flags & FCCTRL_TERMIO)
 		terminating = true;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 0510a33..edca92d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -153,7 +153,6 @@ struct nvme_ctrl {
 	u16 nssa;
 	u16 nr_streams;
 	atomic_t abort_limit;
-	u8 event_limit;
 	u8 vwc;
 	u32 vs;
 	u32 sgls;
@@ -226,7 +225,7 @@ struct nvme_ctrl_ops {
 	int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
 	int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
 	void (*free_ctrl)(struct nvme_ctrl *ctrl);
-	void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx);
+	void (*submit_async_event)(struct nvme_ctrl *ctrl);
 	int (*delete_ctrl)(struct nvme_ctrl *ctrl);
 	int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
 };
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6bd49b2..a4e3261 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -846,7 +846,7 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 	return __nvme_poll(nvmeq, tag);
 }
 
-static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx)
+static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
 {
 	struct nvme_dev *dev = to_nvme_dev(ctrl);
 	struct nvme_queue *nvmeq = dev->queues[0];
@@ -854,7 +854,7 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx)
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = nvme_admin_async_event;
-	c.common.command_id = NVME_AQ_BLKMQ_DEPTH + aer_idx;
+	c.common.command_id = NVME_AQ_BLKMQ_DEPTH;
 
 	spin_lock_irq(&nvmeq->q_lock);
 	__nvme_submit_cmd(nvmeq, &c);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 8d811f4..f739540 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1091,7 +1091,7 @@ static struct blk_mq_tags *nvme_rdma_tagset(struct nvme_rdma_queue *queue)
 	return queue->ctrl->tag_set.tags[queue_idx - 1];
 }
 
-static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
+static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
 {
 	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg);
 	struct nvme_rdma_queue *queue = &ctrl->queues[0];
@@ -1101,9 +1101,6 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 	struct ib_sge sge;
 	int ret;
 
-	if (WARN_ON_ONCE(aer_idx != 0))
-		return;
-
 	ib_dma_sync_single_for_cpu(dev, sqe->dma, sizeof(*cmd), DMA_TO_DEVICE);
 
 	memset(cmd, 0, sizeof(*cmd));
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 336e46e..af0d188 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -185,7 +185,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return BLK_STS_OK;
 }
 
-static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
+static void nvme_loop_submit_async_event(struct nvme_ctrl *arg)
 {
 	struct nvme_loop_ctrl *ctrl = to_loop_ctrl(arg);
 	struct nvme_loop_queue *queue = &ctrl->queues[0];
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 6/7] nvme: Unexport starting async event work
  2017-07-07 16:22 [PATCH 0/7] NVMe Async Event Notification updates Keith Busch
                   ` (4 preceding siblings ...)
  2017-07-07 16:22 ` [PATCH 5/7] nvme: Only one AEN request Keith Busch
@ 2017-07-07 16:23 ` Keith Busch
  2017-07-07 18:11   ` Christoph Hellwig
  2017-07-07 16:23 ` [PATCH 7/7] nvme: Send change uevent when AEN completes Keith Busch
  2017-07-07 18:09 ` [PATCH 0/7] NVMe Async Event Notification updates Christoph Hellwig
  7 siblings, 1 reply; 37+ messages in thread
From: Keith Busch @ 2017-07-07 16:23 UTC (permalink / raw)


Async event work is for core use only and should not be called directly
from drivers.

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/core.c | 3 +--
 drivers/nvme/host/nvme.h | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index bfd0045..783db84 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2535,11 +2535,10 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 }
 EXPORT_SYMBOL_GPL(nvme_complete_async_event);
 
-void nvme_queue_async_events(struct nvme_ctrl *ctrl)
+static void nvme_queue_async_events(struct nvme_ctrl *ctrl)
 {
 	queue_work(nvme_wq, &ctrl->async_event_work);
 }
-EXPORT_SYMBOL_GPL(nvme_queue_async_events);
 
 static DEFINE_IDA(nvme_instance_ida);
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index edca92d..b65c22f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -292,7 +292,6 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
 
 void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		union nvme_result *res);
-void nvme_queue_async_events(struct nvme_ctrl *ctrl);
 
 void nvme_stop_queues(struct nvme_ctrl *ctrl);
 void nvme_start_queues(struct nvme_ctrl *ctrl);
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-07 16:22 [PATCH 0/7] NVMe Async Event Notification updates Keith Busch
                   ` (5 preceding siblings ...)
  2017-07-07 16:23 ` [PATCH 6/7] nvme: Unexport starting async event work Keith Busch
@ 2017-07-07 16:23 ` Keith Busch
  2017-07-07 18:14   ` Christoph Hellwig
                     ` (2 more replies)
  2017-07-07 18:09 ` [PATCH 0/7] NVMe Async Event Notification updates Christoph Hellwig
  7 siblings, 3 replies; 37+ messages in thread
From: Keith Busch @ 2017-07-07 16:23 UTC (permalink / raw)


This will give udev a chance to handle asynchronous event notification
and clear the log to unmask future events of the same type. Since the
core driver allows only one AEN request at a time, we can only have one
possible outstanding uevent to send. This implementation saves the last
AEN result from the IRQ handler, and sends the uevent change notification
when the AEN work is rescheduled.

The udev rule used to test this was the following:

  ACTION=="change", SUBSYSTEM=="nvme", ENV{NVME_AEN}=="*", \
        RUN+="/bin/sh -c '/usr/local/sbin/nvme get-log $env{DEVNAME} --log-id=$(( ($env{NVME_AEN} >> 16) & 0xff )) --log-len=4096 >> /tmp/nvme-log'"

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/core.c | 16 ++++++++++++++++
 drivers/nvme/host/nvme.h |  1 +
 2 files changed, 17 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 783db84..4abde2da 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2507,11 +2507,26 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
 
+void nvme_aen_uevent(struct nvme_ctrl *ctrl)
+{
+	char buffer[20]; /* NVME_AEN=0xffffffff\0 */
+	char *envp[2] = {buffer, NULL};
+	u32 aen = ctrl->aen;
+
+	ctrl->aen = 0;
+	if (!aen)
+		return;
+
+	snprintf(buffer, sizeof(buffer), "NVME_AEN=%#08x", aen);
+	kobject_uevent_env(&ctrl->device->kobj, KOBJ_CHANGE, envp);
+}
+
 static void nvme_async_event_work(struct work_struct *work)
 {
 	struct nvme_ctrl *ctrl =
 		container_of(work, struct nvme_ctrl, async_event_work);
 
+	nvme_aen_uevent(ctrl);
 	ctrl->ops->submit_async_event(ctrl);
 }
 
@@ -2531,6 +2546,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 	default:
 		dev_warn(ctrl->device, "async event result %08x\n", result);
 	}
+	ctrl->aen = result;
 	queue_work(nvme_wq, &ctrl->async_event_work);
 }
 EXPORT_SYMBOL_GPL(nvme_complete_async_event);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index b65c22f..898815f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -159,6 +159,7 @@ struct nvme_ctrl {
 	u16 kas;
 	u8 npss;
 	u8 apsta;
+	u32 aen;
 	unsigned int kato;
 	bool subsystem;
 	unsigned long quirks;
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 0/7] NVMe Async Event Notification updates
  2017-07-07 16:22 [PATCH 0/7] NVMe Async Event Notification updates Keith Busch
                   ` (6 preceding siblings ...)
  2017-07-07 16:23 ` [PATCH 7/7] nvme: Send change uevent when AEN completes Keith Busch
@ 2017-07-07 18:09 ` Christoph Hellwig
  7 siblings, 0 replies; 37+ messages in thread
From: Christoph Hellwig @ 2017-07-07 18:09 UTC (permalink / raw)


I do like the code consolidation.  I'll throw in a few comments
while at the airport; a real review will follow.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 6/7] nvme: Unexport starting async event work
  2017-07-07 16:23 ` [PATCH 6/7] nvme: Unexport starting async event work Keith Busch
@ 2017-07-07 18:11   ` Christoph Hellwig
  2017-07-07 19:54     ` Keith Busch
  0 siblings, 1 reply; 37+ messages in thread
From: Christoph Hellwig @ 2017-07-07 18:11 UTC (permalink / raw)


> +static void nvme_queue_async_events(struct nvme_ctrl *ctrl)
>  {
>  	queue_work(nvme_wq, &ctrl->async_event_work);
>  }
> -EXPORT_SYMBOL_GPL(nvme_queue_async_events);

I'd just kill this helper now that it's not even exported.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-07 16:23 ` [PATCH 7/7] nvme: Send change uevent when AEN completes Keith Busch
@ 2017-07-07 18:14   ` Christoph Hellwig
  2017-07-13 21:29     ` Keith Busch
  2017-07-10 10:17   ` Zou Ming
  2017-07-10 23:57   ` James Smart
  2 siblings, 1 reply; 37+ messages in thread
From: Christoph Hellwig @ 2017-07-07 18:14 UTC (permalink / raw)


On Fri, Jul 07, 2017 at 12:23:01PM -0400, Keith Busch wrote:
> This will give udev a chance to handle asynchronous event notification
> and clear the log to unmask future events of the same type. Since the
> core driver allows only one AEN request at a time, we can only have one
> possible outstanding uevent to send. This implementation saves the last
> AEN result from the IRQ handler, and sends the uevent change notification
> when the AEN work is rescheduled.
> 
> The udev rule used to test this was the following:
> 
>   ACTION=="change", SUBSYSTEM=="nvme", ENV{NVME_AEN}=="*", \
>         RUN+="/bin/sh -c '/usr/local/sbin/nvme get-log $env{DEVNAME} --log-id=$(( ($env{NVME_AEN} >> 16) & 0xff )) --log-len=4096 >> /tmp/nvme-log'"


Can we have an nvme-aen helper that gets invoked by a trivial udev rule,
which would read a config file for policy?

Also I think we need to document our policy on which AERs
get forwarded to userspace. There are some we really should be
handling in the kernel (fw activation, which is in progress; namespace
data changes, which really need fixing; and ANA will also require kernel
handling).  Is there a way to state it's up to the kernel to reserve
any event for itself?  Or should we just forward very specific AERs
to userspace?

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 6/7] nvme: Unexport starting async event work
  2017-07-07 18:11   ` Christoph Hellwig
@ 2017-07-07 19:54     ` Keith Busch
  0 siblings, 0 replies; 37+ messages in thread
From: Keith Busch @ 2017-07-07 19:54 UTC (permalink / raw)


On Fri, Jul 07, 2017 at 08:11:46PM +0200, Christoph Hellwig wrote:
> > +static void nvme_queue_async_events(struct nvme_ctrl *ctrl)
> >  {
> >  	queue_work(nvme_wq, &ctrl->async_event_work);
> >  }
> > -EXPORT_SYMBOL_GPL(nvme_queue_async_events);
> 
> I'd just kill this helper now that it's not even exported.

Agreed, I'll take that out for v2. I'll wait till next week to see if
there are any comments on the previous patches before sending.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 2/7] nvme/fc: Fix admin queue depth setup
  2017-07-07 16:22 ` [PATCH 2/7] nvme/fc: Fix admin queue depth setup Keith Busch
@ 2017-07-10  5:59   ` Sagi Grimberg
  2017-07-10 22:57     ` Keith Busch
  2017-07-10 23:40   ` James Smart
  1 sibling, 1 reply; 37+ messages in thread
From: Sagi Grimberg @ 2017-07-10  5:59 UTC (permalink / raw)


> The NVME_FC_AQ_BLKMQ_DEPTH is one less than what we actually want the
> admin queue's depth to be since it subtracts the AEN entry.
> 
> We also need to subtract 1 from the blk-mq depth to always leave an
> empty queue entry, and to not collide command id with the AEN request.
> 
> Signed-off-by: Keith Busch <keith.busch at intel.com>

nvme-loop needs the same patch.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 3/7] nvme/fc: remove unused "queue_size" field
  2017-07-07 16:22 ` [PATCH 3/7] nvme/fc: remove unused "queue_size" field Keith Busch
@ 2017-07-10  6:00   ` Sagi Grimberg
  2017-07-10 23:44   ` James Smart
  1 sibling, 0 replies; 37+ messages in thread
From: Sagi Grimberg @ 2017-07-10  6:00 UTC (permalink / raw)


Looks good,

Reviewed-by: Sagi Grimberg <sagi at grimberg.me>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 4/7] nvme: Centralize blk-mq tag and AEN counts
  2017-07-07 16:22 ` [PATCH 4/7] nvme: Centralize blk-mq tag and AEN counts Keith Busch
@ 2017-07-10  6:01   ` Sagi Grimberg
  2017-07-10 23:51   ` James Smart
  1 sibling, 0 replies; 37+ messages in thread
From: Sagi Grimberg @ 2017-07-10  6:01 UTC (permalink / raw)


Looks good,

Reviewed-by: Sagi Grimberg <sagi at grimberg.me>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 5/7] nvme: Only one AEN request
  2017-07-07 16:22 ` [PATCH 5/7] nvme: Only one AEN request Keith Busch
@ 2017-07-10  6:06   ` Sagi Grimberg
  2017-07-10 23:53   ` James Smart
  1 sibling, 0 replies; 37+ messages in thread
From: Sagi Grimberg @ 2017-07-10  6:06 UTC (permalink / raw)


Looks good,

Does this apply on my patch?
[PATCH v3] nvme: split nvme_uninit_ctrl into stop and uninit

I'll update nvme-4.13

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-07 16:23 ` [PATCH 7/7] nvme: Send change uevent when AEN completes Keith Busch
  2017-07-07 18:14   ` Christoph Hellwig
@ 2017-07-10 10:17   ` Zou Ming
  2017-07-10 17:38     ` Keith Busch
  2017-07-10 23:57   ` James Smart
  2 siblings, 1 reply; 37+ messages in thread
From: Zou Ming @ 2017-07-10 10:17 UTC (permalink / raw)


Hi Keith:
   Thanks. Sending all AEN info to user space looks good to me.

   In addition I have a question: is there a scenario where repeated asynchronous events are received?
   The event is sent to user space for asynchronous processing, and at the same time a new asynchronous event request command is issued.
   User space may process the event more slowly than the new command is issued, resulting in a duplicate asynchronous event report.

Best regards,
Zou Ming.

On 2017/7/8 0:23, Keith Busch wrote:
> This will give udev a chance to handle asynchronous event notification
> and clear the log to unmask future events of the same type. Since the
> core driver allows only one AEN request at a time, we can only have one
> possible outstanding uevent to send. This implementation saves the last
> AEN result from the IRQ handler, and sends the uevent change notification
> when the AEN work is rescheduled.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-10 10:17   ` Zou Ming
@ 2017-07-10 17:38     ` Keith Busch
  0 siblings, 0 replies; 37+ messages in thread
From: Keith Busch @ 2017-07-10 17:38 UTC (permalink / raw)


On Mon, Jul 10, 2017 at 06:17:19PM +0800, Zou Ming wrote:
> Hi Keith:
>    Thanks. Sending all AEN info to user space looks good to me.
> 
>    In addition I have a question: is there a scenario where repeated asynchronous events are received?
>    The event is sent to user space for asynchronous processing, and at the same time a new asynchronous event request command is issued.
>    User space may process the event more slowly than the new command is issued, resulting in a duplicate asynchronous event report.

The driver can have only one async event request outstanding, so it
can't have more than one uevent available to send. The driver will send
the KOBJ_CHANGE prior to sending the next AEN request, so that should be
sufficient if anything is monitoring the events.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 2/7] nvme/fc: Fix admin queue depth setup
  2017-07-10  5:59   ` Sagi Grimberg
@ 2017-07-10 22:57     ` Keith Busch
  0 siblings, 0 replies; 37+ messages in thread
From: Keith Busch @ 2017-07-10 22:57 UTC (permalink / raw)


On Mon, Jul 10, 2017 at 08:59:02AM +0300, Sagi Grimberg wrote:
> > The NVME_FC_AQ_BLKMQ_DEPTH is one less than what we actually want the
> > admin queue's depth to be since it subtracts the AEN entry.
> > 
> > We also need to subtract 1 from the blk-mq depth to always leave an
> > empty queue entry, and to not collide command id with the AEN request.
> > 
> > Signed-off-by: Keith Busch <keith.busch at intel.com>
> 
> nvme-loop needs the same patch.

Yeah, it looks like it does, though it's inadvertently fixed in patch
4. In truth, patch 4 makes this one unnecessary for fc too, but I didn't
want to change the q-depth and centralize its definition in a single
patch. I'll spin a new version with the loop fix in the series, as well
as Christoph's suggestions.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 1/7] nvme/fc: There is only one AEN request
  2017-07-07 16:22 ` [PATCH 1/7] nvme/fc: There is only one AEN request Keith Busch
@ 2017-07-10 23:12   ` James Smart
  0 siblings, 0 replies; 37+ messages in thread
From: James Smart @ 2017-07-10 23:12 UTC (permalink / raw)


On 7/7/2017 9:22 AM, Keith Busch wrote:
> This patch removes the multiple AEN request handling. There can be
> only one.
>

Looks fine, although my style would have defined a local variable of 
aen_op rather than continued use of ctrl->aen_op.

Reviewed-By: James Smart <james.smart at broadcom.com>

-- james

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 2/7] nvme/fc: Fix admin queue depth setup
  2017-07-07 16:22 ` [PATCH 2/7] nvme/fc: Fix admin queue depth setup Keith Busch
  2017-07-10  5:59   ` Sagi Grimberg
@ 2017-07-10 23:40   ` James Smart
  2017-07-10 23:49     ` James Smart
  1 sibling, 1 reply; 37+ messages in thread
From: James Smart @ 2017-07-10 23:40 UTC (permalink / raw)


On 7/7/2017 9:22 AM, Keith Busch wrote:
> The NVME_FC_AQ_BLKMQ_DEPTH is one less than what we actually want the
> admin queue's depth to be since it subtracts the AEN entry.
>
> We also need to subtract 1 from the blk-mq depth to always leave an
> empty queue entry, and to not collide command id with the AEN request.

I don't have an issue with what changed, but have a few comments.

Why not kill the NVME_FC_NR_AEN_COMMANDS define and correct the
NVME_FC_AQ_BLKMQ_DEPTH define with NVME_NR_AERS as well?

Given AEN_CMDID_BASE is NVME_FC_AQ_BLKMQ_DEPTH + 1, there never should
have been a collision regardless.

Agree with the -1 for the empty queue entry, although the reserved tag
for Connect assured it was never an issue.

Note: the logic for the above is in the rdma transport as well.

Reviewed-by: James Smart <james.smart at broadcom.com>

-- james

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 3/7] nvme/fc: remove unused "queue_size" field
  2017-07-07 16:22 ` [PATCH 3/7] nvme/fc: remove unused "queue_size" field Keith Busch
  2017-07-10  6:00   ` Sagi Grimberg
@ 2017-07-10 23:44   ` James Smart
  1 sibling, 0 replies; 37+ messages in thread
From: James Smart @ 2017-07-10 23:44 UTC (permalink / raw)


On 7/7/2017 9:22 AM, Keith Busch wrote:
> This was being saved in a structure, but never used anywhere. The queue
> size is obtained through other means, so there's no reason to duplicate
> this without a user for it.
>
Looks good

Reviewed-by:  James Smart <james.smart at broadcom.com>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 2/7] nvme/fc: Fix admin queue depth setup
  2017-07-10 23:40   ` James Smart
@ 2017-07-10 23:49     ` James Smart
  0 siblings, 0 replies; 37+ messages in thread
From: James Smart @ 2017-07-10 23:49 UTC (permalink / raw)


On 7/10/2017 4:40 PM, James Smart wrote:
> On 7/7/2017 9:22 AM, Keith Busch wrote:
>> The NVME_FC_AQ_BLKMQ_DEPTH is one less than what we actually want the
>> admin queue's depth to be since it subtracts the AEN entry.
>>
>> We also need to subtract 1 from the blk-mq depth to always leave an
>> empty queue entry, and to not collide command id with the AEN request.
>
> I don't have an issue with what changed, but have a few comments.
>
> Why not kill the NVME_FC_NR_AEN_COMMANDS define and  correct the 
> NVME_FC_AQ_BLKMQ_DEPTH define with NVME_NR_AERS as well ?
>
> Given AEN_CMDID_BASE is NVME_FC_AQ_BLKMQ_DEPTH + 1 there never should 
> have been a collision regardless.
>
> Agree with the -1 for empty queue levels. Although the reserved tag 
> for Connect assured it was never an issue.
>
> Note: the logic for the above are in the rdma transport as well.
>
> Reviewed-by: James Smart <james.smart at broadcom.com>
>
> -- james
>
ah - patch 4 did what I suggested. Looks good.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 4/7] nvme: Centralize blk-mq tag and AEN counts
  2017-07-07 16:22 ` [PATCH 4/7] nvme: Centralize blk-mq tag and AEN counts Keith Busch
  2017-07-10  6:01   ` Sagi Grimberg
@ 2017-07-10 23:51   ` James Smart
  1 sibling, 0 replies; 37+ messages in thread
From: James Smart @ 2017-07-10 23:51 UTC (permalink / raw)


On 7/7/2017 9:22 AM, Keith Busch wrote:
> All the drivers were duplicating AEN accounting, yet they all depend on
> the core to use the exact same settings of a single AEN to be deducted
> from the tagset depth. This patch moves admin queue command accounting
> to the core.
>
>

Looks good.

Reviewed-by: James Smart <james.smart at broadcom.com>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 5/7] nvme: Only one AEN request
  2017-07-07 16:22 ` [PATCH 5/7] nvme: Only one AEN request Keith Busch
  2017-07-10  6:06   ` Sagi Grimberg
@ 2017-07-10 23:53   ` James Smart
  1 sibling, 0 replies; 37+ messages in thread
From: James Smart @ 2017-07-10 23:53 UTC (permalink / raw)


On 7/7/2017 9:22 AM, Keith Busch wrote:
> This driver issues only one AEN request, so all the logic to handle
> multiple ones is unnecessary. Even if we did allow multiple ones, the
> implementation is broken as it will create command id collisions. For
> those two reasons, this patch deletes all the aer index usage, assuming
> there's only one.
>
> As a consequence of removing the event limit, the core's callback for
> these events is simplified to only handle the successful case. The
> "abort" status handling for AER has been broken for some time anyway:
> it was meant to reclaim the aer count from a driver cancelled command
> after a reset, but that was relying on blk-mq to manage the reserved
> tags. We don't use blk-mq tags for AER, though, so we will never see a
> synthesized abort status.
>

Looks good

Reviewed-by: James Smart  <james.smart at broadcom.com>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-07 16:23 ` [PATCH 7/7] nvme: Send change uevent when AEN completes Keith Busch
  2017-07-07 18:14   ` Christoph Hellwig
  2017-07-10 10:17   ` Zou Ming
@ 2017-07-10 23:57   ` James Smart
  2017-07-11 14:43     ` Keith Busch
  2 siblings, 1 reply; 37+ messages in thread
From: James Smart @ 2017-07-10 23:57 UTC (permalink / raw)


On 7/7/2017 9:23 AM, Keith Busch wrote:
> This will give udev a chance to handle asynchronous event notification
> and clear the log to unmask future events of the same type. Since the
> core driver allows only one AEN request at a time, we can only have one
> possible outstanding uevent to send. This implementation saves the last
> AEN result from the IRQ handler, and sends the uevent change notification
> when the AEN work is rescheduled.
>
> The udev rule used to test this was the following:
>
>    ACTION=="change", SUBSYSTEM=="nvme", ENV{NVME_AEN}=="*", \
>          RUN+="/bin/sh -c '/usr/local/sbin/nvme get-log $env{DEVNAME} --log-id=$(( ($env{NVME_AEN} >> 16) & 0xff )) --log-len=4096 >> /tmp/nvme-log'"
>
> Signed-off-by: Keith Busch <keith.busch at intel.com>
> ---
>   drivers/nvme/host/core.c | 16 ++++++++++++++++
>   drivers/nvme/host/nvme.h |  1 +
>   2 files changed, 17 insertions(+)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 783db84..4abde2da 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2507,11 +2507,26 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
>   }
>   EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
>   
> +void nvme_aen_uevent(struct nvme_ctrl *ctrl)
> +{
> +	char buffer[20]; /* NVME_AEN=0xffffffff\0 */
> +	char *envp[2] = {buffer, NULL};
> +	u32 aen = ctrl->aen;
> +
> +	ctrl->aen = 0;
> +	if (!aen)
> +		return;
> +
> +	snprintf(buffer, sizeof(buffer), "NVME_AEN=%#08x", aen);
> +	kobject_uevent_env(&ctrl->device->kobj, KOBJ_CHANGE, envp);
> +}
> +

Passing on a comment from Christoph received on a similar fc udev event:
"Please use kasprintf so that we have a dynamic allocation and don't 
need to hardcode buffer sizes"

-- james

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-10 23:57   ` James Smart
@ 2017-07-11 14:43     ` Keith Busch
  2017-07-11 17:14       ` James Smart
  0 siblings, 1 reply; 37+ messages in thread
From: Keith Busch @ 2017-07-11 14:43 UTC (permalink / raw)


On Mon, Jul 10, 2017 at 04:57:43PM -0700, James Smart wrote:
> On 7/7/2017 9:23 AM, Keith Busch wrote:
> > +void nvme_aen_uevent(struct nvme_ctrl *ctrl)
> > +{
> > +	char buffer[20]; /* NVME_AEN=0xffffffff\0 */
> > +	char *envp[2] = {buffer, NULL};
> > +	u32 aen = ctrl->aen;
> > +
> > +	ctrl->aen = 0;
> > +	if (!aen)
> > +		return;
> > +
> > +	snprintf(buffer, sizeof(buffer), "NVME_AEN=%#08x", aen);
> > +	kobject_uevent_env(&ctrl->device->kobj, KOBJ_CHANGE, envp);
> > +}
> > +
> 
> Passing on a comment from Christoph received on a similar fc udev event:
> "Please use kasprintf so that we have a dynamic allocation and don't need to
> hardcode buffer sizes"

I agree that usage for dynamic strings is better, but this is a
fixed-size string that will always be 20 bytes.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-11 14:43     ` Keith Busch
@ 2017-07-11 17:14       ` James Smart
  2017-07-13 16:09         ` Keith Busch
  0 siblings, 1 reply; 37+ messages in thread
From: James Smart @ 2017-07-11 17:14 UTC (permalink / raw)


On 7/11/2017 7:43 AM, Keith Busch wrote:
> On Mon, Jul 10, 2017 at 04:57:43PM -0700, James Smart wrote:
>> On 7/7/2017 9:23 AM, Keith Busch wrote:
>>> +void nvme_aen_uevent(struct nvme_ctrl *ctrl)
>>> +{
>>> +	char buffer[20]; /* NVME_AEN=0xffffffff\0 */
>>> +	char *envp[2] = {buffer, NULL};
>>> +	u32 aen = ctrl->aen;
>>> +
>>> +	ctrl->aen = 0;
>>> +	if (!aen)
>>> +		return;
>>> +
>>> +	snprintf(buffer, sizeof(buffer), "NVME_AEN=%#08x", aen);
>>> +	kobject_uevent_env(&ctrl->device->kobj, KOBJ_CHANGE, envp);
>>> +}
>>> +
>> Passing on a comment from Christoph received on a similar fc udev event:
>> "Please use kasprintf so that we have a dynamic allocation and don't need to
>> hardcode buffer sizes"
> I agree that usage for dynamic strings is better, but this is a fixed
> sized string that will always be 20 bytes.

My scenario was the same - fixed size, not dynamic.  Just being the 
messenger for consistency.

-- james

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-11 17:14       ` James Smart
@ 2017-07-13 16:09         ` Keith Busch
  0 siblings, 0 replies; 37+ messages in thread
From: Keith Busch @ 2017-07-13 16:09 UTC (permalink / raw)


On Tue, Jul 11, 2017 at 10:14:20AM -0700, James Smart wrote:
> On 7/11/2017 7:43 AM, Keith Busch wrote:
> > I agree that usage for dynamic strings is better, but this is a fixed
> > sized string that will always be 20 bytes.
> 
> My scenario was the same - fixed sized, not dynamic.  Just being the
> messenger for consistency.

Wha?! That style point is not one I'm convinced is better. Not only is
it more complicated, the stack overhead of calling kasprintf is higher
than the 20 bytes I'm using here.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-07 18:14   ` Christoph Hellwig
@ 2017-07-13 21:29     ` Keith Busch
  2017-07-14 12:50       ` Guilherme G. Piccoli
  2017-07-14 12:53       ` Christoph Hellwig
  0 siblings, 2 replies; 37+ messages in thread
From: Keith Busch @ 2017-07-13 21:29 UTC (permalink / raw)


On Fri, Jul 07, 2017 at 08:14:36PM +0200, Christoph Hellwig wrote:
> 
> Can we have an nvme-aen helper that gets invoked by a trivial udev rule,
> which would read a config file for policy?

Sure, I can work on an aen helper for udev. Do you know if there is an
existing project for these sorts of things that would take this feature?
 
> Also I think we need to document our policy on which AERs
> get forwarded to userspace. There are some we really should be
> handling in the kernel (fw activation which is in progress, namespace
> data changes really needs fixing, and ANA will also require kernel
> handling).  Is there a way to state it's up to the kernel to reserve
> any even for itself?  Or should we just forward very specific AERs
> to userspace?

I was hoping not to have a policy in the kernel. If we filter them with
evolving black or white lists, user space would still need to account
for differences across kernel versions.

Instead, sending everything is a much more straightforward policy,
and user space can decide what to do with it. It's future proof for any
new event and use case. Even if the driver handles the event, there
shouldn't be anything user space can do to harm it. In the worst case,
it's just giving information that udev doesn't need.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-13 21:29     ` Keith Busch
@ 2017-07-14 12:50       ` Guilherme G. Piccoli
  2017-07-14 12:54         ` Christoph Hellwig
  2017-07-14 12:53       ` Christoph Hellwig
  1 sibling, 1 reply; 37+ messages in thread
From: Guilherme G. Piccoli @ 2017-07-14 12:50 UTC (permalink / raw)


On 07/13/2017 06:29 PM, Keith Busch wrote:
> On Fri, Jul 07, 2017 at 08:14:36PM +0200, Christoph Hellwig wrote:
>>
>> Can we have an nvme-aen helper that gets invoked by a trivial udev rule,
>> which would read a config file for policy?
> 
> Sure, I can work on an aen helper for udev. Do you know if there is an
> existing project for these sorts of things that would take this feature?

We were planning to implement this here at LTC, and one idea under
discussion is whether such a tool could be integrated into nvme-cli,
like an "nvmed" tool, a daemon to handle the events. An advantage of a
daemon is that it could implement RAS features, like triggering actions
based on adapter errors, etc.

Do you think this kind of tool is interesting? Suggestions on this topic
are greatly appreciated. One of our concerns is that AEN events still
need to be enabled by the user; not all events are enabled by default.
Such a tool would allow friendly configuration of which events the user
wants to enable.

Thanks,



Guilherme

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-13 21:29     ` Keith Busch
  2017-07-14 12:50       ` Guilherme G. Piccoli
@ 2017-07-14 12:53       ` Christoph Hellwig
  2017-07-14 15:16         ` Keith Busch
  1 sibling, 1 reply; 37+ messages in thread
From: Christoph Hellwig @ 2017-07-14 12:53 UTC (permalink / raw)


On Thu, Jul 13, 2017 at 05:29:55PM -0400, Keith Busch wrote:
> On Fri, Jul 07, 2017 at 08:14:36PM +0200, Christoph Hellwig wrote:
> > 
> > Can we have an nvme-aen helper that gets invoked by a trivial udev rule,
> > which would read a config file for policy?
> 
> Sure, I can work on an aen helper for udev. Do you know if there is an
> existing project for these sorts of things that would take this feature?

I would have expected this to be part of nvme-cli in one form or another.

> I was hoping not to have a policy in the kernel. If we filter them with
> evolving black or white lists, user space would still need to account
> for differences across kernel versions.
> 
> Instead, sending everything is a much more straightforward policy,
> and user space can decide what to do with it. It's future-proof against
> any new event and use case. Even if the driver handles the event, there
> shouldn't be anything user space can do to harm it. In the worst case,
> it's just giving information that udev doesn't need.

Someone has to clear the log page after an AER, and if both the
kernel and the user tool do so we risk that the user tool clears
the next AER due to a race condition.

But maybe we can work around this by adding a bit to the event
that indicates if the kernel has cleared the event.
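
Purely as a sketch, that could be a second environment variable next to
NVME_AEN; the variable name and the nvme_aen_cleared_by_kernel() helper
below are just placeholders, and the rest follows the patch 7 code:

static void nvme_aen_uevent(struct nvme_ctrl *ctrl)
{
	char aen_buf[20], cleared_buf[20];
	char *envp[3] = { aen_buf, cleared_buf, NULL };
	u32 aen = ctrl->aen;

	ctrl->aen = 0;
	if (!aen)
		return;

	snprintf(aen_buf, sizeof(aen_buf), "NVME_AEN=%#08x", aen);
	/* tell userspace whether the kernel will read (and so clear) the log */
	snprintf(cleared_buf, sizeof(cleared_buf), "NVME_AEN_CLEARED=%d",
		 nvme_aen_cleared_by_kernel(aen));
	kobject_uevent_env(&ctrl->device->kobj, KOBJ_CHANGE, envp);
}

Userspace could then skip its own log page read when the kernel already
does one.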

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-14 12:50       ` Guilherme G. Piccoli
@ 2017-07-14 12:54         ` Christoph Hellwig
  0 siblings, 0 replies; 37+ messages in thread
From: Christoph Hellwig @ 2017-07-14 12:54 UTC (permalink / raw)


On Fri, Jul 14, 2017 at 09:50:20AM -0300, Guilherme G. Piccoli wrote:
> We were planning to implement this here at LTC, and one idea under
> discussion is whether such a tool could be integrated into nvme-cli,
> like an "nvmed" tool, a daemon to handle the events. An advantage of a
> daemon is that it could implement RAS features, like triggering actions
> based on adapter errors, etc.

Doesn't a uevent call a binary each time?  Either way, nvme-cli is the
right place for it.

> Do you think this kind of tool is interesting? Suggestions on this topic
> are greatly appreciated. One of our concerns is that AEN events still
> need to be enabled by the user; not all events are enabled by default.
> Such a tool would allow friendly configuration of which events the user
> wants to enable.

Yes, AERs generally are opt-in.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-14 12:53       ` Christoph Hellwig
@ 2017-07-14 15:16         ` Keith Busch
  2017-07-15  8:42           ` Christoph Hellwig
  0 siblings, 1 reply; 37+ messages in thread
From: Keith Busch @ 2017-07-14 15:16 UTC (permalink / raw)


On Fri, Jul 14, 2017 at 02:53:09PM +0200, Christoph Hellwig wrote:
> 
> Someone has to clear the log page after an AER, and if both the
> kernel and the user tool do so we risk that the user tool clears
> the next AER due to a race condition.

Sorry, bear with me a moment. I may be missing something obvious, as I'm
not seeing a problem. Reading the event's associated log only rearms
the AER for that event type. Even if the user tool reads the same log
as the driver, that doesn't stop the driver from seeing the next AER
completion and handling it the same as if user space wasn't there.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-14 15:16         ` Keith Busch
@ 2017-07-15  8:42           ` Christoph Hellwig
  2017-07-17 15:22             ` Keith Busch
  0 siblings, 1 reply; 37+ messages in thread
From: Christoph Hellwig @ 2017-07-15  8:42 UTC (permalink / raw)


On Fri, Jul 14, 2017 at 11:16:25AM -0400, Keith Busch wrote:
> On Fri, Jul 14, 2017 at 02:53:09PM +0200, Christoph Hellwig wrote:
> > 
> > Someone has to clear the log page after an AER, and if both the
> > kernel and the user tool do so we risk that the user tool clears
> > the next AER due to a race condition.
> 
> Sorry, bear with me a moment. I may be missing something obvious, as I'm
> not seeing a problem. Reading the event's associated log only rearms
> the AER for that event type. Even if the user tool reads the same log
> as the driver, that doesn't stop the driver from seeing the next AER
> completion and handling it the same as if user space wasn't there.

Take the Changed Namespace List log page for example.

We get an AER, and then both schedule a log page read in the kernel
(as we should, patch pending) and send the uevent.  As soon as we
read the log page in the kernel to clear it, we get another event
due to another namespace change, but now userspace manages to
read the log page before the kernel acts on the next AER, so the
kernel won't see the changes.

And yes, this is probably a little contrived, but we'll get more
log pages with similar semantics, so I am worried a bit about this.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-15  8:42           ` Christoph Hellwig
@ 2017-07-17 15:22             ` Keith Busch
  2017-08-01 11:59               ` Guan Junxiong
  0 siblings, 1 reply; 37+ messages in thread
From: Keith Busch @ 2017-07-17 15:22 UTC (permalink / raw)


On Sat, Jul 15, 2017 at 10:42:18AM +0200, Christoph Hellwig wrote:
> 
> Take the Changed Namespace List log page for example.
> 
> We get an AER, and then both schedule a log page read in the kernel
> (as we should, patch pending) and send the uevent.  As soon as we
> read the log page in the kernel to clear it, we get another event
> due to another namespace change, but now userspace manages to
> read the log page before the kernel acts on the next AER, so the
> kernel won't see the changes.
> 
> And yes, this is probably a little contrived, but we'll get more
> log pages with similar semantics, so I am worried a bit about this.

That event already needs to be handled by the driver without relying
on the Changed Namespace List log, such as when the list exceeds the
log size. But okay, I see the general concern that reading a log page
may change the results.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 7/7] nvme: Send change uevent when AEN completes
  2017-07-17 15:22             ` Keith Busch
@ 2017-08-01 11:59               ` Guan Junxiong
  0 siblings, 0 replies; 37+ messages in thread
From: Guan Junxiong @ 2017-08-01 11:59 UTC (permalink / raw)


Hi, Keith

Any update on this patch?  We are keen to use this kind of feature to
handle some controller/subsystem events with multipath-tools.

Thanks

Best wishes
Guan Junxiong

On 2017/7/17 23:22, Keith Busch wrote:
On Sat, Jul 15, 2017 at 10:42:18AM +0200, Christoph Hellwig wrote:
>>
>> Take the Changed Namespace List log page for example.
>>
>> We get an AER, and then both schedule a log page read in the kernel
>> (as we should, patch pending) and send the uevent.  As soon as we
>> read the log page in the kernel to clear it, we get another event
>> due to another namespace change, but now userspace manages to
>> read the log page before the kernel acts on the next AER, so the
>> kernel won't see the changes.
>>
>> And yes, this is probably a little contrived, but we'll get more
>> log pages with similar semantics, so I am worried a bit about this.
> 
> That event already needs to be handled by the driver without relying
> on the Changed Namespace List log, such as when the list exceeds the
> log size. But okay, I see the general concern that reading a log page
> may change the results.
> 

^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2017-08-01 11:59 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-07 16:22 [PATCH 0/7] NVMe Async Event Notification updates Keith Busch
2017-07-07 16:22 ` [PATCH 1/7] nvme/fc: There is only one AEN request Keith Busch
2017-07-10 23:12   ` James Smart
2017-07-07 16:22 ` [PATCH 2/7] nvme/fc: Fix admin queue depth setup Keith Busch
2017-07-10  5:59   ` Sagi Grimberg
2017-07-10 22:57     ` Keith Busch
2017-07-10 23:40   ` James Smart
2017-07-10 23:49     ` James Smart
2017-07-07 16:22 ` [PATCH 3/7] nvme/fc: remove unused "queue_size" field Keith Busch
2017-07-10  6:00   ` Sagi Grimberg
2017-07-10 23:44   ` James Smart
2017-07-07 16:22 ` [PATCH 4/7] nvme: Centralize blk-mq tag and AEN counts Keith Busch
2017-07-10  6:01   ` Sagi Grimberg
2017-07-10 23:51   ` James Smart
2017-07-07 16:22 ` [PATCH 5/7] nvme: Only one AEN request Keith Busch
2017-07-10  6:06   ` Sagi Grimberg
2017-07-10 23:53   ` James Smart
2017-07-07 16:23 ` [PATCH 6/7] nvme: Unexport starting async event work Keith Busch
2017-07-07 18:11   ` Christoph Hellwig
2017-07-07 19:54     ` Keith Busch
2017-07-07 16:23 ` [PATCH 7/7] nvme: Send change uevent when AEN completes Keith Busch
2017-07-07 18:14   ` Christoph Hellwig
2017-07-13 21:29     ` Keith Busch
2017-07-14 12:50       ` Guilherme G. Piccoli
2017-07-14 12:54         ` Christoph Hellwig
2017-07-14 12:53       ` Christoph Hellwig
2017-07-14 15:16         ` Keith Busch
2017-07-15  8:42           ` Christoph Hellwig
2017-07-17 15:22             ` Keith Busch
2017-08-01 11:59               ` Guan Junxiong
2017-07-10 10:17   ` Zou Ming
2017-07-10 17:38     ` Keith Busch
2017-07-10 23:57   ` James Smart
2017-07-11 14:43     ` Keith Busch
2017-07-11 17:14       ` James Smart
2017-07-13 16:09         ` Keith Busch
2017-07-07 18:09 ` [PATCH 0/7] NVMe Async Event Notification updates Christoph Hellwig
