All of lore.kernel.org
* [PATCH 1/5] blk-mq: Export reading mq request state
@ 2019-03-08 17:40 ` Keith Busch
  0 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-08 17:40 UTC (permalink / raw)
  To: linux-nvme, linux-block, Jens Axboe
  Cc: Christoph Hellwig, Sagi Grimberg, Keith Busch

Drivers may need to know the state of their requests.
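
As a minimal sketch of the intended use (the driver and callback names here
are hypothetical), an iterator callback run while the driver's queues are
quiesced can filter on the request state before acting on it:

static bool mydrv_cancel_request(struct request *req, void *data, bool reserved)
{
	/* Only dispatched requests carry a command the device knows about. */
	if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT)
		return true;

	/* A real driver would record an error status in its pdu here. */
	blk_mq_complete_request(req);
	return true;
}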

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 block/blk-mq.h         | 9 ---------
 include/linux/blkdev.h | 9 +++++++++
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/block/blk-mq.h b/block/blk-mq.h
index c11353a3749d..99ab7e472e62 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -128,15 +128,6 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
 
 void blk_mq_release(struct request_queue *q);
 
-/**
- * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
- * @rq: target request.
- */
-static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
-{
-	return READ_ONCE(rq->state);
-}
-
 static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
 					   unsigned int cpu)
 {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index faed9d9eb84c..db113aee48bb 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -241,6 +241,15 @@ struct request {
 	struct request *next_rq;
 };
 
+/**
+ * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
+ * @rq: target request.
+ */
+static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
+{
+	return READ_ONCE(rq->state);
+}
+
 static inline bool blk_op_is_scsi(unsigned int op)
 {
 	return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT;
-- 
2.14.4


^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 2/5] blk-mq: Export iterating queue requests
  2019-03-08 17:40 ` Keith Busch
@ 2019-03-08 17:40   ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-08 17:40 UTC (permalink / raw)
  To: linux-nvme, linux-block, Jens Axboe
  Cc: Christoph Hellwig, Sagi Grimberg, Keith Busch

A driver may need to iterate a particular queue's tagged requests rather
than the whole tagset.
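
A rough sketch of the intended use on a quiesced, dying queue (the callback
name is hypothetical; the signature is the existing busy_iter_fn):

static bool mydrv_fail_request(struct blk_mq_hw_ctx *hctx, struct request *req,
			       void *data, bool reserved)
{
	/* On a quiesced, dying queue this request never reached ->queue_rq(). */
	blk_mq_end_request(req, BLK_STS_IOERR);
	return true;			/* keep iterating */
}

	blk_mq_queue_tag_busy_iter(q, mydrv_fail_request, NULL);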

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 block/blk-mq-tag.c     | 1 +
 block/blk-mq-tag.h     | 2 --
 include/linux/blk-mq.h | 2 ++
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index a4931fc7be8a..a4ba91b332b0 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -407,6 +407,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
 	}
 	blk_queue_exit(q);
 }
+EXPORT_SYMBOL(blk_mq_queue_tag_busy_iter);
 
 static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
 		    bool round_robin, int node)
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 61deab0b5a5a..5af7ff94b400 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -33,8 +33,6 @@ extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 					struct blk_mq_tags **tags,
 					unsigned int depth, bool can_grow);
 extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
-void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
-		void *priv);
 
 static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
 						 struct blk_mq_hw_ctx *hctx)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b0c814bcc7e3..a64b3fdce0b0 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -321,6 +321,8 @@ bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
 void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 		busy_tag_iter_fn *fn, void *priv);
+void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
+		void *priv);
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_unfreeze_queue(struct request_queue *q);
 void blk_freeze_queue_start(struct request_queue *q);
-- 
2.14.4


^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 3/5] blk-mq: Iterate tagset over all requests
  2019-03-08 17:40 ` Keith Busch
@ 2019-03-08 17:40   ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-08 17:40 UTC (permalink / raw)
  To: linux-nvme, linux-block, Jens Axboe
  Cc: Christoph Hellwig, Sagi Grimberg, Keith Busch

Let the driver's iterator callback decide what request state it should
handle. While there may be other uses, the intention here is to allow
drivers to terminate requests that have entered a dying queue without
needing to unquiesce that queue.

It also turns out that no current callback wanted to see the COMPLETED
requests that were previously being returned, so this change also fixes that
for all existing callback handlers.
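
A callback that still only cares about dispatched requests keeps the previous
behaviour by filtering itself; a minimal sketch (the callback name is
hypothetical):

static bool mydrv_count_inflight(struct request *rq, void *data, bool reserved)
{
	unsigned int *count = data;

	/* The iterator no longer skips non-started requests, so do it here. */
	if (!blk_mq_request_started(rq))
		return true;

	(*count)++;
	return true;
}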

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 block/blk-mq-tag.c                | 12 ++++++------
 drivers/block/mtip32xx/mtip32xx.c |  6 ++++++
 drivers/block/nbd.c               |  2 ++
 drivers/block/skd_main.c          |  4 ++++
 drivers/nvme/host/core.c          |  2 ++
 drivers/nvme/host/fc.c            |  2 ++
 6 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index a4ba91b332b0..012f6d8e9631 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -288,7 +288,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	 * test and set the bit before assining ->rqs[].
 	 */
 	rq = tags->rqs[bitnr];
-	if (rq && blk_mq_request_started(rq))
+	if (rq)
 		return iter_data->fn(rq, iter_data->data, reserved);
 
 	return true;
@@ -299,7 +299,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
  * @tags:	Tag map to iterate over.
  * @bt:		sbitmap to examine. This is either the breserved_tags member
  *		or the bitmap_tags member of struct blk_mq_tags.
- * @fn:		Pointer to the function that will be called for each started
+ * @fn:		Pointer to the function that will be called for each
  *		request. @fn will be called as follows: @fn(rq, @data,
  *		@reserved) where rq is a pointer to a request. Return true
  *		to continue iterating tags, false to stop.
@@ -322,9 +322,9 @@ static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
 }
 
 /**
- * blk_mq_all_tag_busy_iter - iterate over all started requests in a tag map
+ * blk_mq_all_tag_busy_iter - iterate over all requests in a tag map
  * @tags:	Tag map to iterate over.
- * @fn:		Pointer to the function that will be called for each started
+ * @fn:		Pointer to the function that will be called for each
  *		request. @fn will be called as follows: @fn(rq, @priv,
  *		reserved) where rq is a pointer to a request. 'reserved'
  *		indicates whether or not @rq is a reserved request. Return
@@ -340,9 +340,9 @@ static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
 }
 
 /**
- * blk_mq_tagset_busy_iter - iterate over all started requests in a tag set
+ * blk_mq_tagset_busy_iter - iterate over all requests in a tag set
  * @tagset:	Tag set to iterate over.
- * @fn:		Pointer to the function that will be called for each started
+ * @fn:		Pointer to the function that will be called for each
  *		request. @fn will be called as follows: @fn(rq, @priv,
  *		reserved) where rq is a pointer to a request. 'reserved'
  *		indicates whether or not @rq is a reserved request. Return
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 9a6f40cd8df6..88df90ee0a3c 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2690,6 +2690,8 @@ static bool mtip_abort_cmd(struct request *req, void *data, bool reserved)
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 	struct driver_data *dd = data;
 
+	if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT)
+		return true;
 	dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
 
 	clear_bit(req->tag, dd->port->cmds_to_issue);
@@ -2702,6 +2704,8 @@ static bool mtip_queue_cmd(struct request *req, void *data, bool reserved)
 {
 	struct driver_data *dd = data;
 
+	if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT)
+		return true;
 	set_bit(req->tag, dd->port->cmds_to_issue);
 	blk_abort_request(req);
 	return true;
@@ -3852,6 +3856,8 @@ static bool mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
 {
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
+	if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
+		return true;
 	cmd->status = BLK_STS_IOERR;
 	blk_mq_complete_request(rq);
 	return true;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 90ba9f4c03f3..4811746f189b 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -739,6 +739,8 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved)
 {
 	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
 
+	if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT)
+		return true;
 	cmd->status = BLK_STS_IOERR;
 	blk_mq_complete_request(req);
 	return true;
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 7d3ad6c22ee5..530d14e6b97b 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -387,6 +387,8 @@ static bool skd_inc_in_flight(struct request *rq, void *data, bool reserved)
 {
 	int *count = data;
 
+	if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
+		return true;
 	count++;
 	return true;
 }
@@ -1899,6 +1901,8 @@ static bool skd_recover_request(struct request *req, void *data, bool reserved)
 	struct skd_device *const skdev = data;
 	struct skd_request_context *skreq = blk_mq_rq_to_pdu(req);
 
+	if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT)
+		return true;
 	if (skreq->state != SKD_REQ_STATE_BUSY)
 		return true;
 
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1597efae6af8..cc5d9a83d5af 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -284,6 +284,8 @@ EXPORT_SYMBOL_GPL(nvme_complete_rq);
 
 bool nvme_cancel_request(struct request *req, void *data, bool reserved)
 {
+	if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT)
+		return true;
 	dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
 				"Cancelling I/O %d", req->tag);
 
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index b29b12498a1a..0deba51575b0 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2373,6 +2373,8 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
 
+	if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT)
+		return true;
 	__nvme_fc_abort_op(ctrl, op);
 	return true;
 }
-- 
2.14.4


^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 4/5] nvme: Fail dead namespace's entered requests
  2019-03-08 17:40 ` Keith Busch
@ 2019-03-08 17:40   ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-08 17:40 UTC (permalink / raw)
  To: linux-nvme, linux-block, Jens Axboe
  Cc: Christoph Hellwig, Sagi Grimberg, Keith Busch

End the entered requests on a quiesced queue directly rather than flushing
them through the low level driver's queue_rq().

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/nvme/host/core.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index cc5d9a83d5af..7095406bb293 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -94,6 +94,13 @@ static void nvme_put_subsystem(struct nvme_subsystem *subsys);
 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 					   unsigned nsid);
 
+static bool nvme_fail_request(struct blk_mq_hw_ctx *hctx, struct request *req,
+			      void *data, bool reserved)
+{
+	blk_mq_end_request(req, BLK_STS_IOERR);
+	return true;
+}
+
 static void nvme_set_queue_dying(struct nvme_ns *ns)
 {
 	/*
@@ -104,8 +111,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
 		return;
 	revalidate_disk(ns->disk);
 	blk_set_queue_dying(ns->queue);
-	/* Forcibly unquiesce queues to avoid blocking dispatch */
-	blk_mq_unquiesce_queue(ns->queue);
+	blk_mq_queue_tag_busy_iter(ns->queue, nvme_fail_request, NULL);
 }
 
 static void nvme_queue_scan(struct nvme_ctrl *ctrl)
-- 
2.14.4


^ permalink raw reply related	[flat|nested] 60+ messages in thread

* [PATCH 5/5] nvme/pci: Remove queue IO flushing hack
  2019-03-08 17:40 ` Keith Busch
@ 2019-03-08 17:40   ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-08 17:40 UTC (permalink / raw)
  To: linux-nvme, linux-block, Jens Axboe
  Cc: Christoph Hellwig, Sagi Grimberg, Keith Busch

The nvme driver checked the queue state on every IO so this path could
be used to flush out entered requests to a failed completion. The code
even mentions in comments that we shouldn't have to do this, so let's
not do it.

Use blk-mq's tag iterator to end all entered requests when the queue
isn't going to be restarted, so the IO path doesn't have to deal with
these conditions.

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/nvme/host/pci.c | 45 +++++++++++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f54718b63637..398c6333cf77 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -918,13 +918,6 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_command cmnd;
 	blk_status_t ret;
 
-	/*
-	 * We should not need to do this, but we're still using this to
-	 * ensure we can drain requests on a dying queue.
-	 */
-	if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
-		return BLK_STS_IOERR;
-
 	ret = nvme_setup_cmd(ns, req, &cmnd);
 	if (ret)
 		return ret;
@@ -1403,10 +1396,6 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 {
 	if (!test_and_clear_bit(NVMEQ_ENABLED, &nvmeq->flags))
 		return 1;
-
-	/* ensure that nvme_queue_rq() sees NVMEQ_ENABLED cleared */
-	mb();
-
 	nvmeq->dev->online_queues--;
 	if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
 		blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
@@ -1616,15 +1605,29 @@ static const struct blk_mq_ops nvme_mq_ops = {
 	.poll		= nvme_poll,
 };
 
+static bool nvme_fail_queue_request(struct request *req, void *data, bool reserved)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_queue *nvmeq = iod->nvmeq;
+
+	if (!test_bit(NVMEQ_ENABLED, &nvmeq->flags))
+		blk_mq_end_request(req, BLK_STS_IOERR);
+	return true;
+}
+
 static void nvme_dev_remove_admin(struct nvme_dev *dev)
 {
 	if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
 		/*
 		 * If the controller was reset during removal, it's possible
-		 * user requests may be waiting on a stopped queue. Start the
-		 * queue to flush these to completion.
+		 * user requests may be waiting on a stopped queue. End all
+		 * entered requests after preventing new requests from
+		 * entering.
 		 */
-		blk_mq_unquiesce_queue(dev->ctrl.admin_q);
+		blk_set_queue_dying(dev->ctrl.admin_q);
+		blk_mq_tagset_busy_iter(&dev->admin_tagset,
+					nvme_fail_queue_request,
+					NULL);
 		blk_cleanup_queue(dev->ctrl.admin_q);
 		blk_mq_free_tag_set(&dev->admin_tagset);
 	}
@@ -2435,6 +2438,14 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 	}
 }
 
+static void nvme_fail_requests(struct nvme_dev *dev)
+{
+	if (!dev->ctrl.tagset)
+		return;
+	blk_mq_tagset_busy_iter(dev->ctrl.tagset, nvme_fail_queue_request,
+				NULL);
+}
+
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
 	bool dead = true;
@@ -2475,11 +2486,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 
 	/*
 	 * The driver will not be starting up queues again if shutting down so
-	 * must flush all entered requests to their failed completion to avoid
+	 * must end all entered requests to their failed completion to avoid
 	 * deadlocking blk-mq hot-cpu notifier.
 	 */
 	if (shutdown)
-		nvme_start_queues(&dev->ctrl);
+		nvme_fail_requests(dev);
 	mutex_unlock(&dev->shutdown_lock);
 }
 
@@ -2624,6 +2635,8 @@ static void nvme_reset_work(struct work_struct *work)
 		nvme_remove_namespaces(&dev->ctrl);
 		new_state = NVME_CTRL_ADMIN_ONLY;
 	} else {
+		/* Fail requests that entered an hctx that no longer exists */
+		nvme_fail_requests(dev);
 		nvme_start_queues(&dev->ctrl);
 		nvme_wait_freeze(&dev->ctrl);
 		/* hit this only when allocate tagset fails */
-- 
2.14.4


^ permalink raw reply related	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/5] blk-mq: Export reading mq request state
  2019-03-08 17:40 ` Keith Busch
@ 2019-03-08 18:07   ` Bart Van Assche
  -1 siblings, 0 replies; 60+ messages in thread
From: Bart Van Assche @ 2019-03-08 18:07 UTC (permalink / raw)
  To: Keith Busch, linux-nvme, linux-block, Jens Axboe
  Cc: Christoph Hellwig, Sagi Grimberg

On Fri, 2019-03-08 at 10:40 -0700, Keith Busch wrote:
> Drivers may need to know the state of their requests.

Hi Keith,

What makes you think that drivers should be able to check the state of their
requests? Please elaborate.

> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index faed9d9eb84c..db113aee48bb 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -241,6 +241,15 @@ struct request {
>  	struct request *next_rq;
>  };
>  
> +/**
> + * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
> + * @rq: target request.
> + */
> +static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
> +{
> +	return READ_ONCE(rq->state);
> +}

Please also explain how drivers can use this function without triggering a
race condition with the code that modifies rq->state.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 2/5] blk-mq: Export iterating queue requests
  2019-03-08 17:40   ` Keith Busch
@ 2019-03-08 18:08     ` Bart Van Assche
  -1 siblings, 0 replies; 60+ messages in thread
From: Bart Van Assche @ 2019-03-08 18:08 UTC (permalink / raw)
  To: Keith Busch, linux-nvme, linux-block, Jens Axboe
  Cc: Christoph Hellwig, Sagi Grimberg

On Fri, 2019-03-08 at 10:40 -0700, Keith Busch wrote:
> A driver may need to iterate a particular queue's tagged requests rather
> than the whole tagset.

Since iterating over requests triggers race conditions with request execution
please explain what use case(s) you have in mind and what your plan is to handle
such race conditions.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 2/5] blk-mq: Export iterating queue requests
  2019-03-08 18:08     ` Bart Van Assche
@ 2019-03-08 18:13       ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-08 18:13 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-nvme, linux-block, Jens Axboe, Christoph Hellwig, Sagi Grimberg

On Fri, Mar 08, 2019 at 10:08:47AM -0800, Bart Van Assche wrote:
> On Fri, 2019-03-08 at 10:40 -0700, Keith Busch wrote:
> > A driver may need to iterate a particular queue's tagged requests rather
> > than the whole tagset.
> 
> Since iterating over requests triggers race conditions with request execution
> please explain what use case(s) you have in mind and what your plan is to handle
> such race conditions.

That race isn't new. You should only iterate when your queues are quiesced
to ensure the request sent to a callback is stable.
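
Roughly this pattern (a sketch; mydrv_iter_cb stands in for whatever
busy_iter_fn callback the driver passes):

	blk_mq_quiesce_queue(q);
	blk_mq_queue_tag_busy_iter(q, mydrv_iter_cb, data);
	blk_mq_unquiesce_queue(q);	/* unless the queue is being torn down */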

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/5] nvme: Fail dead namespace's entered requests
  2019-03-08 17:40   ` Keith Busch
@ 2019-03-08 18:15     ` Bart Van Assche
  -1 siblings, 0 replies; 60+ messages in thread
From: Bart Van Assche @ 2019-03-08 18:15 UTC (permalink / raw)
  To: Keith Busch, linux-nvme, linux-block, Jens Axboe
  Cc: Christoph Hellwig, Sagi Grimberg

On Fri, 2019-03-08 at 10:40 -0700, Keith Busch wrote:
> End the entered requests on a quiesced queue directly rather than flushing
> them through the low level driver's queue_rq().
> 
> Signed-off-by: Keith Busch <keith.busch@intel.com>
> ---
>  drivers/nvme/host/core.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index cc5d9a83d5af..7095406bb293 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -94,6 +94,13 @@ static void nvme_put_subsystem(struct nvme_subsystem *subsys);
>  static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
>  					   unsigned nsid);
>  
> +static bool nvme_fail_request(struct blk_mq_hw_ctx *hctx, struct request *req,
> +			      void *data, bool reserved)
> +{
> +	blk_mq_end_request(req, BLK_STS_IOERR);
> +	return true;
> +}

Calling blk_mq_end_request() from outside the .queue_rq() or .complete()
callback functions is wrong. Did you perhaps want to call
blk_mq_complete_request()?

Bart.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/5] blk-mq: Export reading mq request state
  2019-03-08 18:07   ` Bart Van Assche
@ 2019-03-08 18:15     ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-08 18:15 UTC (permalink / raw)
  To: Bart Van Assche, t
  Cc: linux-nvme, linux-block, Jens Axboe, Christoph Hellwig, Sagi Grimberg

On Fri, Mar 08, 2019 at 10:07:23AM -0800, Bart Van Assche wrote:
> On Fri, 2019-03-08 at 10:40 -0700, Keith Busch wrote:
> > Drivers may need to know the state of their requests.
> 
> Hi Keith,
> 
> What makes you think that drivers should be able to check the state of their
> requests? Please elaborate.

Patches 4 and 5 in this series.
 
> > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > index faed9d9eb84c..db113aee48bb 100644
> > --- a/include/linux/blkdev.h
> > +++ b/include/linux/blkdev.h
> > @@ -241,6 +241,15 @@ struct request {
> >  	struct request *next_rq;
> >  };
> >  
> > +/**
> > + * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
> > + * @rq: target request.
> > + */
> > +static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
> > +{
> > +	return READ_ONCE(rq->state);
> > +}
> 
> Please also explain how drivers can use this function without triggering a
> race condition with the code that modifies rq->state.

Either quiesced or within a timeout handler that already locks the
request lifetime.
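
For the timeout case, something along these lines (a hypothetical driver's
handler, just to illustrate where the read is safe):

static enum blk_eh_timer_return mydrv_timeout(struct request *req, bool reserved)
{
	/* The timeout handler pins the request, so this read is stable. */
	pr_warn("mydrv: tag %d timed out in state %d\n",
		req->tag, blk_mq_rq_state(req));

	/* ... driver-specific abort/recovery would go here ... */
	return BLK_EH_RESET_TIMER;
}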

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/5] nvme: Fail dead namespace's entered requests
  2019-03-08 18:15     ` Bart Van Assche
@ 2019-03-08 18:19       ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-08 18:19 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-nvme, linux-block, Jens Axboe, Christoph Hellwig, Sagi Grimberg

On Fri, Mar 08, 2019 at 10:15:27AM -0800, Bart Van Assche wrote:
> On Fri, 2019-03-08 at 10:40 -0700, Keith Busch wrote:
> > End the entered requests on a quiesced queue directly rather than flushing
> > them through the low level driver's queue_rq().
> > 
> > Signed-off-by: Keith Busch <keith.busch@intel.com>
> > ---
> >  drivers/nvme/host/core.c | 10 ++++++++--
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > index cc5d9a83d5af..7095406bb293 100644
> > --- a/drivers/nvme/host/core.c
> > +++ b/drivers/nvme/host/core.c
> > @@ -94,6 +94,13 @@ static void nvme_put_subsystem(struct nvme_subsystem *subsys);
> >  static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
> >  					   unsigned nsid);
> >  
> > +static bool nvme_fail_request(struct blk_mq_hw_ctx *hctx, struct request *req,
> > +			      void *data, bool reserved)
> > +{
> > +	blk_mq_end_request(req, BLK_STS_IOERR);
> > +	return true;
> > +}
> 
> Calling blk_mq_end_request() from outside the .queue_rq() or .complete()
> callback functions is wrong. Did you perhaps want to call
> blk_mq_complete_request()?

This callback can only see requests in MQ_RQ_IDLE state, and
blk_mq_end_request() is the correct way to end those that never entered
a driver's queue_rq().
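
To spell out the rule these callbacks follow (a sketch, not part of this
series; the callback name is hypothetical):

static bool mydrv_terminate_rq(struct request *req, void *data, bool reserved)
{
	switch (blk_mq_rq_state(req)) {
	case MQ_RQ_IDLE:
		/* Never reached ->queue_rq(); end it directly. */
		blk_mq_end_request(req, BLK_STS_IOERR);
		break;
	case MQ_RQ_IN_FLIGHT:
		/* Dispatched to the driver; complete it through the normal path. */
		blk_mq_complete_request(req);
		break;
	default:
		/* MQ_RQ_COMPLETE: a completion already won, leave it alone. */
		break;
	}
	return true;
}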

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 5/5] nvme/pci: Remove queue IO flushing hack
  2019-03-08 17:40   ` Keith Busch
@ 2019-03-08 18:19     ` Bart Van Assche
  -1 siblings, 0 replies; 60+ messages in thread
From: Bart Van Assche @ 2019-03-08 18:19 UTC (permalink / raw)
  To: Keith Busch, linux-nvme, linux-block, Jens Axboe
  Cc: Christoph Hellwig, Sagi Grimberg

On Fri, 2019-03-08 at 10:40 -0700, Keith Busch wrote:
> +static bool nvme_fail_queue_request(struct request *req, void *data, bool reserved)
> +{
> +	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
> +	struct nvme_queue *nvmeq = iod->nvmeq;
> +
> +	if (!test_bit(NVMEQ_ENABLED, &nvmeq->flags))
> +		blk_mq_end_request(req, BLK_STS_IOERR);
> +	return true;
> +}

Same question here as for patch 4/5: what prevents concurrent calls of
blk_mq_complete_request() with the blk_mq_end_request() call in the
above function?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/5] blk-mq: Export reading mq request state
  2019-03-08 18:15     ` Keith Busch
@ 2019-03-08 18:42       ` Bart Van Assche
  -1 siblings, 0 replies; 60+ messages in thread
From: Bart Van Assche @ 2019-03-08 18:42 UTC (permalink / raw)
  To: Keith Busch, t
  Cc: linux-nvme, linux-block, Jens Axboe, Christoph Hellwig, Sagi Grimberg

On Fri, 2019-03-08 at 11:15 -0700, Keith Busch wrote:
> On Fri, Mar 08, 2019 at 10:07:23AM -0800, Bart Van Assche wrote:
> > On Fri, 2019-03-08 at 10:40 -0700, Keith Busch wrote:
> > > Drivers may need to know the state of their requests.
> > 
> > Hi Keith,
> > 
> > What makes you think that drivers should be able to check the state of their
> > requests? Please elaborate.
> 
> Patches 4 and 5 in this series.
>  
> > > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > > index faed9d9eb84c..db113aee48bb 100644
> > > --- a/include/linux/blkdev.h
> > > +++ b/include/linux/blkdev.h
> > > @@ -241,6 +241,15 @@ struct request {
> > >  	struct request *next_rq;
> > >  };
> > >  
> > > +/**
> > > + * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
> > > + * @rq: target request.
> > > + */
> > > +static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
> > > +{
> > > +	return READ_ONCE(rq->state);
> > > +}
> > 
> > Please also explain how drivers can use this function without triggering a
> > race condition with the code that modifies rq->state.
> 
> Either quiesced or within a timeout handler that already locks the
> request lifetime.

Hi Keith,

For future patch series submissions please include a cover letter. The two patch
series that you posted today don't have a cover letter so I can only guess what
the purpose of these two patch series is. Is the purpose of this patch series
perhaps to speed up error handling? If so, why did you choose the approach of
iterating over outstanding requests and telling the block layer to terminate
these requests? I think that the NVMe spec provides a more elegant mechanism,
namely deleting the I/O submission queues. According to what I read in the
1.3c spec, deleting an I/O submission queue forces an NVMe controller to post a
completion for every outstanding request. See also section 5.6 in the NVMe
1.3c spec.

Bart.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/5] blk-mq: Export reading mq request state
  2019-03-08 18:42       ` Bart Van Assche
@ 2019-03-08 19:19         ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-08 19:19 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: t, linux-nvme, linux-block, Jens Axboe, Christoph Hellwig, Sagi Grimberg

On Fri, Mar 08, 2019 at 10:42:17AM -0800, Bart Van Assche wrote:
> On Fri, 2019-03-08 at 11:15 -0700, Keith Busch wrote:
> > On Fri, Mar 08, 2019 at 10:07:23AM -0800, Bart Van Assche wrote:
> > > On Fri, 2019-03-08 at 10:40 -0700, Keith Busch wrote:
> > > > Drivers may need to know the state of their requests.
> > > 
> > > Hi Keith,
> > > 
> > > What makes you think that drivers should be able to check the state of their
> > > requests? Please elaborate.
> > 
> > Patches 4 and 5 in this series.
> >  
> > > > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > > > index faed9d9eb84c..db113aee48bb 100644
> > > > --- a/include/linux/blkdev.h
> > > > +++ b/include/linux/blkdev.h
> > > > @@ -241,6 +241,15 @@ struct request {
> > > >  	struct request *next_rq;
> > > >  };
> > > >  
> > > > +/**
> > > > + * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
> > > > + * @rq: target request.
> > > > + */
> > > > +static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
> > > > +{
> > > > +	return READ_ONCE(rq->state);
> > > > +}
> > > 
> > > Please also explain how drivers can use this function without triggering a
> > > race condition with the code that modifies rq->state.
> > 
> > Either quiesced or within a timeout handler that already locks the
> > request lifetime.
> 
> Hi Keith,
> 
> For future patch series submissions please include a cover letter. The two patch
> series that you posted today don't have a cover letter so I can only guess what
> the purpose of these two patch series is. Is the purpose of this patch series
> perhaps to speed up error handling? If so, why did you choose the approach of
> iterating over outstanding requests and telling the block layer to terminate
> these requests? 

Okay, good point. Will do.

> I think that the NVMe spec provides a more elegant mechanism,
> namely deleting the I/O submission queues. According to what I read in the
> 1.3c spec deleting an I/O submission queue forces an NVMe controller to post a 
> completion for every outstanding request. See also section 5.6 in the NVMe
> 1.3c spec.

That's actually not what it says. The controller may or may not post a
completion entry with a delete SQ command. The first behavior is defined
in the spec as "explicit" and the second as "implicit". For implicit,
we have to iterate inflight tags.
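
For the implicit case, that iteration is roughly this pattern on the teardown
path (a sketch, using the tagset iterator and the nvme_cancel_request callback
touched earlier in this series):

	blk_mq_tagset_busy_iter(dev->ctrl.tagset, nvme_cancel_request, &dev->ctrl);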

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/5] blk-mq: Export reading mq request state
  2019-03-08 17:40 ` Keith Busch
@ 2019-03-08 20:21   ` Sagi Grimberg
  -1 siblings, 0 replies; 60+ messages in thread
From: Sagi Grimberg @ 2019-03-08 20:21 UTC (permalink / raw)
  To: Keith Busch, linux-nvme, linux-block, Jens Axboe; +Cc: Christoph Hellwig

For some reason I didn't get patches 2/5 and 3/5...

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/5] blk-mq: Export reading mq request state
  2019-03-08 20:21   ` Sagi Grimberg
@ 2019-03-08 20:29     ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-08 20:29 UTC (permalink / raw)
  To: Sagi Grimberg; +Cc: linux-nvme, linux-block, Jens Axboe, Christoph Hellwig

On Fri, Mar 08, 2019 at 12:21:16PM -0800, Sagi Grimberg wrote:
> For some reason I didn't get patches 2/5 and 3/5...

Unreliable 'git send-email'?! :)

They're copied to patchwork too:

  https://patchwork.kernel.org/patch/10845225/
  https://patchwork.kernel.org/patch/10845229/

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/5] blk-mq: Export reading mq request state
  2019-03-08 19:19         ` Keith Busch
@ 2019-03-08 20:47           ` Bart Van Assche
  -1 siblings, 0 replies; 60+ messages in thread
From: Bart Van Assche @ 2019-03-08 20:47 UTC (permalink / raw)
  To: Keith Busch
  Cc: t, linux-nvme, linux-block, Jens Axboe, Christoph Hellwig, Sagi Grimberg

On Fri, 2019-03-08 at 12:19 -0700, Keith Busch wrote:
> On Fri, Mar 08, 2019 at 10:42:17AM -0800, Bart Van Assche wrote:
> > I think that the NVMe spec provides a more elegant mechanism,
> > namely deleting the I/O submission queues. According to what I read in the
> > 1.3c spec deleting an I/O submission queue forces an NVMe controller to post a 
> > completion for every outstanding request. See also section 5.6 in the NVMe
> > 1.3c spec.
> 
> That's actually not what it says. The controller may or may not post a
> completion entry with a delete SQ command. The first behavior is defined
> in the spec as "explicit" and the second as "implicit". For implicit,
> we have to iterate inflight tags.

Hi Keith,

Thanks for the clarification. Are you aware of any mechanism in the NVMe spec
that causes all outstanding requests to fail? With RDMA this is easy - all
one has to do is to change the queue pair state into IB_QPS_ERR. See also
ib_drain_qp() in the RDMA core.
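
For comparison, that RDMA mechanism looks roughly like the sketch below
(standard RDMA core usage, not code from this series; the function name is
made up):

static void fail_all_outstanding_wrs(struct ib_qp *qp)
{
	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };

	/*
	 * Moving the QP to the error state makes the HCA flush every
	 * outstanding work request with an error completion.
	 */
	if (!ib_modify_qp(qp, &attr, IB_QP_STATE))
		ib_drain_qp(qp);	/* wait for the flush completions */
}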

If no such mechanism has been defined in the NVMe spec: have you considered
cancelling all outstanding requests instead of calling blk_mq_end_request() for
all outstanding requests?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/5] blk-mq: Export reading mq request state
  2019-03-08 20:47           ` Bart Van Assche
@ 2019-03-08 21:14             ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-08 21:14 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: t, linux-nvme, linux-block, Jens Axboe, Christoph Hellwig, Sagi Grimberg

On Fri, Mar 08, 2019 at 12:47:10PM -0800, Bart Van Assche wrote:
> Thanks for the clarification. Are you aware of any mechanism in the NVMe spec
> that causes all outstanding requests to fail? With RDMA this is easy - all
> one has to do is to change the queue pair state into IB_QPS_ERR. See also
> ib_drain_qp() in the RDMA core.

Well, we can't always rely on hardware to provide completions. The
driver must be able to make forward progress if the device decides to
stop responding, or maybe it was disconnected, asserts, or experiences
an unsafe shutdown/powerloss.

> If no such mechanism has been defined in the NVMe spec: have you considered
> cancelling all outstanding requests instead of calling blk_mq_end_request() for
> all outstanding requests?

Isn't this cancelling requests? Is there an existing block interface
that accomplishes this?

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/5] blk-mq: Export reading mq request state
  2019-03-08 21:14             ` Keith Busch
@ 2019-03-08 21:25               ` Bart Van Assche
  -1 siblings, 0 replies; 60+ messages in thread
From: Bart Van Assche @ 2019-03-08 21:25 UTC (permalink / raw)
  To: Keith Busch
  Cc: linux-nvme, linux-block, Jens Axboe, Christoph Hellwig, Sagi Grimberg

On Fri, 2019-03-08 at 14:14 -0700, Keith Busch wrote:
> On Fri, Mar 08, 2019 at 12:47:10PM -0800, Bart Van Assche wrote:
> > If no such mechanism has been defined in the NVMe spec: have you considered
> > cancelling all outstanding requests instead of calling blk_mq_end_request() for
> > all outstanding requests?
> 
> Isn't this cancelling requests? Is there an existing block interface
> that accomplishes this?

Hi Keith,

Sorry if I was unclear. With "canceling outstanding requests" I was referring to
the NVMe abort command. I think aborting outstanding requests has the advantage
that no new explicit call to blk_mq_end_request() has to be added.

Bart.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/5] blk-mq: Export reading mq request state
  2019-03-08 21:25               ` Bart Van Assche
@ 2019-03-08 21:31                 ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-08 21:31 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-nvme, linux-block, Jens Axboe, Christoph Hellwig, Sagi Grimberg

On Fri, Mar 08, 2019 at 01:25:16PM -0800, Bart Van Assche wrote:
> On Fri, 2019-03-08 at 14:14 -0700, Keith Busch wrote:
> > On Fri, Mar 08, 2019 at 12:47:10PM -0800, Bart Van Assche wrote:
> > > If no such mechanism has been defined in the NVMe spec: have you considered
> > > cancelling all outstanding requests instead of calling blk_mq_end_request() for
> > > all outstanding requests?
> > 
> > Isn't this cancelling requests? Is there an existing block interface
> > that accomplishes this?
> 
> Hi Keith,
> 
> Sorry if I was unclear. With "canceling outstanding requests" I was referring to
> the NVMe abort command. I think aborting outstanding requests has the advantage
> that no new explicit call to blk_mq_end_request() has to be added.

Ah, gotcha. NVMe abort usage is different from what this series wants to
do: we've determined the device is no longer usable, so we need to terminate
all requests that are queued in software but haven't been dispatched
to hardware.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/5] nvme: Fail dead namespace's entered requests
  2019-03-08 18:19       ` Keith Busch
@ 2019-03-08 21:54         ` Bart Van Assche
  -1 siblings, 0 replies; 60+ messages in thread
From: Bart Van Assche @ 2019-03-08 21:54 UTC (permalink / raw)
  To: Keith Busch
  Cc: linux-nvme, linux-block, Jens Axboe, Christoph Hellwig, Sagi Grimberg

On Fri, 2019-03-08 at 11:19 -0700, Keith Busch wrote:
> On Fri, Mar 08, 2019 at 10:15:27AM -0800, Bart Van Assche wrote:
> > On Fri, 2019-03-08 at 10:40 -0700, Keith Busch wrote:
> > > End the entered requests on a quiesced queue directly rather than flush
> > > them through the low level driver's queue_rq().
> > > 
> > > Signed-off-by: Keith Busch <keith.busch@intel.com>
> > > ---
> > >  drivers/nvme/host/core.c | 10 ++++++++--
> > >  1 file changed, 8 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > > index cc5d9a83d5af..7095406bb293 100644
> > > --- a/drivers/nvme/host/core.c
> > > +++ b/drivers/nvme/host/core.c
> > > @@ -94,6 +94,13 @@ static void nvme_put_subsystem(struct nvme_subsystem *subsys);
> > >  static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
> > >  					   unsigned nsid);
> > >  
> > > +static bool nvme_fail_request(struct blk_mq_hw_ctx *hctx, struct request *req,
> > > +			      void *data, bool reserved)
> > > +{
> > > +	blk_mq_end_request(req, BLK_STS_IOERR);
> > > +	return true;
> > > +}
> > 
> > Calling blk_mq_end_request() from outside the .queue_rq() or .complete()
> > callback functions is wrong. Did you perhaps want to call
> > blk_mq_complete_request()?
> 
> This callback can only see requests in MQ_RQ_IDLE state, and
> blk_mq_end_request() is the correct way to end those that never entered
> a driver's queue_rq().

Hi Keith,

What guarantees that nvme_fail_request() only sees requests in the idle state?
From block/blk-mq-tag.c:

/**
 * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag
 * [ ... ]
 */

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/5] nvme: Fail dead namespace's entered requests
  2019-03-08 21:54         ` Bart Van Assche
@ 2019-03-08 22:06           ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-08 22:06 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-nvme, linux-block, Jens Axboe, Christoph Hellwig, Sagi Grimberg

On Fri, Mar 08, 2019 at 01:54:06PM -0800, Bart Van Assche wrote:
> On Fri, 2019-03-08 at 11:19 -0700, Keith Busch wrote:
> > On Fri, Mar 08, 2019 at 10:15:27AM -0800, Bart Van Assche wrote:
> > > On Fri, 2019-03-08 at 10:40 -0700, Keith Busch wrote:
> > > > End the entered requests on a quiesced queue directly rather than flush
> > > > them through the low level driver's queue_rq().
> > > > 
> > > > Signed-off-by: Keith Busch <keith.busch@intel.com>
> > > > ---
> > > >  drivers/nvme/host/core.c | 10 ++++++++--
> > > >  1 file changed, 8 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > > > index cc5d9a83d5af..7095406bb293 100644
> > > > --- a/drivers/nvme/host/core.c
> > > > +++ b/drivers/nvme/host/core.c
> > > > @@ -94,6 +94,13 @@ static void nvme_put_subsystem(struct nvme_subsystem *subsys);
> > > >  static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
> > > >  					   unsigned nsid);
> > > >  
> > > > +static bool nvme_fail_request(struct blk_mq_hw_ctx *hctx, struct request *req,
> > > > +			      void *data, bool reserved)
> > > > +{
> > > > +	blk_mq_end_request(req, BLK_STS_IOERR);
> > > > +	return true;
> > > > +}
> > > 
> > > Calling blk_mq_end_request() from outside the .queue_rq() or .complete()
> > > callback functions is wrong. Did you perhaps want to call
> > > blk_mq_complete_request()?
> > 
> > This callback can only see requests in MQ_RQ_IDLE state, and
> > blk_mq_end_request() is the correct way to end those that never entered
> > a driver's queue_rq().
> 
> Hi Keith,
> 
> What guarantees that nvme_fail_request() only sees requests in the idle state?
> From block/blk-mq-tag.c:
> 
> /**
>  * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag
>  * [ ... ]
>  */

It's the driver's responsibility to ensure the queue is quiesced before
requesting the iteration. When we call it through nvme_kill_queues(),
the queues were already quiesced before calling that. The only other
place it's called is on a frozen queue, so it's actually a no-op there
since there are no requests once frozen.
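
As a sketch of that calling convention (assuming the nvme_fail_request()
callback from patch 4/5; the wrapper name below is only for illustration):

static void nvme_fail_idle_requests(struct request_queue *q)
{
	/*
	 * Quiesce first so no request can leave MQ_RQ_IDLE through
	 * ->queue_rq() while the tags are being iterated.
	 */
	blk_mq_quiesce_queue(q);
	blk_mq_queue_tag_busy_iter(q, nvme_fail_request, NULL);
}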

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/5] nvme: Fail dead namespace's entered requests
  2019-03-08 17:40   ` Keith Busch
@ 2019-03-11  3:58     ` jianchao.wang
  -1 siblings, 0 replies; 60+ messages in thread
From: jianchao.wang @ 2019-03-11  3:58 UTC (permalink / raw)
  To: Keith Busch, linux-nvme, linux-block, Jens Axboe
  Cc: Christoph Hellwig, Sagi Grimberg

Hi Keith

How about introducing a per-hctx queue_rq callback and installing a
separate .queue_rq callback for the dead hctx? Then we just need to
start and complete the request there.
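
Something like the following sketch, purely as an illustration (per-hctx
.queue_rq callbacks do not exist today, and the function name is made up):

static blk_status_t nvme_dead_queue_rq(struct blk_mq_hw_ctx *hctx,
				       const struct blk_mq_queue_data *bd)
{
	/* Start and immediately fail anything routed to a dead hctx. */
	blk_mq_start_request(bd->rq);
	blk_mq_end_request(bd->rq, BLK_STS_IOERR);
	return BLK_STS_OK;
}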

Thanks
Jianchao


^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/5] nvme: Fail dead namespace's entered requests
  2019-03-11  3:58     ` jianchao.wang
@ 2019-03-11 15:42       ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-11 15:42 UTC (permalink / raw)
  To: jianchao.wang
  Cc: Busch, Keith, linux-nvme, linux-block, Jens Axboe,
	Christoph Hellwig, Sagi Grimberg

On Sun, Mar 10, 2019 at 08:58:21PM -0700, jianchao.wang wrote:
> Hi Keith
> 
> How about introducing a per hctx queue_rq callback, then install a
> separate .queue_rq callback for the dead hctx. Then we just need to
> start and complete the request there.

That sounds like it could work, though I think just returning
BLK_STS_IOERR like we currently do is better than starting and completing
it. But adding a new callback that can change at runtime is more
complicated, and would affect a lot more drivers than I am able
to test.
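
For context, the per-IO check referred to above is roughly the following,
paraphrased from the check at the top of nvme_queue_rq() rather than quoted
exactly:

	/*
	 * Fail any request dispatched to a queue that is no longer
	 * enabled, instead of handing it to dead hardware.
	 */
	if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
		return BLK_STS_IOERR;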

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 5/5] nvme/pci: Remove queue IO flushing hack
  2019-03-08 17:40   ` Keith Busch
@ 2019-03-11 18:40     ` Christoph Hellwig
  -1 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2019-03-11 18:40 UTC (permalink / raw)
  To: Keith Busch
  Cc: linux-nvme, linux-block, Jens Axboe, Christoph Hellwig, Sagi Grimberg

From a quick look the code seems reasonably sensible here,
but any chance we could have this in common code?

> +static bool nvme_fail_queue_request(struct request *req, void *data, bool reserved)
> +{
> +	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
> +	struct nvme_queue *nvmeq = iod->nvmeq;
> +
> +	if (!test_bit(NVMEQ_ENABLED, &nvmeq->flags))
> +		blk_mq_end_request(req, BLK_STS_IOERR);
> +	return true;
> +}

The only thing not purely block layer here is the enabled flag.
So if we had a per-hctx enabled flag we could lift this out of nvme,
and hopefully start reusing it in other drivers.
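
As a purely hypothetical sketch of that direction (BLK_MQ_S_DISABLED below
does not exist; it is made up to stand in for a generic per-hctx flag):

static bool blk_mq_fail_disabled_request(struct blk_mq_hw_ctx *hctx,
		struct request *rq, void *data, bool reserved)
{
	/* Only block layer state is consulted, no driver-private flags. */
	if (test_bit(BLK_MQ_S_DISABLED, &hctx->state))
		blk_mq_end_request(rq, BLK_STS_IOERR);
	return true;
}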


^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 5/5] nvme/pci: Remove queue IO flushing hack
  2019-03-11 18:40     ` Christoph Hellwig
@ 2019-03-11 19:37       ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-11 19:37 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Keith Busch, linux-nvme, linux-block, Jens Axboe, Sagi Grimberg

On Mon, Mar 11, 2019 at 07:40:31PM +0100, Christoph Hellwig wrote:
> From a quick look the code seems reasonably sensible here,
> but any chance we could have this in common code?
> 
> > +static bool nvme_fail_queue_request(struct request *req, void *data, bool reserved)
> > +{
> > +	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
> > +	struct nvme_queue *nvmeq = iod->nvmeq;
> > +
> > +	if (!test_bit(NVMEQ_ENABLED, &nvmeq->flags))
> > +		blk_mq_end_request(req, BLK_STS_IOERR);
> > +	return true;
> > +}
> 
> The only thing not purely block layer here is the enabled flag.
> So if we had a per-hctx enabled flag we could lift this out of nvme,
> and hopefully start reusing it in other drivers.

Okay, I may even be able to drop the new block exports if we do request
termination in generic block layer. That's probably the right thing
anyway since that layer is in a better position to check the necessary
conditions that make tag iteration safe. Bart did point out that it is
generally not safe for drivers to do, so it'd be good to safeguard against
incorrect usage.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 5/5] nvme/pci: Remove queue IO flushing hack
  2019-03-11 19:37       ` Keith Busch
@ 2019-03-27  8:31         ` Christoph Hellwig
  -1 siblings, 0 replies; 60+ messages in thread
From: Christoph Hellwig @ 2019-03-27  8:31 UTC (permalink / raw)
  To: Keith Busch
  Cc: Christoph Hellwig, Keith Busch, linux-nvme, linux-block,
	Jens Axboe, Sagi Grimberg

On Mon, Mar 11, 2019 at 01:37:53PM -0600, Keith Busch wrote:
> > The only thing not purely block layer here is the enabled flag.
> > So if we had a per-hctx enabled flag we could lift this out of nvme,
> > and hopefully start reusing it in other drivers.
> 
> Okay, I may even be able to drop the new block exports if we do request
> termination in generic block layer. That's probably the right thing
> anyway since that layer is in a better position to check the necessary
> conditions that make tag iteration safe. Bart did point out that it is
> generally not safe for drivers to do, so it'd be good to safeguard against
> incorrect usage.

Did you get a chance to look into this?

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 5/5] nvme/pci: Remove queue IO flushing hack
  2019-03-27  8:31         ` Christoph Hellwig
@ 2019-03-27 13:21           ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-27 13:21 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, linux-block, Sagi Grimberg, linux-nvme, Busch, Keith

On Wed, Mar 27, 2019 at 01:31:42AM -0700, Christoph Hellwig wrote:
> > Okay, I may even be able to drop the new block exports if we do request
> > termination in generic block layer. That's probably the right thing
> > anyway since that layer is in a better position to check the necessary
> > conditions that make tag iteration safe. Bart did point out that it is
> > generally not safe for drivers to do, so it'd be good to safeguard against
> > incorrect usage.
> 
> Did you get a chance to look into this?

I haven't had a chance to put it through the proper tests (I no longer
have a hotplug machine), but this is what I'd written if you can give it
a quick look:

From 5afd8e3765eabf859100fda84e646a96683d7751 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Tue, 12 Mar 2019 13:58:12 -0600
Subject: [PATCH] blk-mq: Provide request termination API

If a driver determines that the hardware contexts backing its request queue
are unable to service new commands, it would have to end those commands
in its IO path. This requires unlikely checks per-IO.

Create a new API that terminates all requests directly rather than
requiring those requests get flushed through the low level driver. This
is safe only if the current request allocation state is unchanging, so the
driver must have quiesced and initiated a freeze on its queue, and have
reclaimed any in-flight requests so that the tag iterator is not racing
with in-flux requests. The new API enforces these conditions in order to
terminate requests successfully.

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 block/blk-mq.c         | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/blk-mq.h |  1 +
 2 files changed, 37 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a9c181603cbd..ad98c27e2b34 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -952,6 +952,42 @@ static void blk_mq_timeout_work(struct work_struct *work)
 	blk_queue_exit(q);
 }
 
+static bool blk_mq_terminate_request(struct blk_mq_hw_ctx *hctx,
+		struct request *rq, void *priv, bool reserved)
+{
+	int *hctx_idx = priv;
+
+	if (WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE))
+		return false;
+	if (hctx->queue_num >= *hctx_idx)
+		blk_mq_end_request(rq, BLK_STS_IOERR);
+	return true;
+}
+
+/**
+ * blk_mq_terminate_queued_requests() - end requests with an error queued on
+ *					hardware contexts at and above the
+ *					provided index.
+ * @q: request queue
+ * @hctx_idx: starting hardware context, 0 for all hctx's
+ *
+ * A low level driver should invoke this routine when its hardware contexts are
+ * not capable of handling future requests. The caller must ensure their
+ * request_queue is quiesced, freeze initiated, and all dispatched requests
+ * have been reclaimed so the tag iteration has a static request allocation to
+ * consider.
+ */
+void blk_mq_terminate_queued_requests(struct request_queue *q, int hctx_idx)
+{
+	if (WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)))
+		return;
+	if (WARN_ON_ONCE(!blk_queue_quiesced(q)))
+		return;
+	blk_sync_queue(q);
+	blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_request, &hctx_idx);
+}
+EXPORT_SYMBOL(blk_mq_terminate_queued_requests);
+
 struct flush_busy_ctx_data {
 	struct blk_mq_hw_ctx *hctx;
 	struct list_head *list;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a64b3fdce0b0..d47cd4575abb 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -334,6 +334,7 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap);
 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
 
 void blk_mq_quiesce_queue_nowait(struct request_queue *q);
+void blk_mq_terminate_queued_requests(struct request_queue *q, int hctx_idx);
 
 unsigned int blk_mq_rq_cpu(struct request *rq);
 
-- 
2.14.4


^ permalink raw reply related	[flat|nested] 60+ messages in thread

* Re: [PATCH 5/5] nvme/pci: Remove queue IO flushing hack
  2019-03-27 13:21           ` Keith Busch
@ 2019-03-28  1:42             ` jianchao.wang
  -1 siblings, 0 replies; 60+ messages in thread
From: jianchao.wang @ 2019-03-28  1:42 UTC (permalink / raw)
  To: Keith Busch, Christoph Hellwig
  Cc: Jens Axboe, linux-block, Busch, Keith, Sagi Grimberg, linux-nvme



On 3/27/19 9:21 PM, Keith Busch wrote:
> On Wed, Mar 27, 2019 at 01:31:42AM -0700, Christoph Hellwig wrote:
>>> Okay, I may even be able to drop the new block exports if we do request
>>> termination in generic block layer. That's probably the right thing
>>> anyway since that layer is in a better position to check the necessary
>>> conditions that make tag iteration safe. Bart did point out that it is
>>> generally not safe for drivers to do, so it'd be good to safeguard against
>>> incorrect usage.
>>
>> Did you get a chance to look into this?
> 
> I haven't had a chance to put it through the proper tests (I no longer
> have a hotplug machine), but this is what I'd written if you can give it
> a quick look:
> 
> From 5afd8e3765eabf859100fda84e646a96683d7751 Mon Sep 17 00:00:00 2001
> From: Keith Busch <keith.busch@intel.com>
> Date: Tue, 12 Mar 2019 13:58:12 -0600
> Subject: [PATCH] blk-mq: Provide request termination API
> 
> If a driver determines that the hardware contexts backing its request queue
> are unable to service new commands, it would have to end those commands
> in its IO path. This requires unlikely checks per-IO.
> 
> Create a new API that terminates all requests directly rather than
> requiring those requests get flushed through the low level driver. This
> is safe only if the current request allocation state is unchanging, so the
> driver must have quiesced and initiated a freeze on its queue, and have
> reclaimed any in-flight requests so that the tag iterator is not racing
> with in-flux requests. The new API enforces these conditions in order to
> terminate requests successfully.
> 
> Signed-off-by: Keith Busch <keith.busch@intel.com>
> ---
>  block/blk-mq.c         | 36 ++++++++++++++++++++++++++++++++++++
>  include/linux/blk-mq.h |  1 +
>  2 files changed, 37 insertions(+)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index a9c181603cbd..ad98c27e2b34 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -952,6 +952,42 @@ static void blk_mq_timeout_work(struct work_struct *work)
>  	blk_queue_exit(q);
>  }
>  
> +static bool blk_mq_terminate_request(struct blk_mq_hw_ctx *hctx,
> +		struct request *rq, void *priv, bool reserved)
> +{
> +	int *hctx_idx = priv;
> +
> +	if (WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE))
> +		return false;
> +	if (hctx->queue_num >= *hctx_idx)
> +		blk_mq_end_request(rq, BLK_STS_IOERR);
> +	return true;
> +}
> +
> +/**
> + * blk_mq_terminate_queued_requests() - end requests with an error queued on
> + *					hardware contexts at and above the
> + *					provided index.
> + * @q: request queue
> + * @hctx_idx: starting hardware context, 0 for all hctx's
> + *
> + * A low level driver should invoke this routine when its hardware contexts are
> + * not capable of handling future requests. The caller must ensure their
> + * request_queue is quiesced, freeze initiated, and all dispatched requests
> + * have been reclaimed so the tag iteration has a static request allocation to
> + * consider.
> + */
> +void blk_mq_terminate_queued_requests(struct request_queue *q, int hctx_idx)
> +{
> +	if (WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)))
> +		return;
> +	if (WARN_ON_ONCE(!blk_queue_quiesced(q)))
> +		return;
> +	blk_sync_queue(q);
> +	blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_request, &hctx_idx);
> +}


Is it really OK to end these requests directly without dequeuing them?
All of them are on ctx->rq_list, the hctx->dispatch list, or the internal queue of an io scheduler.

Terrible things may happen after we unquiesce the queue.

Thanks
Jianchao

> +EXPORT_SYMBOL(blk_mq_terminate_queued_requests);
> +
>  struct flush_busy_ctx_data {
>  	struct blk_mq_hw_ctx *hctx;
>  	struct list_head *list;
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index a64b3fdce0b0..d47cd4575abb 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -334,6 +334,7 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap);
>  void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
>  
>  void blk_mq_quiesce_queue_nowait(struct request_queue *q);
> +void blk_mq_terminate_queued_requests(struct request_queue *q, int hctx_idx);
>  
>  unsigned int blk_mq_rq_cpu(struct request *rq);
>  
> 

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 5/5] nvme/pci: Remove queue IO flushing hack
  2019-03-28  1:42             ` jianchao.wang
@ 2019-03-28  3:33               ` Keith Busch
  -1 siblings, 0 replies; 60+ messages in thread
From: Keith Busch @ 2019-03-28  3:33 UTC (permalink / raw)
  To: jianchao.wang
  Cc: Christoph Hellwig, Jens Axboe, linux-block, Busch, Keith,
	Sagi Grimberg, linux-nvme

On Thu, Mar 28, 2019 at 09:42:51AM +0800, jianchao.wang wrote:
> On 3/27/19 9:21 PM, Keith Busch wrote:
> > +void blk_mq_terminate_queued_requests(struct request_queue *q, int hctx_idx)
> > +{
> > +	if (WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)))
> > +		return;
> > +	if (WARN_ON_ONCE(!blk_queue_quiesced(q)))
> > +		return;
> > +	blk_sync_queue(q);
> > +	blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_request, &hctx_idx);
> > +}
> 
> 
> Is it really OK to end these requests directly without dequeuing them?
> All of them are on ctx->rq_list, the hctx->dispatch list, or the internal queue of an io scheduler.
> 
> Terrible things may happen after we unquiesce the queue.

Good point. This was intended as a last action before killing an hctx or
the entire request queue, so I didn't expect they'd be turned back on, but
it's easy enough to splice lists and handle those requests directly. We
wouldn't even need to iterate tags that way, and it may be a better way to
handle this.
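
Roughly the following sketch, assuming requests held privately by an I/O
scheduler get flushed or failed separately (the helper name is made up):

static void blk_mq_end_queued_requests(struct blk_mq_hw_ctx *hctx)
{
	LIST_HEAD(list);
	struct request *rq, *next;

	/* Pull everything queued in the per-cpu software queues ... */
	blk_mq_flush_busy_ctxs(hctx, &list);

	/* ... plus anything parked on the dispatch list. */
	spin_lock(&hctx->lock);
	list_splice_init(&hctx->dispatch, &list);
	spin_unlock(&hctx->lock);

	list_for_each_entry_safe(rq, next, &list, queuelist) {
		list_del_init(&rq->queuelist);
		blk_mq_end_request(rq, BLK_STS_IOERR);
	}
}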

^ permalink raw reply	[flat|nested] 60+ messages in thread

end of thread, other threads:[~2019-03-28  3:33 UTC | newest]

Thread overview: 60+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-08 17:40 [PATCH 1/5] blk-mq: Export reading mq request state Keith Busch
2019-03-08 17:40 ` Keith Busch
2019-03-08 17:40 ` [PATCH 2/5] blk-mq: Export iterating queue requests Keith Busch
2019-03-08 17:40   ` Keith Busch
2019-03-08 18:08   ` Bart Van Assche
2019-03-08 18:08     ` Bart Van Assche
2019-03-08 18:13     ` Keith Busch
2019-03-08 18:13       ` Keith Busch
2019-03-08 17:40 ` [PATCH 3/5] blk-mq: Iterate tagset over all requests Keith Busch
2019-03-08 17:40   ` Keith Busch
2019-03-08 17:40 ` [PATCH 4/5] nvme: Fail dead namespace's entered requests Keith Busch
2019-03-08 17:40   ` Keith Busch
2019-03-08 18:15   ` Bart Van Assche
2019-03-08 18:15     ` Bart Van Assche
2019-03-08 18:19     ` Keith Busch
2019-03-08 18:19       ` Keith Busch
2019-03-08 21:54       ` Bart Van Assche
2019-03-08 21:54         ` Bart Van Assche
2019-03-08 22:06         ` Keith Busch
2019-03-08 22:06           ` Keith Busch
2019-03-11  3:58   ` jianchao.wang
2019-03-11  3:58     ` jianchao.wang
2019-03-11 15:42     ` Keith Busch
2019-03-11 15:42       ` Keith Busch
2019-03-08 17:40 ` [PATCH 5/5] nvme/pci: Remove queue IO flushing hack Keith Busch
2019-03-08 17:40   ` Keith Busch
2019-03-08 18:19   ` Bart Van Assche
2019-03-08 18:19     ` Bart Van Assche
2019-03-11 18:40   ` Christoph Hellwig
2019-03-11 18:40     ` Christoph Hellwig
2019-03-11 19:37     ` Keith Busch
2019-03-11 19:37       ` Keith Busch
2019-03-27  8:31       ` Christoph Hellwig
2019-03-27  8:31         ` Christoph Hellwig
2019-03-27 13:21         ` Keith Busch
2019-03-27 13:21           ` Keith Busch
2019-03-28  1:42           ` jianchao.wang
2019-03-28  1:42             ` jianchao.wang
2019-03-28  3:33             ` Keith Busch
2019-03-28  3:33               ` Keith Busch
2019-03-08 18:07 ` [PATCH 1/5] blk-mq: Export reading mq request state Bart Van Assche
2019-03-08 18:07   ` Bart Van Assche
2019-03-08 18:15   ` Keith Busch
2019-03-08 18:15     ` Keith Busch
2019-03-08 18:42     ` Bart Van Assche
2019-03-08 18:42       ` Bart Van Assche
2019-03-08 19:19       ` Keith Busch
2019-03-08 19:19         ` Keith Busch
2019-03-08 20:47         ` Bart Van Assche
2019-03-08 20:47           ` Bart Van Assche
2019-03-08 21:14           ` Keith Busch
2019-03-08 21:14             ` Keith Busch
2019-03-08 21:25             ` Bart Van Assche
2019-03-08 21:25               ` Bart Van Assche
2019-03-08 21:31               ` Keith Busch
2019-03-08 21:31                 ` Keith Busch
2019-03-08 20:21 ` Sagi Grimberg
2019-03-08 20:21   ` Sagi Grimberg
2019-03-08 20:29   ` Keith Busch
2019-03-08 20:29     ` Keith Busch
