* [PATCHSET v4 0/6] Various block optimizations
@ 2018-11-23 18:34 Jens Axboe
  2018-11-23 18:34 ` [PATCH 1/6] blk-mq: when polling for IO, look for any completion Jens Axboe
                   ` (5 more replies)
  0 siblings, 6 replies; 13+ messages in thread
From: Jens Axboe @ 2018-11-23 18:34 UTC (permalink / raw)
  To: linux-block

Some of these are optimizations; the latter part is prep work
for supporting polling with aio.

Patches against my for-4.21/block branch. These patches can also
be found in my mq-perf branch, though there are other patches
sitting on top of this series (notably aio polling, as mentioned).

Changes since v3:

- Remove ->poll_queue() from nvme-fc
- Fold __nvme_rdma_recv_done() and nvme_rdma_recv_done()
- Drop merged patches
- Merge __blk_mq_poll() and blk_mq_poll()
- Add patch to never redirect polled completions

Changes since v2:

- Include polled swap IO in the poll optimizations
- Get rid of unnecessary write barrier for DIO wakeup
- Fix a potential stall if need_resched() was set and preempt
  wasn't enabled
- Provide separate mq_ops for NVMe with poll queues
- Drop q->mq_ops patch
- Rebase on top of for-4.21/block

Changes since v1:

- Improve nvme irq disabling for polled IO
- Fix barriers in the ordered wakeup for polled O_DIRECT
- Add patch to allow polling to find any command that is done
- Add patch to control whether polling spins or not
- Have async O_DIRECT mark a bio as pollable
- Don't plug for polling

-- 
Jens Axboe




* [PATCH 1/6] blk-mq: when polling for IO, look for any completion
  2018-11-23 18:34 [PATCHSET v4 0/6] Various block optimizations Jens Axboe
@ 2018-11-23 18:34 ` Jens Axboe
  2018-11-26  8:16   ` Christoph Hellwig
  2018-11-23 18:34 ` [PATCH 2/6] blk-mq: remove 'tag' parameter from mq_ops->poll() Jens Axboe
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 13+ messages in thread
From: Jens Axboe @ 2018-11-23 18:34 UTC (permalink / raw)
  To: linux-block; +Cc: Jens Axboe

If we want to support async IO polling, we have to allow finding
completions other than just the one we are looking for. Always pass
-1 to the mq_ops->poll() helper, and have it return how many events
were found in this poll loop.
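
As a rough sketch of the new contract (illustrative only, not part of
this patch; everything prefixed "example_" is made up), a driver's
->poll() handler now reaps whatever is ready and reports a count
instead of matching a single tag:

  static int example_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
  {
          struct example_queue *eq = hctx->driver_data;   /* hypothetical */
          int found = 0;

          /* tag is always -1U now: count every completion we reap */
          while (example_completion_pending(eq)) {        /* hypothetical */
                  example_complete_one(eq);               /* hypothetical */
                  found++;
          }
          return found;
  }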

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c         |  2 +-
 block/blk-mq.c           | 71 ++++++++++++++++++++--------------------
 drivers/nvme/host/pci.c  | 14 ++++----
 drivers/nvme/host/rdma.c | 39 +++++++++-------------
 include/linux/blkdev.h   |  2 +-
 5 files changed, 60 insertions(+), 68 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 04f5be473638..9e99aa852d6e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1273,7 +1273,7 @@ blk_qc_t submit_bio(struct bio *bio)
 }
 EXPORT_SYMBOL(submit_bio);
 
-bool blk_poll(struct request_queue *q, blk_qc_t cookie)
+int blk_poll(struct request_queue *q, blk_qc_t cookie)
 {
 	if (!q->poll_fn || !blk_qc_t_valid(cookie))
 		return false;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b16204df65d1..ec6c79578332 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3285,15 +3285,12 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
 		return false;
 
 	/*
-	 * poll_nsec can be:
+	 * If we get here, hybrid polling is enabled. Hence poll_nsec can be:
 	 *
-	 * -1:	don't ever hybrid sleep
 	 *  0:	use half of prev avg
 	 * >0:	use this specific value
 	 */
-	if (q->poll_nsec == -1)
-		return false;
-	else if (q->poll_nsec > 0)
+	if (q->poll_nsec > 0)
 		nsecs = q->poll_nsec;
 	else
 		nsecs = blk_mq_poll_nsecs(q, hctx, rq);
@@ -3330,11 +3327,41 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
 	return true;
 }
 
-static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static bool blk_mq_poll_hybrid(struct request_queue *q,
+			       struct blk_mq_hw_ctx *hctx, blk_qc_t cookie)
 {
-	struct request_queue *q = hctx->queue;
+	struct request *rq;
+
+	if (q->poll_nsec == -1)
+		return false;
+
+	if (!blk_qc_t_is_internal(cookie))
+		rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
+	else {
+		rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
+		/*
+		 * With scheduling, if the request has completed, we'll
+		 * get a NULL return here, as we clear the sched tag when
+		 * that happens. The request still remains valid, like always,
+		 * so we should be safe with just the NULL check.
+		 */
+		if (!rq)
+			return false;
+	}
+
+	return blk_mq_poll_hybrid_sleep(q, hctx, rq);
+}
+
+static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
+{
+	struct blk_mq_hw_ctx *hctx;
 	long state;
 
+	if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+		return 0;
+
+	hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
+
 	/*
 	 * If we sleep, have the caller restart the poll loop to reset
 	 * the state. Like for the other success return cases, the
@@ -3342,7 +3369,7 @@ static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
 	 * the IO isn't complete, we'll get called again and will go
 	 * straight to the busy poll loop.
 	 */
-	if (blk_mq_poll_hybrid_sleep(q, hctx, rq))
+	if (blk_mq_poll_hybrid(q, hctx, cookie))
 		return 1;
 
 	hctx->poll_considered++;
@@ -3353,7 +3380,7 @@ static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
 
 		hctx->poll_invoked++;
 
-		ret = q->mq_ops->poll(hctx, rq->tag);
+		ret = q->mq_ops->poll(hctx, -1U);
 		if (ret > 0) {
 			hctx->poll_success++;
 			__set_current_state(TASK_RUNNING);
@@ -3374,32 +3401,6 @@ static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
 	return 0;
 }
 
-static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
-{
-	struct blk_mq_hw_ctx *hctx;
-	struct request *rq;
-
-	if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
-		return 0;
-
-	hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
-	if (!blk_qc_t_is_internal(cookie))
-		rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
-	else {
-		rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
-		/*
-		 * With scheduling, if the request has completed, we'll
-		 * get a NULL return here, as we clear the sched tag when
-		 * that happens. The request still remains valid, like always,
-		 * so we should be safe with just the NULL check.
-		 */
-		if (!rq)
-			return 0;
-	}
-
-	return __blk_mq_poll(hctx, rq);
-}
-
 unsigned int blk_mq_rq_cpu(struct request *rq)
 {
 	return rq->mq_ctx->cpu;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 57e790391b82..de50d80ecc84 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1012,15 +1012,15 @@ static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
 	}
 }
 
-static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
-		u16 *end, int tag)
+static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
+				  u16 *end, unsigned int tag)
 {
-	bool found = false;
+	int found = 0;
 
 	*start = nvmeq->cq_head;
-	while (!found && nvme_cqe_pending(nvmeq)) {
-		if (nvmeq->cqes[nvmeq->cq_head].command_id == tag)
-			found = true;
+	while (nvme_cqe_pending(nvmeq)) {
+		if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag)
+			found++;
 		nvme_update_cq_head(nvmeq);
 	}
 	*end = nvmeq->cq_head;
@@ -1062,7 +1062,7 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
 {
 	u16 start, end;
-	bool found;
+	int found;
 
 	if (!nvme_cqe_pending(nvmeq))
 		return 0;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index d181cafedc58..c2c3e1a5b7af 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1409,12 +1409,11 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
 	WARN_ON_ONCE(ret);
 }
 
-static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
-		struct nvme_completion *cqe, struct ib_wc *wc, int tag)
+static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
+		struct nvme_completion *cqe, struct ib_wc *wc)
 {
 	struct request *rq;
 	struct nvme_rdma_request *req;
-	int ret = 0;
 
 	rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id);
 	if (!rq) {
@@ -1422,7 +1421,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 			"tag 0x%x on QP %#x not found\n",
 			cqe->command_id, queue->qp->qp_num);
 		nvme_rdma_error_recovery(queue->ctrl);
-		return ret;
+		return;
 	}
 	req = blk_mq_rq_to_pdu(rq);
 
@@ -1437,6 +1436,8 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 			nvme_rdma_error_recovery(queue->ctrl);
 		}
 	} else if (req->mr) {
+		int ret;
+
 		ret = nvme_rdma_inv_rkey(queue, req);
 		if (unlikely(ret < 0)) {
 			dev_err(queue->ctrl->ctrl.device,
@@ -1445,19 +1446,14 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 			nvme_rdma_error_recovery(queue->ctrl);
 		}
 		/* the local invalidation completion will end the request */
-		return 0;
+		return;
 	}
 
-	if (refcount_dec_and_test(&req->ref)) {
-		if (rq->tag == tag)
-			ret = 1;
+	if (refcount_dec_and_test(&req->ref))
 		nvme_end_request(rq, req->status, req->result);
-	}
-
-	return ret;
 }
 
-static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
+static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct nvme_rdma_qe *qe =
 		container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
@@ -1465,11 +1461,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
 	struct ib_device *ibdev = queue->device->dev;
 	struct nvme_completion *cqe = qe->data;
 	const size_t len = sizeof(struct nvme_completion);
-	int ret = 0;
 
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		nvme_rdma_wr_error(cq, wc, "RECV");
-		return 0;
+		return;
 	}
 
 	ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE);
@@ -1484,16 +1479,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
 		nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
 				&cqe->result);
 	else
-		ret = nvme_rdma_process_nvme_rsp(queue, cqe, wc, tag);
+		nvme_rdma_process_nvme_rsp(queue, cqe, wc);
 	ib_dma_sync_single_for_device(ibdev, qe->dma, len, DMA_FROM_DEVICE);
 
 	nvme_rdma_post_recv(queue, qe);
-	return ret;
-}
-
-static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
-{
-	__nvme_rdma_recv_done(cq, wc, -1);
 }
 
 static int nvme_rdma_conn_established(struct nvme_rdma_queue *queue)
@@ -1758,10 +1747,12 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 		struct ib_cqe *cqe = wc.wr_cqe;
 
 		if (cqe) {
-			if (cqe->done == nvme_rdma_recv_done)
-				found |= __nvme_rdma_recv_done(cq, &wc, tag);
-			else
+			if (cqe->done == nvme_rdma_recv_done) {
+				nvme_rdma_recv_done(cq, &wc);
+				found++;
+			} else {
 				cqe->done(cq, &wc);
+			}
 		}
 	}
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9b53db06ad08..f3015e9b5ae3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -867,7 +867,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
 int blk_status_to_errno(blk_status_t status);
 blk_status_t errno_to_blk_status(int errno);
 
-bool blk_poll(struct request_queue *q, blk_qc_t cookie);
+int blk_poll(struct request_queue *q, blk_qc_t cookie);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {
-- 
2.17.1



* [PATCH 2/6] blk-mq: remove 'tag' parameter from mq_ops->poll()
  2018-11-23 18:34 [PATCHSET v4 0/6] Various block optimizations Jens Axboe
  2018-11-23 18:34 ` [PATCH 1/6] blk-mq: when polling for IO, look for any completion Jens Axboe
@ 2018-11-23 18:34 ` Jens Axboe
  2018-11-23 18:34 ` [PATCH 3/6] nvme: remove opportunistic polling from bdev target Jens Axboe
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 13+ messages in thread
From: Jens Axboe @ 2018-11-23 18:34 UTC (permalink / raw)
  To: linux-block; +Cc: Jens Axboe

We always pass in -1 now and none of the callers use the tag value,
so remove the parameter.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c           | 2 +-
 drivers/nvme/host/pci.c  | 8 ++++----
 drivers/nvme/host/rdma.c | 2 +-
 include/linux/blk-mq.h   | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index ec6c79578332..b66cca3ce1e5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3380,7 +3380,7 @@ static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
 
 		hctx->poll_invoked++;
 
-		ret = q->mq_ops->poll(hctx, -1U);
+		ret = q->mq_ops->poll(hctx);
 		if (ret > 0) {
 			hctx->poll_success++;
 			__set_current_state(TASK_RUNNING);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index de50d80ecc84..73effe586e5f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1075,14 +1075,14 @@ static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
 	return found;
 }
 
-static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+static int nvme_poll(struct blk_mq_hw_ctx *hctx)
 {
 	struct nvme_queue *nvmeq = hctx->driver_data;
 
-	return __nvme_poll(nvmeq, tag);
+	return __nvme_poll(nvmeq, -1);
 }
 
-static int nvme_poll_noirq(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+static int nvme_poll_noirq(struct blk_mq_hw_ctx *hctx)
 {
 	struct nvme_queue *nvmeq = hctx->driver_data;
 	u16 start, end;
@@ -1092,7 +1092,7 @@ static int nvme_poll_noirq(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 		return 0;
 
 	spin_lock(&nvmeq->cq_lock);
-	found = nvme_process_cq(nvmeq, &start, &end, tag);
+	found = nvme_process_cq(nvmeq, &start, &end, -1);
 	spin_unlock(&nvmeq->cq_lock);
 
 	nvme_complete_cqes(nvmeq, start, end);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index c2c3e1a5b7af..ccfde6c7c0a5 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1736,7 +1736,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return BLK_STS_IOERR;
 }
 
-static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
 {
 	struct nvme_rdma_queue *queue = hctx->driver_data;
 	struct ib_cq *cq = queue->ib_cq;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 929e8abc5535..ca0520ca6437 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -132,7 +132,7 @@ typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *,
 typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
 		bool);
 typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);
-typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int);
+typedef int (poll_fn)(struct blk_mq_hw_ctx *);
 typedef int (map_queues_fn)(struct blk_mq_tag_set *set);
 typedef bool (busy_fn)(struct request_queue *);
 typedef void (complete_fn)(struct request *);
-- 
2.17.1



* [PATCH 3/6] nvme: remove opportunistic polling from bdev target
  2018-11-23 18:34 [PATCHSET v4 0/6] Various block optimizations Jens Axboe
  2018-11-23 18:34 ` [PATCH 1/6] blk-mq: when polling for IO, look for any completion Jens Axboe
  2018-11-23 18:34 ` [PATCH 2/6] blk-mq: remove 'tag' parameter from mq_ops->poll() Jens Axboe
@ 2018-11-23 18:34 ` Jens Axboe
  2018-11-26  8:17   ` Christoph Hellwig
  2018-11-23 18:34 ` [PATCH 4/6] block: make blk_poll() take a parameter on whether to spin or not Jens Axboe
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 13+ messages in thread
From: Jens Axboe @ 2018-11-23 18:34 UTC (permalink / raw)
  To: linux-block; +Cc: Jens Axboe

It doesn't set HIPRI on the bio, so polling for it is pretty silly.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/io-cmd-bdev.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index c1ec3475a140..c1cb2ed5531c 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -115,8 +115,6 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 	}
 
 	cookie = submit_bio(bio);
-
-	blk_poll(bdev_get_queue(req->ns->bdev), cookie);
 }
 
 static void nvmet_bdev_execute_flush(struct nvmet_req *req)
-- 
2.17.1



* [PATCH 4/6] block: make blk_poll() take a parameter on whether to spin or not
  2018-11-23 18:34 [PATCHSET v4 0/6] Various block optimizations Jens Axboe
                   ` (2 preceding siblings ...)
  2018-11-23 18:34 ` [PATCH 3/6] nvme: remove opportunistic polling from bdev target Jens Axboe
@ 2018-11-23 18:34 ` Jens Axboe
  2018-11-26  8:18   ` Christoph Hellwig
  2018-11-23 18:34 ` [PATCH 5/6] blk-mq: ensure mq_ops ->poll() is entered at least once Jens Axboe
  2018-11-23 18:34 ` [PATCH 6/6] blk-mq: never redirect polled IO completions Jens Axboe
  5 siblings, 1 reply; 13+ messages in thread
From: Jens Axboe @ 2018-11-23 18:34 UTC (permalink / raw)
  To: linux-block; +Cc: Jens Axboe

blk_poll() has always kept spinning until it found an IO. This is
fine for SYNC polling, since we need to find the one request we have
pending, but in preparation for ASYNC polling it can be beneficial
to just check whether we have any entries available or not.

Existing callers are converted to pass in 'spin == true', to retain
the old behavior.
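
For illustration only (not part of this patch; the helper below is
hypothetical), an async reap path could pass spin == false to check
for completions once without busy-waiting, while sync waiters keep
passing true as before:

  /* hypothetical async-side helper, sketch only */
  static int example_reap_completions(struct request_queue *q, blk_qc_t cookie)
  {
          /* spin == false: check once, don't busy-wait for a specific IO */
          return blk_poll(q, cookie, false);
  }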

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c              | 4 ++--
 block/blk-mq.c                | 6 +++---
 drivers/nvme/host/multipath.c | 4 ++--
 fs/block_dev.c                | 4 ++--
 fs/direct-io.c                | 2 +-
 fs/iomap.c                    | 2 +-
 include/linux/blkdev.h        | 4 ++--
 mm/page_io.c                  | 2 +-
 8 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 9e99aa852d6e..f7ffc43ada14 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1273,14 +1273,14 @@ blk_qc_t submit_bio(struct bio *bio)
 }
 EXPORT_SYMBOL(submit_bio);
 
-int blk_poll(struct request_queue *q, blk_qc_t cookie)
+int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
 {
 	if (!q->poll_fn || !blk_qc_t_valid(cookie))
 		return false;
 
 	if (current->plug)
 		blk_flush_plug_list(current->plug, false);
-	return q->poll_fn(q, cookie);
+	return q->poll_fn(q, cookie, spin);
 }
 EXPORT_SYMBOL_GPL(blk_poll);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b66cca3ce1e5..c2751f0a3ccc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -38,7 +38,7 @@
 #include "blk-mq-sched.h"
 #include "blk-rq-qos.h"
 
-static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
+static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, bool spin);
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
 
@@ -3352,7 +3352,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q,
 	return blk_mq_poll_hybrid_sleep(q, hctx, rq);
 }
 
-static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
+static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
 {
 	struct blk_mq_hw_ctx *hctx;
 	long state;
@@ -3392,7 +3392,7 @@ static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
 
 		if (current->state == TASK_RUNNING)
 			return 1;
-		if (ret < 0)
+		if (ret < 0 || !spin)
 			break;
 		cpu_relax();
 	}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f9eeb3b58632..ffebdd0ae34b 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -220,7 +220,7 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
 	return ret;
 }
 
-static int nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
+static int nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc, bool spin)
 {
 	struct nvme_ns_head *head = q->queuedata;
 	struct nvme_ns *ns;
@@ -230,7 +230,7 @@ static int nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
 	srcu_idx = srcu_read_lock(&head->srcu);
 	ns = srcu_dereference(head->current_path[numa_node_id()], &head->srcu);
 	if (likely(ns && nvme_path_is_optimized(ns)))
-		found = ns->queue->poll_fn(q, qc);
+		found = ns->queue->poll_fn(q, qc, spin);
 	srcu_read_unlock(&head->srcu, srcu_idx);
 	return found;
 }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 64ba27b8b754..d233a59ea364 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -243,7 +243,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 			break;
 
 		if (!(iocb->ki_flags & IOCB_HIPRI) ||
-		    !blk_poll(bdev_get_queue(bdev), qc))
+		    !blk_poll(bdev_get_queue(bdev), qc, true))
 			io_schedule();
 	}
 	__set_current_state(TASK_RUNNING);
@@ -423,7 +423,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 			break;
 
 		if (!(iocb->ki_flags & IOCB_HIPRI) ||
-		    !blk_poll(bdev_get_queue(bdev), qc))
+		    !blk_poll(bdev_get_queue(bdev), qc, true))
 			io_schedule();
 	}
 	__set_current_state(TASK_RUNNING);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index ea07d5a34317..a5a4e5a1423e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -518,7 +518,7 @@ static struct bio *dio_await_one(struct dio *dio)
 		dio->waiter = current;
 		spin_unlock_irqrestore(&dio->bio_lock, flags);
 		if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
-		    !blk_poll(dio->bio_disk->queue, dio->bio_cookie))
+		    !blk_poll(dio->bio_disk->queue, dio->bio_cookie, true))
 			io_schedule();
 		/* wake up sets us TASK_RUNNING */
 		spin_lock_irqsave(&dio->bio_lock, flags);
diff --git a/fs/iomap.c b/fs/iomap.c
index c5df035ace6f..74c1f37f0fd6 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1896,7 +1896,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 			if (!(iocb->ki_flags & IOCB_HIPRI) ||
 			    !dio->submit.last_queue ||
 			    !blk_poll(dio->submit.last_queue,
-					 dio->submit.cookie))
+					 dio->submit.cookie, true))
 				io_schedule();
 		}
 		__set_current_state(TASK_RUNNING);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f3015e9b5ae3..e3c0a8ec16a7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -283,7 +283,7 @@ static inline unsigned short req_get_ioprio(struct request *req)
 struct blk_queue_ctx;
 
 typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio);
-typedef int (poll_q_fn) (struct request_queue *q, blk_qc_t);
+typedef int (poll_q_fn) (struct request_queue *q, blk_qc_t, bool spin);
 
 struct bio_vec;
 typedef int (dma_drain_needed_fn)(struct request *);
@@ -867,7 +867,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
 int blk_status_to_errno(blk_status_t status);
 blk_status_t errno_to_blk_status(int errno);
 
-int blk_poll(struct request_queue *q, blk_qc_t cookie);
+int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {
diff --git a/mm/page_io.c b/mm/page_io.c
index a7271fa481f6..5bdfd21c1bd9 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -410,7 +410,7 @@ int swap_readpage(struct page *page, bool synchronous)
 		if (!READ_ONCE(bio->bi_private))
 			break;
 
-		if (!blk_poll(disk->queue, qc))
+		if (!blk_poll(disk->queue, qc, true))
 			break;
 	}
 	__set_current_state(TASK_RUNNING);
-- 
2.17.1



* [PATCH 5/6] blk-mq: ensure mq_ops ->poll() is entered at least once
  2018-11-23 18:34 [PATCHSET v4 0/6] Various block optimizations Jens Axboe
                   ` (3 preceding siblings ...)
  2018-11-23 18:34 ` [PATCH 4/6] block: make blk_poll() take a parameter on whether to spin or not Jens Axboe
@ 2018-11-23 18:34 ` Jens Axboe
  2018-11-23 18:34 ` [PATCH 6/6] blk-mq: never redirect polled IO completions Jens Axboe
  5 siblings, 0 replies; 13+ messages in thread
From: Jens Axboe @ 2018-11-23 18:34 UTC (permalink / raw)
  To: linux-block; +Cc: Jens Axboe

Right now we immediately bail if need_resched() is true, but
we need to do at least one loop in case we have entries waiting.
So just invert the need_resched() check, putting it at the
bottom of the loop.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index c2751f0a3ccc..ba3c7b6476b7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3375,7 +3375,7 @@ static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
 	hctx->poll_considered++;
 
 	state = current->state;
-	while (!need_resched()) {
+	do {
 		int ret;
 
 		hctx->poll_invoked++;
@@ -3395,7 +3395,7 @@ static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
 		if (ret < 0 || !spin)
 			break;
 		cpu_relax();
-	}
+	} while (!need_resched());
 
 	__set_current_state(TASK_RUNNING);
 	return 0;
-- 
2.17.1



* [PATCH 6/6] blk-mq: never redirect polled IO completions
  2018-11-23 18:34 [PATCHSET v4 0/6] Various block optimizations Jens Axboe
                   ` (4 preceding siblings ...)
  2018-11-23 18:34 ` [PATCH 5/6] blk-mq: ensure mq_ops ->poll() is entered at least once Jens Axboe
@ 2018-11-23 18:34 ` Jens Axboe
  2018-11-26  8:18   ` Christoph Hellwig
  5 siblings, 1 reply; 13+ messages in thread
From: Jens Axboe @ 2018-11-23 18:34 UTC (permalink / raw)
  To: linux-block; +Cc: Jens Axboe

It's pointless to do so; we are by definition on the CPU we want/need
to be on, as that's the one waiting for the completion event.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index ba3c7b6476b7..37674c1766a7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -585,7 +585,12 @@ static void __blk_mq_complete_request(struct request *rq)
 		return;
 	}
 
-	if (!test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) {
+	/*
+	 * For a polled request, always complete locally; it's pointless
+	 * to redirect the completion.
+	 */
+	if ((rq->cmd_flags & REQ_HIPRI) ||
+	    !test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) {
 		q->mq_ops->complete(rq);
 		return;
 	}
-- 
2.17.1



* Re: [PATCH 1/6] blk-mq: when polling for IO, look for any completion
  2018-11-23 18:34 ` [PATCH 1/6] blk-mq: when polling for IO, look for any completion Jens Axboe
@ 2018-11-26  8:16   ` Christoph Hellwig
  2018-11-26 15:17     ` Jens Axboe
  0 siblings, 1 reply; 13+ messages in thread
From: Christoph Hellwig @ 2018-11-26  8:16 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block

> -bool blk_poll(struct request_queue *q, blk_qc_t cookie)
> +int blk_poll(struct request_queue *q, blk_qc_t cookie)

Can you add a comment explaining the return value?

>  {
>  	if (!q->poll_fn || !blk_qc_t_valid(cookie))
>  		return false;

And false certainly isn't an integer constant..
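
(For illustration, one possible shape for such a comment, assuming the
semantics implied by how callers treat the return value in this series;
the exact wording here is made up:)

  /*
   * blk_poll - poll for completions on a queue
   * @q:      the request queue
   * @cookie: cookie returned at IO submission time
   *
   * Returns a value > 0 if completed entries were found, 0 otherwise.
   */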


* Re: [PATCH 3/6] nvme: remove opportunistic polling from bdev target
  2018-11-23 18:34 ` [PATCH 3/6] nvme: remove opportunistic polling from bdev target Jens Axboe
@ 2018-11-26  8:17   ` Christoph Hellwig
  0 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2018-11-26  8:17 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block

On Fri, Nov 23, 2018 at 11:34:08AM -0700, Jens Axboe wrote:
> It doesn't set HIPRI on the bio, so polling for it is pretty silly.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>

Well, I know who forgot to mark it hipri :)

But for now this looks good; we need to revisit fabrics polling entirely
once everything has settled.

Reviewed-by: Christoph Hellwig <hch@lst.de>


* Re: [PATCH 4/6] block: make blk_poll() take a parameter on whether to spin or not
  2018-11-23 18:34 ` [PATCH 4/6] block: make blk_poll() take a parameter on whether to spin or not Jens Axboe
@ 2018-11-26  8:18   ` Christoph Hellwig
  2018-11-26 15:28     ` Jens Axboe
  0 siblings, 1 reply; 13+ messages in thread
From: Christoph Hellwig @ 2018-11-26  8:18 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block

> -int blk_poll(struct request_queue *q, blk_qc_t cookie)
> +int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)

The parameter will need some documentation.


* Re: [PATCH 6/6] blk-mq: never redirect polled IO completions
  2018-11-23 18:34 ` [PATCH 6/6] blk-mq: never redirect polled IO completions Jens Axboe
@ 2018-11-26  8:18   ` Christoph Hellwig
  0 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2018-11-26  8:18 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block

On Fri, Nov 23, 2018 at 11:34:11AM -0700, Jens Axboe wrote:
> It's pointless to do so, we are by definition on the CPU we want/need
> to be, as that's the one waiting for a completion event.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>


* Re: [PATCH 1/6] blk-mq: when polling for IO, look for any completion
  2018-11-26  8:16   ` Christoph Hellwig
@ 2018-11-26 15:17     ` Jens Axboe
  0 siblings, 0 replies; 13+ messages in thread
From: Jens Axboe @ 2018-11-26 15:17 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-block

On 11/26/18 1:16 AM, Christoph Hellwig wrote:
>> -bool blk_poll(struct request_queue *q, blk_qc_t cookie)
>> +int blk_poll(struct request_queue *q, blk_qc_t cookie)
> 
> Can you add a comment explaining the return value?
> 
>>  {
>>  	if (!q->poll_fn || !blk_qc_t_valid(cookie))
>>  		return false;
> 
> And false certainly isn't an integer constant..

Done

-- 
Jens Axboe



* Re: [PATCH 4/6] block: make blk_poll() take a parameter on whether to spin or not
  2018-11-26  8:18   ` Christoph Hellwig
@ 2018-11-26 15:28     ` Jens Axboe
  0 siblings, 0 replies; 13+ messages in thread
From: Jens Axboe @ 2018-11-26 15:28 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-block

On 11/26/18 1:18 AM, Christoph Hellwig wrote:
>> -int blk_poll(struct request_queue *q, blk_qc_t cookie)
>> +int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
> 
> The parameter will need some documentation.

Done

-- 
Jens Axboe


