All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly
@ 2014-09-07  8:39 Ming Lei
  2014-09-07  8:39 ` [PATCH 1/6] blk-mq: allocate flush_rq in blk_mq_init_flush() Ming Lei
                   ` (6 more replies)
  0 siblings, 7 replies; 14+ messages in thread
From: Ming Lei @ 2014-09-07  8:39 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig

Hi,

This patchset introduces init_flush_rq and its pair callback to
initialize pdu of flush request explicitly, instead of using
copying from request which is inefficient and buggy, and implements
them in virtio-blk and scsi-lib.

 block/blk-flush.c          |   22 +++++++++++++++++++++-
 block/blk-mq.c             |   23 ++++++++---------------
 block/blk-mq.h             |    3 ++-
 drivers/block/virtio_blk.c |   15 ++++++++++++++-
 drivers/scsi/scsi_lib.c    |   32 +++++++++++++++++++++++++++-----
 include/linux/blk-mq.h     |    9 +++++++++
 6 files changed, 81 insertions(+), 23 deletions(-)

Thanks,
--
Ming Lei


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/6] blk-mq: allocate flush_rq in blk_mq_init_flush()
  2014-09-07  8:39 [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly Ming Lei
@ 2014-09-07  8:39 ` Ming Lei
  2014-09-07  8:39 ` [PATCH 2/6] blk-mq: introduce init_flush_rq callback and its pair Ming Lei
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Ming Lei @ 2014-09-07  8:39 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig, Ming Lei

It is reasonable to allocate flush req in blk_mq_init_flush().

Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 block/blk-flush.c |   11 ++++++++++-
 block/blk-mq.c    |   16 ++++++----------
 block/blk-mq.h    |    2 +-
 3 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 3cb5e9e..75ca6cd0 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -474,7 +474,16 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
 
-void blk_mq_init_flush(struct request_queue *q)
+int blk_mq_init_flush(struct request_queue *q)
 {
+	struct blk_mq_tag_set *set = q->tag_set;
+
 	spin_lock_init(&q->mq_flush_lock);
+
+	q->flush_rq = kzalloc(round_up(sizeof(struct request) +
+				set->cmd_size, cache_line_size()),
+				GFP_KERNEL);
+	if (!q->flush_rq)
+		return -ENOMEM;
+	return 0;
 }
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4aac826..23386c0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1826,17 +1826,10 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	if (set->ops->complete)
 		blk_queue_softirq_done(q, set->ops->complete);
 
-	blk_mq_init_flush(q);
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
-	q->flush_rq = kzalloc(round_up(sizeof(struct request) +
-				set->cmd_size, cache_line_size()),
-				GFP_KERNEL);
-	if (!q->flush_rq)
-		goto err_hw;
-
 	if (blk_mq_init_hw_queues(q, set))
-		goto err_flush_rq;
+		goto err_hw;
 
 	mutex_lock(&all_q_mutex);
 	list_add_tail(&q->all_q_node, &all_q_list);
@@ -1844,12 +1837,15 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 
 	blk_mq_add_queue_tag_set(set, q);
 
+	if (blk_mq_init_flush(q))
+		goto err_hw_queues;
+
 	blk_mq_map_swqueue(q);
 
 	return q;
 
-err_flush_rq:
-	kfree(q->flush_rq);
+err_hw_queues:
+	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
 err_hw:
 	blk_cleanup_queue(q);
 err_hctxs:
diff --git a/block/blk-mq.h b/block/blk-mq.h
index ca4964a..b0bd9bc 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -27,7 +27,7 @@ struct blk_mq_ctx {
 
 void __blk_mq_complete_request(struct request *rq);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
-void blk_mq_init_flush(struct request_queue *q);
+int blk_mq_init_flush(struct request_queue *q);
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 void blk_mq_clone_flush_request(struct request *flush_rq,
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/6] blk-mq: introduce init_flush_rq callback and its pair
  2014-09-07  8:39 [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly Ming Lei
  2014-09-07  8:39 ` [PATCH 1/6] blk-mq: allocate flush_rq in blk_mq_init_flush() Ming Lei
@ 2014-09-07  8:39 ` Ming Lei
  2014-09-07  8:39 ` [PATCH 3/6] blk-mq: don't copy pdu if init_flush_rq is implemented Ming Lei
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Ming Lei @ 2014-09-07  8:39 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig, Ming Lei

Currently the pdu of the flush rq is simply copied from another rq;
it turns out this isn't a good approach:

	- it isn't enough to initialize pointer fields correctly, and
	it is easy to cause bugs if some pointer fields are included
	in the pdu

	- the copy isn't necessary, because the pdu should just
	serve for submitting flush req purpose, like non-flush
	case, and flush case of legacy block driver

	- actually init_flush_rq/exit_flush_rq is needed for
	setting up the flush request.

So introduce these two callbacks for driver to handle the case
easily.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 block/blk-flush.c      |   11 +++++++++++
 block/blk-mq.c         |    2 ++
 block/blk-mq.h         |    1 +
 include/linux/blk-mq.h |    9 +++++++++
 4 files changed, 23 insertions(+)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 75ca6cd0..14654e1 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -485,5 +485,16 @@ int blk_mq_init_flush(struct request_queue *q)
 				GFP_KERNEL);
 	if (!q->flush_rq)
 		return -ENOMEM;
+
+	if (q->mq_ops->init_flush_rq)
+		return q->mq_ops->init_flush_rq(q, q->flush_rq);
+
 	return 0;
 }
+
+void blk_mq_exit_flush(struct request_queue *q)
+{
+	/* flush_rq is freed centrally for both mq and legacy queue */
+	if (q->mq_ops->exit_flush_rq)
+		q->mq_ops->exit_flush_rq(q, q->flush_rq);
+}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 23386c0..1daef32 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1868,6 +1868,8 @@ void blk_mq_free_queue(struct request_queue *q)
 {
 	struct blk_mq_tag_set	*set = q->tag_set;
 
+	blk_mq_exit_flush(q);
+
 	blk_mq_del_queue_tag_set(q);
 
 	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index b0bd9bc..ea9974f 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -28,6 +28,7 @@ struct blk_mq_ctx {
 void __blk_mq_complete_request(struct request *rq);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 int blk_mq_init_flush(struct request_queue *q);
+void blk_mq_exit_flush(struct request_queue *q);
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 void blk_mq_clone_flush_request(struct request *flush_rq,
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a1e31f2..d24c13e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -85,6 +85,8 @@ typedef int (init_request_fn)(void *, struct request *, unsigned int,
 		unsigned int, unsigned int);
 typedef void (exit_request_fn)(void *, struct request *, unsigned int,
 		unsigned int);
+typedef int (init_flush_rq_fn)(struct request_queue *, struct request *);
+typedef void (exit_flush_rq_fn)(struct request_queue *, struct request *);
 
 struct blk_mq_ops {
 	/*
@@ -119,6 +121,13 @@ struct blk_mq_ops {
 	 */
 	init_request_fn		*init_request;
 	exit_request_fn		*exit_request;
+
+	/*
+	 * Called after flush request allocated by the block layer to
+	 * allow the driver to set up driver specific data.
+	 */
+	init_flush_rq_fn	*init_flush_rq;
+	exit_flush_rq_fn	*exit_flush_rq;
 };
 
 enum {
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/6] blk-mq: don't copy pdu if init_flush_rq is implemented
  2014-09-07  8:39 [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly Ming Lei
  2014-09-07  8:39 ` [PATCH 1/6] blk-mq: allocate flush_rq in blk_mq_init_flush() Ming Lei
  2014-09-07  8:39 ` [PATCH 2/6] blk-mq: introduce init_flush_rq callback and its pair Ming Lei
@ 2014-09-07  8:39 ` Ming Lei
  2014-09-07  8:39 ` [PATCH 4/6] virtio-blk: implement init_flush_rq Ming Lei
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Ming Lei @ 2014-09-07  8:39 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig, Ming Lei

Current copying serves purpose of initializing flush req's pdu,
so don't do that if init_flush_rq is implemented.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 block/blk-mq.c |    6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1daef32..113d58d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -292,8 +292,10 @@ void blk_mq_clone_flush_request(struct request *flush_rq,
 
 	flush_rq->mq_ctx = orig_rq->mq_ctx;
 	flush_rq->tag = orig_rq->tag;
-	memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq),
-		hctx->cmd_size);
+
+	if (!orig_rq->q->mq_ops->init_flush_rq)
+		memcpy(blk_mq_rq_to_pdu(flush_rq),
+			blk_mq_rq_to_pdu(orig_rq), hctx->cmd_size);
 }
 
 inline void __blk_mq_end_io(struct request *rq, int error)
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/6] virtio-blk: implement init_flush_rq
  2014-09-07  8:39 [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly Ming Lei
                   ` (2 preceding siblings ...)
  2014-09-07  8:39 ` [PATCH 3/6] blk-mq: don't copy pdu if init_flush_rq is implemented Ming Lei
@ 2014-09-07  8:39 ` Ming Lei
  2014-09-07 18:52   ` Christoph Hellwig
  2014-09-07  8:39 ` [PATCH 5/6] scsi-lib: implement init_flush_rq and its pair Ming Lei
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 14+ messages in thread
From: Ming Lei @ 2014-09-07  8:39 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig, Ming Lei

Now we use init_flush_rq callback to initialize pdu of
flush req.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 drivers/block/virtio_blk.c |   15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 0a58140..f478ec8 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -177,7 +177,6 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 
 	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
 
-	vbr->req = req;
 	if (req->cmd_flags & REQ_FLUSH) {
 		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
 		vbr->out_hdr.sector = 0;
@@ -556,6 +555,19 @@ static int virtblk_init_request(void *data, struct request *rq,
 	struct virtio_blk *vblk = data;
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
 
+	vbr->req = rq;
+	sg_init_table(vbr->sg, vblk->sg_elems);
+	return 0;
+}
+
+static int virtblk_init_flush_rq(struct request_queue *q,
+		struct request *rq)
+{
+	/* q->queuedata isn't setup yet */
+	struct virtio_blk *vblk = q->tag_set->driver_data;
+	struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
+
+	vbr->req = rq;
 	sg_init_table(vbr->sg, vblk->sg_elems);
 	return 0;
 }
@@ -565,6 +577,7 @@ static struct blk_mq_ops virtio_mq_ops = {
 	.map_queue	= blk_mq_map_queue,
 	.complete	= virtblk_request_done,
 	.init_request	= virtblk_init_request,
+	.init_flush_rq	= virtblk_init_flush_rq,
 };
 
 static unsigned int virtblk_queue_depth;
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 5/6] scsi-lib: implement init_flush_rq and its pair
  2014-09-07  8:39 [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly Ming Lei
                   ` (3 preceding siblings ...)
  2014-09-07  8:39 ` [PATCH 4/6] virtio-blk: implement init_flush_rq Ming Lei
@ 2014-09-07  8:39 ` Ming Lei
  2014-09-07 18:53   ` Christoph Hellwig
  2014-09-07  8:39 ` [PATCH 6/6] blk-mq: don't copy pdu any more for flush req Ming Lei
  2014-09-07 18:49 ` [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly Christoph Hellwig
  6 siblings, 1 reply; 14+ messages in thread
From: Ming Lei @ 2014-09-07  8:39 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig, Ming Lei

Now implement init_flush_rq callback to avoid the unnecessary
pdu copying done in blk_mq_clone_flush_request().

A sense buffer is introduced for the flush req, but it won't be
a big deal since there is only one flush request per queue. It could
still be borrowed from the sense buffer of the request it is cloned
from, but that looks a bit ugly.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 drivers/scsi/scsi_lib.c |   32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b9a8ddd..9090418 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1930,9 +1930,7 @@ out:
 	return ret;
 }
 
-static int scsi_init_request(void *data, struct request *rq,
-		unsigned int hctx_idx, unsigned int request_idx,
-		unsigned int numa_node)
+static int __scsi_init_request(struct request *rq, int numa_node)
 {
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
 
@@ -1943,14 +1941,36 @@ static int scsi_init_request(void *data, struct request *rq,
 	return 0;
 }
 
-static void scsi_exit_request(void *data, struct request *rq,
-		unsigned int hctx_idx, unsigned int request_idx)
+static void __scsi_exit_request(struct request *rq)
 {
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
 
 	kfree(cmd->sense_buffer);
 }
 
+static int scsi_init_request(void *data, struct request *rq,
+		unsigned int hctx_idx, unsigned int request_idx,
+		unsigned int numa_node)
+{
+	return __scsi_init_request(rq, numa_node);
+}
+
+static void scsi_exit_request(void *data, struct request *rq,
+		unsigned int hctx_idx, unsigned int request_idx)
+{
+	__scsi_exit_request(rq);
+}
+
+static int scsi_init_flush_rq(struct request_queue *q, struct request *rq)
+{
+	return __scsi_init_request(rq, NUMA_NO_NODE);
+}
+
+static void scsi_exit_flush_rq(struct request_queue *q, struct request *rq)
+{
+	__scsi_exit_request(rq);
+}
+
 static u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
 {
 	struct device *host_dev;
@@ -2044,6 +2064,8 @@ static struct blk_mq_ops scsi_mq_ops = {
 	.timeout	= scsi_times_out,
 	.init_request	= scsi_init_request,
 	.exit_request	= scsi_exit_request,
+	.init_flush_rq	= scsi_init_flush_rq,
+	.exit_flush_rq	= scsi_exit_flush_rq,
 };
 
 struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev)
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 6/6] blk-mq: don't copy pdu any more for flush req
  2014-09-07  8:39 [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly Ming Lei
                   ` (4 preceding siblings ...)
  2014-09-07  8:39 ` [PATCH 5/6] scsi-lib: implement init_flush_rq and its pair Ming Lei
@ 2014-09-07  8:39 ` Ming Lei
  2014-09-07 18:49 ` [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly Christoph Hellwig
  6 siblings, 0 replies; 14+ messages in thread
From: Ming Lei @ 2014-09-07  8:39 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig, Ming Lei

The in-tree drivers which need to handle flush request
have implemented init_flush_rq already, so don't
copy pdu any more for flush req.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 block/blk-mq.c |    7 -------
 1 file changed, 7 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 113d58d..16f595f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -287,15 +287,8 @@ void blk_mq_free_request(struct request *rq)
 void blk_mq_clone_flush_request(struct request *flush_rq,
 		struct request *orig_rq)
 {
-	struct blk_mq_hw_ctx *hctx =
-		orig_rq->q->mq_ops->map_queue(orig_rq->q, orig_rq->mq_ctx->cpu);
-
 	flush_rq->mq_ctx = orig_rq->mq_ctx;
 	flush_rq->tag = orig_rq->tag;
-
-	if (!orig_rq->q->mq_ops->init_flush_rq)
-		memcpy(blk_mq_rq_to_pdu(flush_rq),
-			blk_mq_rq_to_pdu(orig_rq), hctx->cmd_size);
 }
 
 inline void __blk_mq_end_io(struct request *rq, int error)
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly
  2014-09-07  8:39 [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly Ming Lei
                   ` (5 preceding siblings ...)
  2014-09-07  8:39 ` [PATCH 6/6] blk-mq: don't copy pdu any more for flush req Ming Lei
@ 2014-09-07 18:49 ` Christoph Hellwig
  2014-09-08 16:55   ` Ming Lei
  6 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2014-09-07 18:49 UTC (permalink / raw)
  To: Ming Lei; +Cc: Jens Axboe, linux-kernel, linux-scsi, Christoph Hellwig

This works fine for me, although I still don't really like it very much.

If you really want to go down the path of major surgery in this area we
should probably allocate a flush request per hw_ctx, and initialize it
using the normal init/exit functions.  If we want to have proper multiqueue
performance on devices needing flushes we'll need something like that anyway.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/6] virtio-blk: implement init_flush_rq
  2014-09-07  8:39 ` [PATCH 4/6] virtio-blk: implement init_flush_rq Ming Lei
@ 2014-09-07 18:52   ` Christoph Hellwig
  0 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2014-09-07 18:52 UTC (permalink / raw)
  To: Ming Lei; +Cc: Jens Axboe, linux-kernel, linux-scsi

A couple comments not directly related to this patch, it's just
a convenient vehicle for my rants :)

> @@ -556,6 +555,19 @@ static int virtblk_init_request(void *data, struct request *rq,
>  	struct virtio_blk *vblk = data;
>  	struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
>  
> +	vbr->req = rq;

I really hate how we need these backpointers in most drivers.  Given
that struct request and the driver privata data are allocated together
we should be able to do this with simple pointer arithmetics.

> +	sg_init_table(vbr->sg, vblk->sg_elems);

Jens, what do you think about moving of the SG list handling to the
core block layer?  I'd really like to have the S/G list in struct request,
and if we do that we could also take the scsi-mq code that allows small
S/G lists preallocated in blk-mq and allocating larger ones at runtime
there, avoiding the need to duplicate that code and the whole mempool
magic it requires in drivers that want to make use of it.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/6] scsi-lib: implement init_flush_rq and its pair
  2014-09-07  8:39 ` [PATCH 5/6] scsi-lib: implement init_flush_rq and its pair Ming Lei
@ 2014-09-07 18:53   ` Christoph Hellwig
  0 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2014-09-07 18:53 UTC (permalink / raw)
  To: Ming Lei; +Cc: Jens Axboe, linux-kernel, linux-scsi, Christoph Hellwig

> +static int __scsi_init_request(struct request *rq, int numa_node)

Nitpick: Please use a sane name here, e.g. scsi_mq_alloc/free_sense_buffer.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly
  2014-09-07 18:49 ` [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly Christoph Hellwig
@ 2014-09-08 16:55   ` Ming Lei
  2014-09-08 19:03     ` Jens Axboe
  2014-09-08 22:53       ` Elliott, Robert (Server Storage)
  0 siblings, 2 replies; 14+ messages in thread
From: Ming Lei @ 2014-09-08 16:55 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Linux Kernel Mailing List, Linux SCSI List

On Mon, Sep 8, 2014 at 2:49 AM, Christoph Hellwig <hch@lst.de> wrote:
> This works fine for me, although I still don't really like it very much.
>
> If you really want to go down the path of major surgery in this area we
> should probably allocate a flush request per hw_ctx, and initialize it
> using the normal init/exit functions.  If we want to have proper multiqueue
> performance on devices needing flushes we'll need something like that anyway.

Yes, that should be the final solution for the problem, and looks the whole
flush machinery need to move into hctx, I will try to figure out one patch to
do that.

Thanks,

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly
  2014-09-08 16:55   ` Ming Lei
@ 2014-09-08 19:03     ` Jens Axboe
  2014-09-08 22:53       ` Elliott, Robert (Server Storage)
  1 sibling, 0 replies; 14+ messages in thread
From: Jens Axboe @ 2014-09-08 19:03 UTC (permalink / raw)
  To: Ming Lei, Christoph Hellwig; +Cc: Linux Kernel Mailing List, Linux SCSI List

On 09/08/2014 10:55 AM, Ming Lei wrote:
> On Mon, Sep 8, 2014 at 2:49 AM, Christoph Hellwig <hch@lst.de> wrote:
>> This works fine for me, although I still don't really like it very much.
>>
>> If you really want to go down the path of major surgery in this area we
>> should probably allocate a flush request per hw_ctx, and initialize it
>> using the normal init/exit functions.  If we want to have proper multiqueue
>> performance on devices needing flushes we'll need something like that anyway.
> 
> Yes, that should be the final solution for the problem, and looks the whole
> flush machinery need to move into hctx, I will try to figure out one patch to
> do that.

I had not thought of that, but seems like a great way to clean this up a
lot. It just never felt like the right thing to do.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly
  2014-09-08 16:55   ` Ming Lei
@ 2014-09-08 22:53       ` Elliott, Robert (Server Storage)
  2014-09-08 22:53       ` Elliott, Robert (Server Storage)
  1 sibling, 0 replies; 14+ messages in thread
From: Elliott, Robert (Server Storage) @ 2014-09-08 22:53 UTC (permalink / raw)
  To: Ming Lei, Christoph Hellwig
  Cc: Jens Axboe, Linux Kernel Mailing List, Linux SCSI List

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 2160 bytes --]



> -----Original Message-----
> From: linux-scsi-owner@vger.kernel.org [mailto:linux-scsi-
> owner@vger.kernel.org] On Behalf Of Ming Lei
> Sent: Monday, 08 September, 2014 11:55 AM
> To: Christoph Hellwig
> Cc: Jens Axboe; Linux Kernel Mailing List; Linux SCSI List
> Subject: Re: [PATCH 0/6] blk-mq: initialize pdu of flush req
> explicitly
> 
> On Mon, Sep 8, 2014 at 2:49 AM, Christoph Hellwig <hch@lst.de> wrote:
> > This works fine for me, although I still don't really like it very
> much.
> >
> > If you really want to go down the path of major surgery in this
> > area we should probably allocate a flush request per hw_ctx, and
> > initialize it using the normal init/exit functions.  If we want
> > to have proper multiqueue performance on devices needing flushes
> > we'll need something like that anyway.
> 
> Yes, that should be the final solution for the problem, and looks the
> whole flush machinery need to move into hctx, I will try to figure
> out one patch to do that.

Please change flush_rq allocation from kzalloc to kzalloc_node 
while operating on that code (would have affected PATCH 1/6).

blk_mq_init_queue currently has this for q->flush_rq:
        q->flush_rq = kzalloc(round_up(sizeof(struct request) +
                                set->cmd_size, cache_line_size()),
                                GFP_KERNEL);

while all its other allocations are tied to set->numa_node:
        hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
                        set->numa_node);
        q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);

or, for per-CPU structures, tied to the appropriate node:
        for (i = 0; i < set->nr_hw_queues; i++) {
                int node = blk_mq_hw_queue_to_node(map, i);

                hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
                                        GFP_KERNEL, node);

Per-hctx flush requests would mean following the hctxs[i]
approach.


---
Rob Elliott    HP Server Storage



ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly
@ 2014-09-08 22:53       ` Elliott, Robert (Server Storage)
  0 siblings, 0 replies; 14+ messages in thread
From: Elliott, Robert (Server Storage) @ 2014-09-08 22:53 UTC (permalink / raw)
  To: Ming Lei, Christoph Hellwig
  Cc: Jens Axboe, Linux Kernel Mailing List, Linux SCSI List



> -----Original Message-----
> From: linux-scsi-owner@vger.kernel.org [mailto:linux-scsi-
> owner@vger.kernel.org] On Behalf Of Ming Lei
> Sent: Monday, 08 September, 2014 11:55 AM
> To: Christoph Hellwig
> Cc: Jens Axboe; Linux Kernel Mailing List; Linux SCSI List
> Subject: Re: [PATCH 0/6] blk-mq: initialize pdu of flush req
> explicitly
> 
> On Mon, Sep 8, 2014 at 2:49 AM, Christoph Hellwig <hch@lst.de> wrote:
> > This works fine for me, although I still don't really like it very
> much.
> >
> > If you really want to go down the path of major surgery in this
> > area we should probably allocate a flush request per hw_ctx, and
> > initialize it using the normal init/exit functions.  If we want
> > to have proper multiqueue performance on devices needing flushes
> > we'll need something like that anyway.
> 
> Yes, that should be the final solution for the problem, and looks the
> whole flush machinery need to move into hctx, I will try to figure
> out one patch to do that.

Please change flush_rq allocation from kzalloc to kzalloc_node 
while operating on that code (would have affected PATCH 1/6).

blk_mq_init_queue currently has this for q->flush_rq:
        q->flush_rq = kzalloc(round_up(sizeof(struct request) +
                                set->cmd_size, cache_line_size()),
                                GFP_KERNEL);

while all its other allocations are tied to set->numa_node:
        hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
                        set->numa_node);
        q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);

or, for per-CPU structures, tied to the appropriate node:
        for (i = 0; i < set->nr_hw_queues; i++) {
                int node = blk_mq_hw_queue_to_node(map, i);

                hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
                                        GFP_KERNEL, node);

Per-hctx flush requests would mean following the hctxs[i]
approach.


---
Rob Elliott    HP Server Storage




^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2014-09-08 22:54 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-07  8:39 [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly Ming Lei
2014-09-07  8:39 ` [PATCH 1/6] blk-mq: allocate flush_rq in blk_mq_init_flush() Ming Lei
2014-09-07  8:39 ` [PATCH 2/6] blk-mq: introduce init_flush_rq callback and its pair Ming Lei
2014-09-07  8:39 ` [PATCH 3/6] blk-mq: don't copy pdu if init_flush_rq is implemented Ming Lei
2014-09-07  8:39 ` [PATCH 4/6] virtio-blk: implement init_flush_rq Ming Lei
2014-09-07 18:52   ` Christoph Hellwig
2014-09-07  8:39 ` [PATCH 5/6] scsi-lib: implement init_flush_rq and its pair Ming Lei
2014-09-07 18:53   ` Christoph Hellwig
2014-09-07  8:39 ` [PATCH 6/6] blk-mq: don't copy pdu any more for flush req Ming Lei
2014-09-07 18:49 ` [PATCH 0/6] blk-mq: initialize pdu of flush req explicitly Christoph Hellwig
2014-09-08 16:55   ` Ming Lei
2014-09-08 19:03     ` Jens Axboe
2014-09-08 22:53     ` Elliott, Robert (Server Storage)
2014-09-08 22:53       ` Elliott, Robert (Server Storage)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.