From: Jens Axboe <axboe@kernel.dk> To: linux-block@vger.kernel.org, linux-nvme@lists.infradead.org Cc: Jens Axboe <axboe@kernel.dk> Subject: [PATCH 4/8] nvme: implement mq_ops->commit_rqs() hook Date: Mon, 26 Nov 2018 09:35:52 -0700 [thread overview] Message-ID: <20181126163556.5181-5-axboe@kernel.dk> (raw) In-Reply-To: <20181126163556.5181-1-axboe@kernel.dk> Split the command submission and the SQ doorbell ring, and add the doorbell ring as our ->commit_rqs() hook. This allows a list of requests to be issued, with nvme only writing the SQ update when it's necessary. This is more efficient if we have lists of requests to issue, particularly on virtualized hardware, where writing the SQ doorbell is more expensive than on real hardware. For those cases, performance increases of 2-3x have been observed. The use case for this is plugged IO, where blk-mq flushes a batch of requests at the time. Signed-off-by: Jens Axboe <axboe@kernel.dk> --- drivers/nvme/host/pci.c | 52 +++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 73effe586e5f..d503bf6cd8ba 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -203,6 +203,7 @@ struct nvme_queue { u16 q_depth; s16 cq_vector; u16 sq_tail; + u16 last_sq_tail; u16 cq_head; u16 last_cq_head; u16 qid; @@ -522,22 +523,52 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) return 0; } +static inline void nvme_write_sq_db(struct nvme_queue *nvmeq) +{ + if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, + nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) + writel(nvmeq->sq_tail, nvmeq->q_db); + nvmeq->last_sq_tail = nvmeq->sq_tail; +} + +static inline int nvme_next_ring_index(struct nvme_queue *nvmeq, u16 index) +{ + if (++index == nvmeq->q_depth) + return 0; + + return index; +} + /** * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell * @nvmeq: The queue to use * @cmd: The command to send + 
* @write_sq: whether to write to the SQ doorbell */ -static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) +static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, + bool write_sq) { spin_lock(&nvmeq->sq_lock); memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd)); - if (++nvmeq->sq_tail == nvmeq->q_depth) - nvmeq->sq_tail = 0; - if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, - nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) - writel(nvmeq->sq_tail, nvmeq->q_db); + /* + * Write sq tail if we have to, OR if the next command would wrap + */ + nvmeq->sq_tail = nvme_next_ring_index(nvmeq, nvmeq->sq_tail); + if (write_sq || + nvme_next_ring_index(nvmeq, nvmeq->sq_tail) == nvmeq->last_sq_tail) + nvme_write_sq_db(nvmeq); + spin_unlock(&nvmeq->sq_lock); +} + +static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) +{ + struct nvme_queue *nvmeq = hctx->driver_data; + + spin_lock(&nvmeq->sq_lock); + if (nvmeq->sq_tail != nvmeq->last_sq_tail) + nvme_write_sq_db(nvmeq); spin_unlock(&nvmeq->sq_lock); } @@ -923,7 +954,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, } blk_mq_start_request(req); - nvme_submit_cmd(nvmeq, &cmnd); + nvme_submit_cmd(nvmeq, &cmnd, bd->last); return BLK_STS_OK; out_cleanup_iod: nvme_free_iod(dev, req); @@ -999,8 +1030,7 @@ static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end) { while (start != end) { nvme_handle_cqe(nvmeq, start); - if (++start == nvmeq->q_depth) - start = 0; + start = nvme_next_ring_index(nvmeq, start); } } @@ -1108,7 +1138,7 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl) memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_async_event; c.common.command_id = NVME_AQ_BLK_MQ_DEPTH; - nvme_submit_cmd(nvmeq, &c); + nvme_submit_cmd(nvmeq, &c, true); } static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) @@ -1531,6 +1561,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) 
spin_lock_irq(&nvmeq->cq_lock); nvmeq->sq_tail = 0; + nvmeq->last_sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; @@ -1603,6 +1634,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = { #define NVME_SHARED_MQ_OPS \ .queue_rq = nvme_queue_rq, \ + .commit_rqs = nvme_commit_rqs, \ .rq_flags_to_type = nvme_rq_flags_to_type, \ .complete = nvme_pci_complete_rq, \ .init_hctx = nvme_init_hctx, \ -- 2.17.1
WARNING: multiple messages have this Message-ID (diff)
From: axboe@kernel.dk (Jens Axboe) Subject: [PATCH 4/8] nvme: implement mq_ops->commit_rqs() hook Date: Mon, 26 Nov 2018 09:35:52 -0700 [thread overview] Message-ID: <20181126163556.5181-5-axboe@kernel.dk> (raw) In-Reply-To: <20181126163556.5181-1-axboe@kernel.dk> Split the command submission and the SQ doorbell ring, and add the doorbell ring as our ->commit_rqs() hook. This allows a list of requests to be issued, with nvme only writing the SQ update when it's necessary. This is more efficient if we have lists of requests to issue, particularly on virtualized hardware, where writing the SQ doorbell is more expensive than on real hardware. For those cases, performance increases of 2-3x have been observed. The use case for this is plugged IO, where blk-mq flushes a batch of requests at the time. Signed-off-by: Jens Axboe <axboe at kernel.dk> --- drivers/nvme/host/pci.c | 52 +++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 73effe586e5f..d503bf6cd8ba 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -203,6 +203,7 @@ struct nvme_queue { u16 q_depth; s16 cq_vector; u16 sq_tail; + u16 last_sq_tail; u16 cq_head; u16 last_cq_head; u16 qid; @@ -522,22 +523,52 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) return 0; } +static inline void nvme_write_sq_db(struct nvme_queue *nvmeq) +{ + if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, + nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) + writel(nvmeq->sq_tail, nvmeq->q_db); + nvmeq->last_sq_tail = nvmeq->sq_tail; +} + +static inline int nvme_next_ring_index(struct nvme_queue *nvmeq, u16 index) +{ + if (++index == nvmeq->q_depth) + return 0; + + return index; +} + /** * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell * @nvmeq: The queue to use * @cmd: The command to send + * @write_sq: whether to write to the SQ doorbell */ -static void nvme_submit_cmd(struct 
nvme_queue *nvmeq, struct nvme_command *cmd) +static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, + bool write_sq) { spin_lock(&nvmeq->sq_lock); memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd)); - if (++nvmeq->sq_tail == nvmeq->q_depth) - nvmeq->sq_tail = 0; - if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, - nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) - writel(nvmeq->sq_tail, nvmeq->q_db); + /* + * Write sq tail if we have to, OR if the next command would wrap + */ + nvmeq->sq_tail = nvme_next_ring_index(nvmeq, nvmeq->sq_tail); + if (write_sq || + nvme_next_ring_index(nvmeq, nvmeq->sq_tail) == nvmeq->last_sq_tail) + nvme_write_sq_db(nvmeq); + spin_unlock(&nvmeq->sq_lock); +} + +static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) +{ + struct nvme_queue *nvmeq = hctx->driver_data; + + spin_lock(&nvmeq->sq_lock); + if (nvmeq->sq_tail != nvmeq->last_sq_tail) + nvme_write_sq_db(nvmeq); spin_unlock(&nvmeq->sq_lock); } @@ -923,7 +954,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, } blk_mq_start_request(req); - nvme_submit_cmd(nvmeq, &cmnd); + nvme_submit_cmd(nvmeq, &cmnd, bd->last); return BLK_STS_OK; out_cleanup_iod: nvme_free_iod(dev, req); @@ -999,8 +1030,7 @@ static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end) { while (start != end) { nvme_handle_cqe(nvmeq, start); - if (++start == nvmeq->q_depth) - start = 0; + start = nvme_next_ring_index(nvmeq, start); } } @@ -1108,7 +1138,7 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl) memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_async_event; c.common.command_id = NVME_AQ_BLK_MQ_DEPTH; - nvme_submit_cmd(nvmeq, &c); + nvme_submit_cmd(nvmeq, &c, true); } static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) @@ -1531,6 +1561,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) spin_lock_irq(&nvmeq->cq_lock); nvmeq->sq_tail = 0; + nvmeq->last_sq_tail = 0; nvmeq->cq_head = 0; 
nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; @@ -1603,6 +1634,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = { #define NVME_SHARED_MQ_OPS \ .queue_rq = nvme_queue_rq, \ + .commit_rqs = nvme_commit_rqs, \ .rq_flags_to_type = nvme_rq_flags_to_type, \ .complete = nvme_pci_complete_rq, \ .init_hctx = nvme_init_hctx, \ -- 2.17.1
next prev parent reply other threads:[~2018-11-26 16:36 UTC|newest] Thread overview: 84+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-11-26 16:35 [PATCHSET 0/8] block plugging improvements Jens Axboe 2018-11-26 16:35 ` Jens Axboe 2018-11-26 16:35 ` [PATCH 1/8] block: sum requests in the plug structure Jens Axboe 2018-11-26 16:35 ` Jens Axboe 2018-11-26 17:02 ` Christoph Hellwig 2018-11-26 17:02 ` Christoph Hellwig 2018-11-26 16:35 ` [PATCH 2/8] block: improve logic around when to sort a plug list Jens Axboe 2018-11-26 16:35 ` Jens Axboe 2018-11-27 23:31 ` Omar Sandoval 2018-11-27 23:31 ` Omar Sandoval 2018-11-27 23:49 ` Jens Axboe 2018-11-27 23:49 ` Jens Axboe 2018-11-27 23:55 ` Omar Sandoval 2018-11-27 23:55 ` Omar Sandoval 2018-11-27 23:59 ` Jens Axboe 2018-11-27 23:59 ` Jens Axboe 2018-11-28 0:05 ` Omar Sandoval 2018-11-28 0:05 ` Omar Sandoval 2018-11-28 0:16 ` Jens Axboe 2018-11-28 0:16 ` Jens Axboe 2018-11-26 16:35 ` [PATCH 3/8] blk-mq: add mq_ops->commit_rqs() Jens Axboe 2018-11-26 16:35 ` Jens Axboe 2018-11-27 23:43 ` Omar Sandoval 2018-11-27 23:43 ` Omar Sandoval 2018-11-28 1:38 ` Ming Lei 2018-11-28 1:38 ` Ming Lei 2018-11-28 7:16 ` Christoph Hellwig 2018-11-28 7:16 ` Christoph Hellwig 2018-11-28 12:54 ` Jens Axboe 2018-11-28 12:54 ` Jens Axboe 2018-11-26 16:35 ` Jens Axboe [this message] 2018-11-26 16:35 ` [PATCH 4/8] nvme: implement mq_ops->commit_rqs() hook Jens Axboe 2018-11-28 7:20 ` Christoph Hellwig 2018-11-28 7:20 ` Christoph Hellwig 2018-11-28 13:07 ` Jens Axboe 2018-11-28 13:07 ` Jens Axboe 2018-11-26 16:35 ` [PATCH 5/8] virtio_blk: " Jens Axboe 2018-11-26 16:35 ` Jens Axboe 2018-11-27 23:45 ` Omar Sandoval 2018-11-27 23:45 ` Omar Sandoval 2018-11-28 3:05 ` Michael S. Tsirkin 2018-11-28 3:05 ` Michael S. 
Tsirkin 2018-11-28 2:10 ` Ming Lei 2018-11-28 2:10 ` Ming Lei 2018-11-28 2:34 ` Jens Axboe 2018-11-28 2:34 ` Jens Axboe 2018-11-29 1:23 ` Ming Lei 2018-11-29 1:23 ` Ming Lei 2018-11-29 2:19 ` Jens Axboe 2018-11-29 2:19 ` Jens Axboe 2018-11-29 2:51 ` Ming Lei 2018-11-29 2:51 ` Ming Lei 2018-11-29 3:13 ` Jens Axboe 2018-11-29 3:13 ` Jens Axboe 2018-11-29 3:27 ` Ming Lei 2018-11-29 3:27 ` Ming Lei 2018-11-29 3:53 ` Jens Axboe 2018-11-29 3:53 ` Jens Axboe 2018-11-28 7:21 ` Christoph Hellwig 2018-11-28 7:21 ` Christoph Hellwig 2018-11-26 16:35 ` [PATCH 6/8] ataflop: " Jens Axboe 2018-11-26 16:35 ` Jens Axboe 2018-11-27 23:46 ` Omar Sandoval 2018-11-27 23:46 ` Omar Sandoval 2018-11-28 7:22 ` Christoph Hellwig 2018-11-28 7:22 ` Christoph Hellwig 2018-11-28 13:09 ` Jens Axboe 2018-11-28 13:09 ` Jens Axboe 2018-11-26 16:35 ` [PATCH 7/8] blk-mq: use bd->last == true for list inserts Jens Axboe 2018-11-26 16:35 ` Jens Axboe 2018-11-27 23:49 ` Omar Sandoval 2018-11-27 23:49 ` Omar Sandoval 2018-11-27 23:51 ` Jens Axboe 2018-11-27 23:51 ` Jens Axboe 2018-11-28 1:49 ` Ming Lei 2018-11-28 1:49 ` Ming Lei 2018-11-28 2:37 ` Jens Axboe 2018-11-28 2:37 ` Jens Axboe 2018-11-26 16:35 ` [PATCH 8/8] blk-mq: add plug case for devices that implement ->commits_rqs() Jens Axboe 2018-11-26 16:35 ` Jens Axboe 2018-11-28 7:26 ` Christoph Hellwig 2018-11-28 7:26 ` Christoph Hellwig 2018-11-28 13:11 ` Jens Axboe 2018-11-28 13:11 ` Jens Axboe
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20181126163556.5181-5-axboe@kernel.dk \ --to=axboe@kernel.dk \ --cc=linux-block@vger.kernel.org \ --cc=linux-nvme@lists.infradead.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.