From: Max Gurtovoy <mgurtovoy@nvidia.com>
To: Feng Li <lifeng1519@gmail.com>
Cc: <hch@infradead.org>, <mst@redhat.com>,
	<virtualization@lists.linux-foundation.org>,
	<kvm@vger.kernel.org>, <stefanha@redhat.com>,
	<israelr@nvidia.com>, <nitzanc@nvidia.com>, <oren@nvidia.com>,
	linux-block <linux-block@vger.kernel.org>,
	Jens Axboe <axboe@kernel.dk>
Subject: Re: [PATCH 1/1] virtio-blk: avoid preallocating big SGL for data
Date: Wed, 1 Sep 2021 13:19:09 +0300	[thread overview]
Message-ID: <165359fc-8f97-ede3-8ab5-35329ca61dbd@nvidia.com> (raw)
In-Reply-To: <CAEK8JBBU3zNAWpC36-Lq0UBM1Dp+jYQG105psE38Fy8KRy=M-g@mail.gmail.com>


On 9/1/2021 6:38 AM, Feng Li wrote:
> Does this hurt the performance of virtio-blk?
> I think a fio result is needed here.

No, we've been using this mechanism in NVMe/NVMe-oF for a few years already
and haven't seen any performance issues.

Also, the fio tests I ran with our NVIDIA virtio-blk SNAP devices showed
the same perf numbers.

I can add it to v2.
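
For anyone who wants the new flow in one place, here is a minimal sketch of
the inline + chained SGL pattern (my own illustration, not part of the patch;
foo_req, FOO_INLINE_SG_CNT, foo_map_data and foo_unmap_data are made-up names,
while the sg_* and blk_* helpers are the real kernel APIs):

#include <linux/blk-mq.h>
#include <linux/scatterlist.h>

#define FOO_INLINE_SG_CNT	2	/* like VIRTIO_BLK_INLINE_SG_CNT */

struct foo_req {
	struct sg_table sg_table;
	/* FOO_INLINE_SG_CNT scatterlist entries follow this struct in the
	 * blk-mq PDU, reserved via tag_set.cmd_size, the same trick the
	 * patch uses for virtblk_req.
	 */
};

static int foo_map_data(struct request_queue *q, struct request *req,
			struct foo_req *fr)
{
	/* Point sgl at the inline entries; sg_alloc_table_chained() uses
	 * them directly for small requests and chains a runtime
	 * allocation onto them for larger ones.
	 */
	fr->sg_table.sgl = (struct scatterlist *)(fr + 1);
	if (sg_alloc_table_chained(&fr->sg_table,
				   blk_rq_nr_phys_segments(req),
				   fr->sg_table.sgl, FOO_INLINE_SG_CNT))
		return -ENOMEM;

	/* Returns the number of mapped entries in fr->sg_table.sgl */
	return blk_rq_map_sg(q, req, fr->sg_table.sgl);
}

static void foo_unmap_data(struct foo_req *fr)
{
	/* The completion path must pass back the same inline count */
	sg_free_table_chained(&fr->sg_table, FOO_INLINE_SG_CNT);
}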

>
> On Tue, Aug 31, 2021 at 7:36 AM Max Gurtovoy <mgurtovoy@nvidia.com> wrote:
>> No need to pre-allocate a big buffer for the IO SGL anymore. If a device
>> has lots of deep queues, preallocation for the sg list can consume
>> substantial amounts of memory. For a HW virtio-blk device, nr_hw_queues
>> can be 64 or 128 and each queue's depth might be 128, so the resulting
>> preallocation for the data SGLs is big.
>>
>> Switch to runtime allocation of the SGL for lists longer than 2 entries.
>> This is the approach used by the NVMe drivers, so it should be reasonable
>> for virtio-blk as well. Runtime SGL allocation has always been the case
>> for the legacy I/O path, so this is nothing new.
>>
>> The preallocated small SGL depends on SG_CHAIN so if the ARCH doesn't
>> support SG_CHAIN, use only runtime allocation for the SGL.
>>
>> Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
>> Reviewed-by: Israel Rukshin <israelr@nvidia.com>
>> ---
>>   drivers/block/virtio_blk.c | 37 ++++++++++++++++++++++---------------
>>   1 file changed, 22 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
>> index 77e8468e8593..9a4c5d428b58 100644
>> --- a/drivers/block/virtio_blk.c
>> +++ b/drivers/block/virtio_blk.c
>> @@ -24,6 +24,12 @@
>>   /* The maximum number of sg elements that fit into a virtqueue */
>>   #define VIRTIO_BLK_MAX_SG_ELEMS 32768
>>
>> +#ifdef CONFIG_ARCH_NO_SG_CHAIN
>> +#define VIRTIO_BLK_INLINE_SG_CNT       0
>> +#else
>> +#define VIRTIO_BLK_INLINE_SG_CNT       2
>> +#endif
>> +
>>   static int virtblk_queue_count_set(const char *val,
>>                  const struct kernel_param *kp)
>>   {
>> @@ -99,7 +105,7 @@ struct virtio_blk {
>>   struct virtblk_req {
>>          struct virtio_blk_outhdr out_hdr;
>>          u8 status;
>> -       struct scatterlist sg[];
>> +       struct sg_table sg_table;
>>   };
>>
>>   static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
>> @@ -188,6 +194,8 @@ static inline void virtblk_request_done(struct request *req)
>>   {
>>          struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
>>
>> +       sg_free_table_chained(&vbr->sg_table, VIRTIO_BLK_INLINE_SG_CNT);
>> +
>>          if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
>>                  kfree(page_address(req->special_vec.bv_page) +
>>                        req->special_vec.bv_offset);
>> @@ -291,7 +299,15 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>>                          return BLK_STS_RESOURCE;
>>          }
>>
>> -       num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
>> +       vbr->sg_table.sgl = (struct scatterlist *)(vbr + 1);
>> +       err = sg_alloc_table_chained(&vbr->sg_table,
>> +                                    blk_rq_nr_phys_segments(req),
>> +                                    vbr->sg_table.sgl,
>> +                                    VIRTIO_BLK_INLINE_SG_CNT);
>> +       if (err)
>> +               return BLK_STS_RESOURCE;
>> +
>> +       num = blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl);
>>          if (num) {
>>                  if (rq_data_dir(req) == WRITE)
>>                          vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
>> @@ -300,7 +316,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>>          }
>>
>>          spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
>> -       err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
>> +       err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, num);
>>          if (err) {
>>                  virtqueue_kick(vblk->vqs[qid].vq);
>>                  /* Don't stop the queue if -ENOMEM: we may have failed to
>> @@ -309,6 +325,8 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>>                  if (err == -ENOSPC)
>>                          blk_mq_stop_hw_queue(hctx);
>>                  spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
>> +               sg_free_table_chained(&vbr->sg_table,
>> +                                     VIRTIO_BLK_INLINE_SG_CNT);
>>                  switch (err) {
>>                  case -ENOSPC:
>>                          return BLK_STS_DEV_RESOURCE;
>> @@ -687,16 +705,6 @@ static const struct attribute_group *virtblk_attr_groups[] = {
>>          NULL,
>>   };
>>
>> -static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
>> -               unsigned int hctx_idx, unsigned int numa_node)
>> -{
>> -       struct virtio_blk *vblk = set->driver_data;
>> -       struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
>> -
>> -       sg_init_table(vbr->sg, vblk->sg_elems);
>> -       return 0;
>> -}
>> -
>>   static int virtblk_map_queues(struct blk_mq_tag_set *set)
>>   {
>>          struct virtio_blk *vblk = set->driver_data;
>> @@ -709,7 +717,6 @@ static const struct blk_mq_ops virtio_mq_ops = {
>>          .queue_rq       = virtio_queue_rq,
>>          .commit_rqs     = virtio_commit_rqs,
>>          .complete       = virtblk_request_done,
>> -       .init_request   = virtblk_init_request,
>>          .map_queues     = virtblk_map_queues,
>>   };
>>
>> @@ -805,7 +812,7 @@ static int virtblk_probe(struct virtio_device *vdev)
>>          vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
>>          vblk->tag_set.cmd_size =
>>                  sizeof(struct virtblk_req) +
>> -               sizeof(struct scatterlist) * sg_elems;
>> +               sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT;
>>          vblk->tag_set.driver_data = vblk;
>>          vblk->tag_set.nr_hw_queues = vblk->num_vqs;
>>
>> --
>> 2.18.1
>>
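
To put the memory argument from the commit message above into rough numbers
(illustrative only, assuming sizeof(struct scatterlist) = 32 bytes on 64-bit,
seg_max = 128, 64 HW queues and a queue depth of 128):

  before: 64 queues * 128 depth * 128 entries * 32 B ~= 32 MiB of SGLs
          preallocated per device
  after:  64 queues * 128 depth *   2 entries * 32 B ~= 512 KiB inline;
          anything larger is allocated only while the request is in flight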

