All of lore.kernel.org
 help / color / mirror / Atom feed
From: Keith Busch <kbusch@fb.com>
To: <linux-fsdevel@vger.kernel.org>, <linux-block@vger.kernel.org>
Cc: <axboe@kernel.dk>, Kernel Team <Kernel-team@fb.com>, <hch@lst.de>,
	<bvanassche@acm.org>, <damien.lemoal@opensource.wdc.com>,
	<ebiggers@kernel.org>, Keith Busch <kbusch@kernel.org>
Subject: [PATCHv3 6/6] block: relax direct io memory alignment
Date: Mon, 23 May 2022 14:01:19 -0700	[thread overview]
Message-ID: <20220523210119.2500150-7-kbusch@fb.com> (raw)
In-Reply-To: <20220523210119.2500150-1-kbusch@fb.com>

From: Keith Busch <kbusch@kernel.org>

Use the address alignment requirements from the hardware for direct io
instead of requiring addresses be aligned to the block size. User space
can discover the alignment requirements from the dma_alignment queue
attribute.

User space can specify any hardware-compatible DMA offset for each
segment, but every segment length is still required to be a multiple of
the block size.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
v2->v3:

  Removed iomap support for now

  Added alignment helper function instead of duplicating it (Christoph)

  Added comment explaining ALIGN_DOWN

  Added check for iov alignment in _async case

 block/bio.c  | 13 +++++++++++++
 block/fops.c | 41 ++++++++++++++++++++++++++++++-----------
 2 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 55d2a9c4e312..c8ea14ad87f6 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1205,6 +1205,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
 	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
 	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
+	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
 	struct page **pages = (struct page **)bv;
 	ssize_t size, left;
@@ -1219,7 +1220,19 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
 	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
 
+	/*
+	 * Each segment in the iov is required to be a block size multiple.
+	 * However, we may not be able to get the entire segment if it spans
+	 * more pages than bi_max_vecs allows, so we have to ALIGN_DOWN the
+	 * result to ensure the bio's total size is correct. The remainder of
+	 * the iov data will be picked up in the next bio iteration.
+	 *
+	 * If the result is ever 0, that indicates the iov fails the segment
+	 * size requirement and is an error.
+	 */
 	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+	if (size > 0)
+		size = ALIGN_DOWN(size, queue_logical_block_size(q));
 	if (unlikely(size <= 0))
 		return size ? size : -EFAULT;
 
diff --git a/block/fops.c b/block/fops.c
index b9b83030e0df..218e4a8b92aa 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -42,6 +42,16 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
 	return op;
 }
 
+static int blkdev_dio_aligned(struct block_device *bdev, loff_t pos,
+			      struct iov_iter *iter)
+{
+	if ((pos | iov_iter_count(iter)) & (bdev_logical_block_size(bdev) - 1))
+		return -EINVAL;
+	if (iov_iter_alignment(iter) & bdev_dma_alignment(bdev))
+		return -EINVAL;
+	return 0;
+}
+
 #define DIO_INLINE_BIO_VECS 4
 
 static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
@@ -54,9 +64,9 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 	struct bio bio;
 	ssize_t ret;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
-		return -EINVAL;
+	ret = blkdev_dio_aligned(bdev, pos, iter);
+	if (ret)
+		return ret;
 
 	if (nr_pages <= DIO_INLINE_BIO_VECS)
 		vecs = inline_vecs;
@@ -80,6 +90,11 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 	ret = bio_iov_iter_get_pages(&bio, iter);
 	if (unlikely(ret))
 		goto out;
+	/* check if iov is not aligned */
+	if (unlikely(iov_iter_count(iter))) {
+		ret = -EINVAL;
+		goto out;
+	}
 	ret = bio.bi_iter.bi_size;
 
 	if (iov_iter_rw(iter) == WRITE)
@@ -171,11 +186,11 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
 	unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
 	loff_t pos = iocb->ki_pos;
-	int ret = 0;
+	int ret;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
-		return -EINVAL;
+	ret = blkdev_dio_aligned(bdev, pos, iter);
+	if (ret)
+		return ret;
 
 	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
 		opf |= REQ_ALLOC_CACHE;
@@ -296,11 +311,11 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	struct blkdev_dio *dio;
 	struct bio *bio;
 	loff_t pos = iocb->ki_pos;
-	int ret = 0;
+	int ret;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
-		return -EINVAL;
+	ret = blkdev_dio_aligned(bdev, pos, iter);
+	if (ret)
+		return ret;
 
 	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
 		opf |= REQ_ALLOC_CACHE;
@@ -323,6 +338,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 		bio_iov_bvec_set(bio, iter);
 	} else {
 		ret = bio_iov_iter_get_pages(bio, iter);
+
+		/* check if iov is not aligned */
+		if (unlikely(iov_iter_count(iter)))
+			ret = -EINVAL;
 		if (unlikely(ret)) {
 			bio_put(bio);
 			return ret;
-- 
2.30.2


  parent reply	other threads:[~2022-05-23 21:02 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-23 21:01 [PATCHv3 0/6] direct io dma alignment Keith Busch
2022-05-23 21:01 ` [PATCHv3 1/6] block/bio: remove duplicate append pages code Keith Busch
2022-05-24  6:00   ` Christoph Hellwig
2022-05-24  6:24   ` Johannes Thumshirn
2022-05-24 14:17   ` Pankaj Raghav
2022-05-24 15:38     ` Keith Busch
2022-05-25  7:49       ` Pankaj Raghav
2022-05-25  8:30         ` Damien Le Moal
2022-05-25 13:37         ` Keith Busch
2022-05-25 14:25           ` Pankaj Raghav
2022-05-23 21:01 ` [PATCHv3 2/6] block: export dma_alignment attribute Keith Busch
2022-05-24  6:02   ` Christoph Hellwig
2022-05-24  6:24   ` Johannes Thumshirn
2022-05-23 21:01 ` [PATCHv3 3/6] block: introduce bdev_dma_alignment helper Keith Busch
2022-05-24  6:02   ` Christoph Hellwig
2022-05-24  6:25   ` Johannes Thumshirn
2022-05-23 21:01 ` [PATCHv3 4/6] block/merge: count bytes instead of sectors Keith Busch
2022-05-24  6:07   ` Christoph Hellwig
2022-05-23 21:01 ` [PATCHv3 5/6] block/bounce: " Keith Busch
2022-05-24  6:09   ` Christoph Hellwig
2022-05-25 14:08     ` Keith Busch
2022-05-25 14:17       ` Keith Busch
2022-05-24 14:32   ` Pankaj Raghav
2022-05-23 21:01 ` Keith Busch [this message]
2022-05-24  6:12   ` [PATCHv3 6/6] block: relax direct io memory alignment Christoph Hellwig
2022-05-24 15:19   ` Pankaj Raghav

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220523210119.2500150-7-kbusch@fb.com \
    --to=kbusch@fb.com \
    --cc=Kernel-team@fb.com \
    --cc=axboe@kernel.dk \
    --cc=bvanassche@acm.org \
    --cc=damien.lemoal@opensource.wdc.com \
    --cc=ebiggers@kernel.org \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.