All of lore.kernel.org
 help / color / mirror / Atom feed
From: Keith Busch <kbusch@fb.com>
To: <linux-fsdevel@vger.kernel.org>, <linux-block@vger.kernel.org>
Cc: <axboe@kernel.dk>, Kernel Team <Kernel-team@fb.com>, <hch@lst.de>,
	<bvanassche@acm.org>, <damien.lemoal@opensource.wdc.com>,
	Keith Busch <kbusch@kernel.org>
Subject: [PATCHv2 3/3] block: relax direct io memory alignment
Date: Wed, 18 May 2022 10:11:31 -0700	[thread overview]
Message-ID: <20220518171131.3525293-4-kbusch@fb.com> (raw)
In-Reply-To: <20220518171131.3525293-1-kbusch@fb.com>

From: Keith Busch <kbusch@kernel.org>

Use the address alignment requirements from the hardware for direct io
instead of requiring addresses be aligned to the block size. User space
can discover the alignment requirements from the dma_alignment queue
attribute.

User space can specify any hardware-compatible DMA offset for each
segment, but every segment length is still required to be a multiple of
the block size.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
v1->v2:

  Squashed the alignment patch into this one

  Use ALIGN_DOWN macro instead of reimplementing it

  Check for misalignment in the _simple case

 block/bio.c            |  3 +++
 block/fops.c           | 20 ++++++++++++++------
 fs/direct-io.c         | 11 +++++++----
 fs/iomap/direct-io.c   |  3 ++-
 include/linux/blkdev.h |  5 +++++
 5 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 320514a47527..bde9b475a4d8 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1207,6 +1207,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
 	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
 	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
+	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
 	struct page **pages = (struct page **)bv;
 	bool same_page = false;
@@ -1223,6 +1224,8 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
 
 	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+	if (size > 0)
+		size = ALIGN_DOWN(size, queue_logical_block_size(q));
 	if (unlikely(size <= 0))
 		return size ? size : -EFAULT;
 
diff --git a/block/fops.c b/block/fops.c
index b9b83030e0df..d8537c29602f 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -54,8 +54,9 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 	struct bio bio;
 	ssize_t ret;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
+	if ((pos | iov_iter_count(iter)) & (bdev_logical_block_size(bdev) - 1))
+		return -EINVAL;
+	if (iov_iter_alignment(iter) & bdev_dma_alignment(bdev))
 		return -EINVAL;
 
 	if (nr_pages <= DIO_INLINE_BIO_VECS)
@@ -80,6 +81,11 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 	ret = bio_iov_iter_get_pages(&bio, iter);
 	if (unlikely(ret))
 		goto out;
+	if (unlikely(iov_iter_count(iter))) {
+		/* iov is not aligned for a single bio */
+		ret = -EINVAL;
+		goto out;
+	}
 	ret = bio.bi_iter.bi_size;
 
 	if (iov_iter_rw(iter) == WRITE)
@@ -173,8 +179,9 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	loff_t pos = iocb->ki_pos;
 	int ret = 0;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
+	if ((pos | iov_iter_count(iter)) & (bdev_logical_block_size(bdev) - 1))
+		return -EINVAL;
+	if (iov_iter_alignment(iter) & bdev_dma_alignment(bdev))
 		return -EINVAL;
 
 	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
@@ -298,8 +305,9 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	loff_t pos = iocb->ki_pos;
 	int ret = 0;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
+	if ((pos | iov_iter_count(iter)) & (bdev_logical_block_size(bdev) - 1))
+		return -EINVAL;
+	if (iov_iter_alignment(iter) & bdev_dma_alignment(bdev))
 		return -EINVAL;
 
 	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 840752006f60..64cc176be60c 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1131,7 +1131,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	struct dio_submit sdio = { 0, };
 	struct buffer_head map_bh = { 0, };
 	struct blk_plug plug;
-	unsigned long align = offset | iov_iter_alignment(iter);
+	unsigned long align = iov_iter_alignment(iter);
 
 	/*
 	 * Avoid references to bdev if not absolutely needed to give
@@ -1165,11 +1165,14 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 		goto fail_dio;
 	}
 
-	if (align & blocksize_mask) {
-		if (bdev)
+	if ((offset | align) & blocksize_mask) {
+		if (bdev) {
 			blkbits = blksize_bits(bdev_logical_block_size(bdev));
+			if (align & bdev_dma_alignment(bdev))
+				goto fail_dio;
+		}
 		blocksize_mask = (1 << blkbits) - 1;
-		if (align & blocksize_mask)
+		if ((offset | count) & blocksize_mask)
 			goto fail_dio;
 	}
 
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 80f9b047aa1b..0256d28baa8e 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -244,7 +244,8 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 	size_t copied = 0;
 	size_t orig_count;
 
-	if ((pos | length | align) & ((1 << blkbits) - 1))
+	if ((pos | length) & ((1 << blkbits) - 1) ||
+	    align & bdev_dma_alignment(iomap->bdev))
 		return -EINVAL;
 
 	if (iomap->type == IOMAP_UNWRITTEN) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5bdf2ac9142c..834b981ef01b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1365,6 +1365,11 @@ static inline int queue_dma_alignment(const struct request_queue *q)
 	return q ? q->dma_alignment : 511;
 }
 
+static inline unsigned int bdev_dma_alignment(struct block_device *bdev)
+{
+	return queue_dma_alignment(bdev_get_queue(bdev));
+}
+
 static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
 				 unsigned int len)
 {
-- 
2.30.2


  parent reply	other threads:[~2022-05-18 17:11 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-18 17:11 [PATCHv2 0/3] direct io alignment relax Keith Busch
2022-05-18 17:11 ` [PATCHv2 1/3] block/bio: remove duplicate append pages code Keith Busch
2022-05-18 20:21   ` Chaitanya Kulkarni
2022-05-19  4:28   ` Bart Van Assche
2022-05-19  7:32   ` Christoph Hellwig
2022-05-19 14:19     ` Keith Busch
2022-05-18 17:11 ` [PATCHv2 2/3] block: export dma_alignment attribute Keith Busch
2022-05-18 20:22   ` Chaitanya Kulkarni
2022-05-19  4:30   ` Bart Van Assche
2022-05-19  7:33   ` Christoph Hellwig
2022-05-18 17:11 ` Keith Busch [this message]
2022-05-19  0:14   ` [PATCHv2 3/3] block: relax direct io memory alignment Eric Biggers
2022-05-19  1:00     ` Keith Busch
2022-05-19  1:53       ` Eric Biggers
2022-05-19  1:59         ` Keith Busch
2022-05-19  2:08           ` Eric Biggers
2022-05-19  2:25             ` Keith Busch
2022-05-19  3:27               ` Eric Biggers
2022-05-19  4:40                 ` Bart Van Assche
2022-05-19  4:56                 ` Keith Busch
2022-05-19  6:45                   ` Damien Le Moal
2022-05-19 17:19                     ` Eric Biggers
2022-05-20  3:41                       ` Damien Le Moal
2022-05-19  7:41                   ` Christoph Hellwig
2022-05-19 16:35                     ` Keith Busch
2022-05-20  6:07                       ` Christoph Hellwig
2022-05-19 17:01                   ` Keith Busch
2022-05-19 17:27                     ` Eric Biggers
2022-05-19 17:43                       ` Keith Busch
2022-05-19  7:39       ` Christoph Hellwig
2022-05-19 22:31         ` Keith Busch
2022-05-19  7:38   ` Christoph Hellwig
2022-05-19 14:08     ` Keith Busch
2022-05-20  6:10       ` Christoph Hellwig
2022-05-18 22:45 ` [PATCHv2 0/3] direct io alignment relax Jens Axboe
2022-05-19  7:42   ` Christoph Hellwig
2022-05-19 12:46     ` Jens Axboe
2022-05-18 23:26 ` Eric Biggers
2022-05-19  0:51   ` Keith Busch
2022-05-19  1:02     ` Chaitanya Kulkarni
2022-05-19  2:02       ` Eric Biggers
2022-05-19  7:43         ` hch

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220518171131.3525293-4-kbusch@fb.com \
    --to=kbusch@fb.com \
    --cc=Kernel-team@fb.com \
    --cc=axboe@kernel.dk \
    --cc=bvanassche@acm.org \
    --cc=damien.lemoal@opensource.wdc.com \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.