All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] block: export dma_alignment attribute
@ 2022-05-13 16:13 Keith Busch
  2022-05-13 16:13 ` [PATCH 2/3] block: relax direct io memory alignment Keith Busch
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Keith Busch @ 2022-05-13 16:13 UTC (permalink / raw)
  To: linux-block; +Cc: axboe, Kernel Team, Keith Busch

From: Keith Busch <kbusch@kernel.org>

User space may want to know how to align their buffers to avoid
bouncing. Export the queue attribute.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 block/blk-sysfs.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 88bd41d4cb59..14607565d781 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -274,6 +274,11 @@ static ssize_t queue_virt_boundary_mask_show(struct request_queue *q, char *page
 	return queue_var_show(q->limits.virt_boundary_mask, page);
 }
 
+static ssize_t queue_dma_alignment_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_dma_alignment(q), page);
+}
+
 #define QUEUE_SYSFS_BIT_FNS(name, flag, neg)				\
 static ssize_t								\
 queue_##name##_show(struct request_queue *q, char *page)		\
@@ -606,6 +611,7 @@ QUEUE_RO_ENTRY(queue_dax, "dax");
 QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
 QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
 QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
+QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment");
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 QUEUE_RW_ENTRY(blk_throtl_sample_time, "throttle_sample_time");
@@ -667,6 +673,7 @@ static struct attribute *queue_attrs[] = {
 	&blk_throtl_sample_time_entry.attr,
 #endif
 	&queue_virt_boundary_mask_entry.attr,
+	&queue_dma_alignment_entry.attr,
 	NULL,
 };
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/3] block: relax direct io memory alignment
  2022-05-13 16:13 [PATCH 1/3] block: export dma_alignment attribute Keith Busch
@ 2022-05-13 16:13 ` Keith Busch
  2022-05-13 16:13 ` [PATCH 3/3] block: ensure direct io is a block size Keith Busch
  2022-05-16  6:52 ` [PATCH 1/3] block: export dma_alignment attribute Christoph Hellwig
  2 siblings, 0 replies; 10+ messages in thread
From: Keith Busch @ 2022-05-13 16:13 UTC (permalink / raw)
  To: linux-block; +Cc: axboe, Kernel Team, Keith Busch

From: Keith Busch <kbusch@kernel.org>

Use the address alignment requirements from the hardware for direct io
instead of requiring addresses be aligned to the block size. User space
can discover the alignment requirements from the dma_alignment queue
attribute.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 block/fops.c           | 15 +++++++++------
 fs/direct-io.c         | 11 +++++++----
 fs/iomap/direct-io.c   |  3 ++-
 include/linux/blkdev.h |  5 +++++
 4 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/block/fops.c b/block/fops.c
index 9f2ecec406b0..a6583bce1e7d 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -62,8 +62,9 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 	struct bio bio;
 	ssize_t ret;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
+	if ((pos | iov_iter_count(iter)) & (bdev_logical_block_size(bdev) - 1))
+		return -EINVAL;
+	if (iov_iter_alignment(iter) & bdev_dma_alignment(bdev))
 		return -EINVAL;
 
 	if (nr_pages <= DIO_INLINE_BIO_VECS)
@@ -193,8 +194,9 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	loff_t pos = iocb->ki_pos;
 	int ret = 0;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
+	if ((pos | iov_iter_count(iter)) & (bdev_logical_block_size(bdev) - 1))
+		return -EINVAL;
+	if (iov_iter_alignment(iter) & bdev_dma_alignment(bdev))
 		return -EINVAL;
 
 	bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
@@ -316,8 +318,9 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	loff_t pos = iocb->ki_pos;
 	int ret = 0;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
+	if ((pos | iov_iter_count(iter)) & (bdev_logical_block_size(bdev) - 1))
+		return -EINVAL;
+	if (iov_iter_alignment(iter) & bdev_dma_alignment(bdev))
 		return -EINVAL;
 
 	bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index aef06e607b40..b3d249d7d91d 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1132,7 +1132,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	struct dio_submit sdio = { 0, };
 	struct buffer_head map_bh = { 0, };
 	struct blk_plug plug;
-	unsigned long align = offset | iov_iter_alignment(iter);
+	unsigned long align = iov_iter_alignment(iter);
 
 	/*
 	 * Avoid references to bdev if not absolutely needed to give
@@ -1166,11 +1166,14 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 		goto fail_dio;
 	}
 
-	if (align & blocksize_mask) {
-		if (bdev)
+	if ((offset | align) & blocksize_mask) {
+		if (bdev) {
 			blkbits = blksize_bits(bdev_logical_block_size(bdev));
+			if (align & bdev_dma_alignment(bdev))
+				goto fail_dio;
+		}
 		blocksize_mask = (1 << blkbits) - 1;
-		if (align & blocksize_mask)
+		if ((offset | count) & blocksize_mask)
 			goto fail_dio;
 	}
 
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index b08f5dc31780..c73b050b7026 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -243,7 +243,8 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 	size_t copied = 0;
 	size_t orig_count;
 
-	if ((pos | length | align) & ((1 << blkbits) - 1))
+	if ((pos | length) & ((1 << blkbits) - 1) ||
+	    align & bdev_dma_alignment(iomap->bdev))
 		return -EINVAL;
 
 	if (iomap->type == IOMAP_UNWRITTEN) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 60d016138997..dba6d411fc1e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1378,6 +1378,11 @@ static inline int queue_dma_alignment(const struct request_queue *q)
 	return q ? q->dma_alignment : 511;
 }
 
+static inline unsigned int bdev_dma_alignment(struct block_device *bdev)
+{
+	return queue_dma_alignment(bdev_get_queue(bdev));
+}
+
 static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
 				 unsigned int len)
 {
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/3] block: ensure direct io is a block size
  2022-05-13 16:13 [PATCH 1/3] block: export dma_alignment attribute Keith Busch
  2022-05-13 16:13 ` [PATCH 2/3] block: relax direct io memory alignment Keith Busch
@ 2022-05-13 16:13 ` Keith Busch
  2022-05-14  2:26   ` Bart Van Assche
  2022-05-16 10:13   ` Damien Le Moal
  2022-05-16  6:52 ` [PATCH 1/3] block: export dma_alignment attribute Christoph Hellwig
  2 siblings, 2 replies; 10+ messages in thread
From: Keith Busch @ 2022-05-13 16:13 UTC (permalink / raw)
  To: linux-block; +Cc: axboe, Kernel Team, Keith Busch

From: Keith Busch <kbusch@kernel.org>

If the iterator has an offset, filling a bio to the max bvecs may result
in a size that isn't aligned to the block size. Mask off bytes for the
bio being constructed.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 block/bio.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/block/bio.c b/block/bio.c
index 4259125e16ab..b42a9e3ff068 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1144,6 +1144,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
 	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
 	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
+	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
 	struct page **pages = (struct page **)bv;
 	bool same_page = false;
@@ -1160,6 +1161,9 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
 
 	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+	if (size > 0)
+		size = size & ~(queue_logical_block_size(q) - 1);
+
 	if (unlikely(size <= 0))
 		return size ? size : -EFAULT;
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] block: ensure direct io is a block size
  2022-05-13 16:13 ` [PATCH 3/3] block: ensure direct io is a block size Keith Busch
@ 2022-05-14  2:26   ` Bart Van Assche
  2022-05-16 10:13   ` Damien Le Moal
  1 sibling, 0 replies; 10+ messages in thread
From: Bart Van Assche @ 2022-05-14  2:26 UTC (permalink / raw)
  To: Keith Busch, linux-block; +Cc: axboe, Kernel Team, Keith Busch

On 5/13/22 09:13, Keith Busch wrote:
>   	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
> +	if (size > 0)
> +		size = size & ~(queue_logical_block_size(q) - 1);

The above code won't do what it should do if sizeof(size) > 
sizeof(queue_logical_block_size(q)). Please use the ALIGN() macro instead.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] block: export dma_alignment attribute
  2022-05-13 16:13 [PATCH 1/3] block: export dma_alignment attribute Keith Busch
  2022-05-13 16:13 ` [PATCH 2/3] block: relax direct io memory alignment Keith Busch
  2022-05-13 16:13 ` [PATCH 3/3] block: ensure direct io is a block size Keith Busch
@ 2022-05-16  6:52 ` Christoph Hellwig
  2022-05-16 14:32   ` Keith Busch
  2 siblings, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2022-05-16  6:52 UTC (permalink / raw)
  To: Keith Busch; +Cc: linux-block, axboe, Kernel Team, Keith Busch

I'm not against the sysfs file, but for applications this is a
horrible interface.  We really need something that works on the fd.

The XFS_IOC_DIOINFO ioctl from xfs is one, although ugly, option.
The other is to export the alignment requirements through statx.
We had that whole discussion with the inline crypto direct I/O support,
and we really need to tackle it rather sooner than later.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] block: ensure direct io is a block size
  2022-05-13 16:13 ` [PATCH 3/3] block: ensure direct io is a block size Keith Busch
  2022-05-14  2:26   ` Bart Van Assche
@ 2022-05-16 10:13   ` Damien Le Moal
  2022-05-16 14:01     ` Keith Busch
  1 sibling, 1 reply; 10+ messages in thread
From: Damien Le Moal @ 2022-05-16 10:13 UTC (permalink / raw)
  To: Keith Busch, linux-block; +Cc: axboe, Kernel Team, Keith Busch

On 2022/05/13 18:13, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
> 
> If the iterator has an offset, filling a bio to the max bvecs may result
> in a size that isn't aligned to the block size. Mask off bytes for the
> bio being constructed.
> 
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
>  block/bio.c | 4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/block/bio.c b/block/bio.c
> index 4259125e16ab..b42a9e3ff068 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -1144,6 +1144,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
>  {
>  	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
>  	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
> +	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
>  	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
>  	struct page **pages = (struct page **)bv;
>  	bool same_page = false;
> @@ -1160,6 +1161,9 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
>  	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
>  
>  	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
> +	if (size > 0)
> +		size = size & ~(queue_logical_block_size(q) - 1);

I think that __bio_iov_append_get_pages() needs the same change. And given that
both __bio_iov_append_get_pages() and __bio_iov_iter_get_pages() start with
iov_iter_get_pages(), should we do that and check the size in the single caller:
bio_iov_iter_get_pages() ?

> +
>  	if (unlikely(size <= 0))
>  		return size ? size : -EFAULT;
>  


-- 
Damien Le Moal
Western Digital Research

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] block: ensure direct io is a block size
  2022-05-16 10:13   ` Damien Le Moal
@ 2022-05-16 14:01     ` Keith Busch
  0 siblings, 0 replies; 10+ messages in thread
From: Keith Busch @ 2022-05-16 14:01 UTC (permalink / raw)
  To: Damien Le Moal; +Cc: Keith Busch, linux-block, axboe, Kernel Team

On Mon, May 16, 2022 at 12:13:10PM +0200, Damien Le Moal wrote:
> On 2022/05/13 18:13, Keith Busch wrote:
> > From: Keith Busch <kbusch@kernel.org>
> > 
> > If the iterator has an offset, filling a bio to the max bvecs may result
> > in a size that isn't aligned to the block size. Mask off bytes for the
> > bio being constructed.
> > 
> > Signed-off-by: Keith Busch <kbusch@kernel.org>
> > ---
> >  block/bio.c | 4 ++++
> >  1 file changed, 4 insertions(+)
> > 
> > diff --git a/block/bio.c b/block/bio.c
> > index 4259125e16ab..b42a9e3ff068 100644
> > --- a/block/bio.c
> > +++ b/block/bio.c
> > @@ -1144,6 +1144,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
> >  {
> >  	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
> >  	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
> > +	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
> >  	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
> >  	struct page **pages = (struct page **)bv;
> >  	bool same_page = false;
> > @@ -1160,6 +1161,9 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
> >  	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
> >  
> >  	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
> > +	if (size > 0)
> > +		size = size & ~(queue_logical_block_size(q) - 1);
> 
> I think that __bio_iov_append_get_pages() needs the same change. And given that
> both __bio_iov_append_get_pages() and __bio_iov_iter_get_pages() start with
> iov_iter_get_pages(), should we do that and check the size in the single caller:
> bio_iov_iter_get_pages() ?

Right, __bio_iov_append_get_pages needs this too. I'll see if we can remove much
of the duplicated code among these two functions as a prep patch.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] block: export dma_alignment attribute
  2022-05-16  6:52 ` [PATCH 1/3] block: export dma_alignment attribute Christoph Hellwig
@ 2022-05-16 14:32   ` Keith Busch
  2022-05-17 22:55     ` Keith Busch
  0 siblings, 1 reply; 10+ messages in thread
From: Keith Busch @ 2022-05-16 14:32 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Keith Busch, linux-block, axboe, Kernel Team

On Sun, May 15, 2022 at 11:52:46PM -0700, Christoph Hellwig wrote:
> I'm not against the sysfs file, but for applications this is a
> horrible interface.  We really need something that works on the fd.

Yeah, sysfs is not very convenient for the intended use.
 
> The XFS_IOC_DIOINFO ioctl from xfs is one, although ugly, option.
> The other is to export the alignment requirements through statx.
> We had that whole discussion with the inline crypto direct I/O support,
> and we really need to tackle it rather sooner than later.

I'll try out assigning one of the spares in 'struct statx' for the purpose. I
like the interface for that, and looks easy enough to pass along the dma
alignment requirement through it.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] block: export dma_alignment attribute
  2022-05-16 14:32   ` Keith Busch
@ 2022-05-17 22:55     ` Keith Busch
  2022-05-18  7:23       ` Christoph Hellwig
  0 siblings, 1 reply; 10+ messages in thread
From: Keith Busch @ 2022-05-17 22:55 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Keith Busch, linux-block, axboe, Kernel Team

On Mon, May 16, 2022 at 08:32:22AM -0600, Keith Busch wrote:
>  
> > The XFS_IOC_DIOINFO ioctl from xfs is one, although ugly, option.
> > The other is to export the alignment requirements through statx.
> > We had that whole discussion with the inline crypto direct I/O support,
> > and we really need to tackle it rather sooner than later.
> 
> I'll try out assigning one of the spares in 'struct statx' for the purpose. I
> like the interface for that, and looks easy enough to pass along the dma
> alignment requirement through it.

A little less easy than I thought... This is what I'm coming up with, and it's
bad enough that I'm sure there has to be a better way. Specifically concerning
is the new "do_dma()" function below, and was the only way I managed to get the
correct 'struct block_device' whether we're stat'ing a filesystem file or raw
block device file.

---
diff --git a/fs/stat.c b/fs/stat.c
index 5c2c94464e8b..72c0b36599c2 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -17,6 +17,7 @@
 #include <linux/syscalls.h>
 #include <linux/pagemap.h>
 #include <linux/compat.h>
+#include <linux/blkdev.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -594,6 +595,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
 	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
 	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
 	tmp.stx_mode = stat->mode;
+	tmp.stx_dma = stat->dma;
 	tmp.stx_ino = stat->ino;
 	tmp.stx_size = stat->size;
 	tmp.stx_blocks = stat->blocks;
@@ -615,6 +617,35 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
 	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
 }
 
+static void do_dma(int dfd, struct filename *filename, struct kstat *stat)
+{
+	struct open_flags op = {};
+	struct block_device *bdev;
+	struct file *f;
+
+	stat->dma = 511;
+
+	if (S_ISBLK(stat->mode)) {
+		bdev = blkdev_get_no_open(stat->rdev);
+
+		if (bdev) {
+			stat->dma = bdev_dma_alignment(bdev);
+			blkdev_put_no_open(bdev);
+		}
+		return;
+	}
+
+	f = do_filp_open(dfd, filename, &op);
+	if (IS_ERR(f))
+		return;
+
+	bdev = f->f_inode->i_sb->s_bdev;
+	if (bdev)
+		stat->dma = bdev_dma_alignment(bdev);
+	fput(f);
+}
+
 int do_statx(int dfd, struct filename *filename, unsigned int flags,
 	     unsigned int mask, struct statx __user *buffer)
 {
@@ -630,6 +661,7 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags,
 	if (error)
 		return error;
 
+	do_dma(dfd, filename, &stat);
 	return cp_statx(&stat, buffer);
 }
 
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 7df06931f25d..0a12c7498aa0 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -50,6 +50,7 @@ struct kstat {
 	struct timespec64 btime;			/* File creation time */
 	u64		blocks;
 	u64		mnt_id;
+	u16		dma;
 };
 
 #endif
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 1500a0f58041..380820c29c35 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -106,7 +106,7 @@ struct statx {
 	__u32	stx_uid;	/* User ID of owner */
 	__u32	stx_gid;	/* Group ID of owner */
 	__u16	stx_mode;	/* File mode */
-	__u16	__spare0[1];
+	__u16	stx_dma;	/* DMA alignment */
 	/* 0x20 */
 	__u64	stx_ino;	/* Inode number */
 	__u64	stx_size;	/* File size */
--

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] block: export dma_alignment attribute
  2022-05-17 22:55     ` Keith Busch
@ 2022-05-18  7:23       ` Christoph Hellwig
  0 siblings, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2022-05-18  7:23 UTC (permalink / raw)
  To: Keith Busch
  Cc: Christoph Hellwig, Keith Busch, linux-block, axboe, Kernel Team

On Tue, May 17, 2022 at 04:55:20PM -0600, Keith Busch wrote:
> A little less easy than I thought... This is what I'm coming up with, and it's
> bad enough that I'm sure there has to be a better way. Specifically concerning
> is the new "do_dma()" function below, and was the only way I managed to get the
> correct 'struct block_device' whether we're stat'ing a filesystem file or raw
> block device file.

I don't think doing this in common code makes much sense.  The core
VFS code should not have to know if something is on a block device or
not.

Instead add a new getattr method to block/bdev.c for the block devices,
and just have a helper to set the alinments(s) based on that by it,
and any file systems that is made ready to accept lower alignment.
And I'd prefer to do them individully and tested as there might be all
kinds of assumptions.  For all other instances keep the value as 0
for unknown.

> --- a/include/uapi/linux/stat.h
> +++ b/include/uapi/linux/stat.h
> @@ -106,7 +106,7 @@ struct statx {
>  	__u32	stx_uid;	/* User ID of owner */
>  	__u32	stx_gid;	/* Group ID of owner */
>  	__u16	stx_mode;	/* File mode */
> -	__u16	__spare0[1];
> +	__u16	stx_dma;	/* DMA alignment */

I'd name this dio_mem_alignment, because it is is:

 a) specific to direct I/O
 b) DMA is just the implementation detail, but not the user semantics

while we're at it, please also add a dio_file_alignment for the
alignment in the file, which can be sector or fs block size.  I'm
perfectly fine if you only do it for the block layer first, I'll
take up the wok to update the most common file systems after that.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2022-05-18  7:23 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-13 16:13 [PATCH 1/3] block: export dma_alignment attribute Keith Busch
2022-05-13 16:13 ` [PATCH 2/3] block: relax direct io memory alignment Keith Busch
2022-05-13 16:13 ` [PATCH 3/3] block: ensure direct io is a block size Keith Busch
2022-05-14  2:26   ` Bart Van Assche
2022-05-16 10:13   ` Damien Le Moal
2022-05-16 14:01     ` Keith Busch
2022-05-16  6:52 ` [PATCH 1/3] block: export dma_alignment attribute Christoph Hellwig
2022-05-16 14:32   ` Keith Busch
2022-05-17 22:55     ` Keith Busch
2022-05-18  7:23       ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.