All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] ext4: SEEK_HOLE / SEEK_DATA via iomap
@ 2017-08-29 14:29 Andreas Gruenbacher
  2017-08-29 14:29 ` [PATCH 1/4] iomap: Switch from blkno to disk offset Andreas Gruenbacher
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Andreas Gruenbacher @ 2017-08-29 14:29 UTC (permalink / raw)
  To: linux-fsdevel
  Cc: linux-ext4, linux-xfs, Jan Kara, Christoph Hellwig, Andreas Gruenbacher

Add IOMAP_REPORT support to ext4, including support for inline data
which iomap couldn't report so far.  Switch to iomap_seek_{hole,data} on
ext4.

This patch set seems to be working correctly on ext4 as well as xfs
under xfstests.  Jan Kara has pointed out an issue that I can't
reproduce though, so please review carefully.

Thanks,
Andreas

Andreas Gruenbacher (3):
  iomap: Switch from blkno to disk offset
  iomap: Add IOMAP_F_DATA_INLINE flag
  ext4: Add iomap support for inline data

Christoph Hellwig (1):
  ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA

 fs/buffer.c           |   4 +-
 fs/dax.c              |   2 +-
 fs/ext2/inode.c       |   4 +-
 fs/ext4/Kconfig       |   1 +
 fs/ext4/ext4.h        |   7 +-
 fs/ext4/file.c        | 271 +++-----------------------------------------------
 fs/ext4/inline.c      |  33 ++++++
 fs/ext4/inode.c       | 114 +++++++--------------
 fs/iomap.c            |  13 +--
 fs/nfsd/blocklayout.c |   4 +-
 fs/xfs/xfs_iomap.c    |   6 +-
 include/linux/iomap.h |  15 +--
 12 files changed, 115 insertions(+), 359 deletions(-)

-- 
2.13.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/4] iomap: Switch from blkno to disk offset
  2017-08-29 14:29 [PATCH 0/4] ext4: SEEK_HOLE / SEEK_DATA via iomap Andreas Gruenbacher
@ 2017-08-29 14:29 ` Andreas Gruenbacher
  2017-08-29 16:11   ` Darrick J. Wong
  2017-08-30 15:00   ` Jan Kara
  2017-08-29 14:29 ` [PATCH 2/4] iomap: Add IOMAP_F_DATA_INLINE flag Andreas Gruenbacher
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 14+ messages in thread
From: Andreas Gruenbacher @ 2017-08-29 14:29 UTC (permalink / raw)
  To: linux-fsdevel
  Cc: linux-ext4, linux-xfs, Jan Kara, Christoph Hellwig, Andreas Gruenbacher

Replace iomap->blkno, the sector number, with iomap->addr, the disk
offset in bytes.  For invalid disk offsets, use the special value
IOMAP_NULL_ADDR instead of IOMAP_NULL_BLOCK.  This allows to use iomap
for mappings which are not block aligned, such as inline data on ext4
and stuffed inodes on gfs2.

Tested on xfs and ext4 with xfstests; seems to be working correctly.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/buffer.c           |  4 ++--
 fs/dax.c              |  2 +-
 fs/ext2/inode.c       |  4 ++--
 fs/ext4/inode.c       |  4 ++--
 fs/iomap.c            | 11 +++++------
 fs/nfsd/blocklayout.c |  4 ++--
 fs/xfs/xfs_iomap.c    |  6 +++---
 include/linux/iomap.h | 10 +++++-----
 8 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 5715dac7821f..b9d20076bb8f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1979,8 +1979,8 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
 	case IOMAP_MAPPED:
 		if (offset >= i_size_read(inode))
 			set_buffer_new(bh);
-		bh->b_blocknr = (iomap->blkno >> (inode->i_blkbits - 9)) +
-				((offset - iomap->offset) >> inode->i_blkbits);
+		bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
+				inode->i_blkbits;
 		set_buffer_mapped(bh);
 		break;
 	}
diff --git a/fs/dax.c b/fs/dax.c
index 865d42c63e23..539aaa0b9c45 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -987,7 +987,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
 
 static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
 {
-	return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
+	return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
 }
 
 static loff_t
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 30163d007b2f..e7d8925d0140 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -824,11 +824,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 
 	if (ret == 0) {
 		iomap->type = IOMAP_HOLE;
-		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->addr = IOMAP_NULL_ADDR;
 		iomap->length = 1 << blkbits;
 	} else {
 		iomap->type = IOMAP_MAPPED;
-		iomap->blkno = (sector_t)bno << (blkbits - 9);
+		iomap->addr = (u64)bno << blkbits;
 		iomap->length = (u64)ret << blkbits;
 		iomap->flags |= IOMAP_F_MERGED;
 	}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c774bdc22759..429f0afde9f2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3483,7 +3483,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 
 	if (ret == 0) {
 		iomap->type = IOMAP_HOLE;
-		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->addr = IOMAP_NULL_ADDR;
 		iomap->length = (u64)map.m_len << blkbits;
 	} else {
 		if (map.m_flags & EXT4_MAP_MAPPED) {
@@ -3494,7 +3494,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 			WARN_ON_ONCE(1);
 			return -EIO;
 		}
-		iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9);
+		iomap->addr = (u64)map.m_pblk << blkbits;
 		iomap->length = (u64)map.m_len << blkbits;
 	}
 
diff --git a/fs/iomap.c b/fs/iomap.c
index 59cc98ad7577..7bc1797c2292 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -350,8 +350,8 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
 static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
 		struct iomap *iomap)
 {
-	sector_t sector = iomap->blkno +
-		(((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9);
+	sector_t sector = (iomap->addr +
+			   (pos & PAGE_MASK) - iomap->offset) >> 9;
 
 	return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, sector,
 			offset, bytes);
@@ -512,9 +512,8 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
 		flags |= FIEMAP_EXTENT_SHARED;
 
 	return fiemap_fill_next_extent(fi, iomap->offset,
-			iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0,
+			iomap->addr != IOMAP_NULL_ADDR ? iomap->addr : 0,
 			iomap->length, flags);
-
 }
 
 static loff_t
@@ -807,7 +806,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
 	bio = bio_alloc(GFP_KERNEL, 1);
 	bio->bi_bdev = iomap->bdev;
 	bio->bi_iter.bi_sector =
-		iomap->blkno + ((pos - iomap->offset) >> 9);
+		(iomap->addr + pos - iomap->offset) >> 9;
 	bio->bi_private = dio;
 	bio->bi_end_io = iomap_dio_bio_end_io;
 
@@ -886,7 +885,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 		bio = bio_alloc(GFP_KERNEL, nr_pages);
 		bio->bi_bdev = iomap->bdev;
 		bio->bi_iter.bi_sector =
-			iomap->blkno + ((pos - iomap->offset) >> 9);
+			(iomap->addr + pos - iomap->offset) >> 9;
 		bio->bi_write_hint = dio->iocb->ki_hint;
 		bio->bi_private = dio;
 		bio->bi_end_io = iomap_dio_bio_end_io;
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index c862c2489df0..2d1d37b27dc7 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -65,7 +65,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
 			bex->es = PNFS_BLOCK_READ_DATA;
 		else
 			bex->es = PNFS_BLOCK_READWRITE_DATA;
-		bex->soff = (iomap.blkno << 9);
+		bex->soff = iomap.addr;
 		break;
 	case IOMAP_UNWRITTEN:
 		if (seg->iomode & IOMODE_RW) {
@@ -78,7 +78,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
 			}
 
 			bex->es = PNFS_BLOCK_INVALID_DATA;
-			bex->soff = (iomap.blkno << 9);
+			bex->soff = iomap.addr;
 			break;
 		}
 		/*FALLTHRU*/
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 813394c62849..1bcc5d0373a5 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -54,13 +54,13 @@ xfs_bmbt_to_iomap(
 	struct xfs_mount	*mp = ip->i_mount;
 
 	if (imap->br_startblock == HOLESTARTBLOCK) {
-		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->addr = IOMAP_NULL_ADDR;
 		iomap->type = IOMAP_HOLE;
 	} else if (imap->br_startblock == DELAYSTARTBLOCK) {
-		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->addr = IOMAP_NULL_ADDR;
 		iomap->type = IOMAP_DELALLOC;
 	} else {
-		iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock);
+		iomap->addr = (u64)xfs_fsb_to_db(ip, imap->br_startblock) << 9;
 		if (imap->br_state == XFS_EXT_UNWRITTEN)
 			iomap->type = IOMAP_UNWRITTEN;
 		else
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index f64dc6ce5161..7b8a615fa021 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -15,8 +15,8 @@ struct vm_fault;
  */
 #define IOMAP_HOLE	0x01	/* no blocks allocated, need allocation */
 #define IOMAP_DELALLOC	0x02	/* delayed allocation blocks */
-#define IOMAP_MAPPED	0x03	/* blocks allocated @blkno */
-#define IOMAP_UNWRITTEN	0x04	/* blocks allocated @blkno in unwritten state */
+#define IOMAP_MAPPED	0x03	/* blocks allocated at @addr */
+#define IOMAP_UNWRITTEN	0x04	/* blocks allocated at @addr in unwritten state */
 
 /*
  * Flags for all iomap mappings:
@@ -30,12 +30,12 @@ struct vm_fault;
 #define IOMAP_F_SHARED	0x20	/* block shared with another file */
 
 /*
- * Magic value for blkno:
+ * Magic value for addr:
  */
-#define IOMAP_NULL_BLOCK -1LL	/* blkno is not valid */
+#define IOMAP_NULL_ADDR -1ULL	/* addr is not valid */
 
 struct iomap {
-	sector_t		blkno;	/* 1st sector of mapping, 512b units */
+	u64			addr; /* disk offset of mapping, bytes */
 	loff_t			offset;	/* file offset of mapping, bytes */
 	u64			length;	/* length of mapping, bytes */
 	u16			type;	/* type of mapping */
-- 
2.13.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/4] iomap: Add IOMAP_F_DATA_INLINE flag
  2017-08-29 14:29 [PATCH 0/4] ext4: SEEK_HOLE / SEEK_DATA via iomap Andreas Gruenbacher
  2017-08-29 14:29 ` [PATCH 1/4] iomap: Switch from blkno to disk offset Andreas Gruenbacher
@ 2017-08-29 14:29 ` Andreas Gruenbacher
  2017-08-30 15:03   ` Jan Kara
  2017-08-29 14:29 ` [PATCH 3/4] ext4: Add iomap support for inline data Andreas Gruenbacher
  2017-08-29 14:29 ` [PATCH 4/4] ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA Andreas Gruenbacher
  3 siblings, 1 reply; 14+ messages in thread
From: Andreas Gruenbacher @ 2017-08-29 14:29 UTC (permalink / raw)
  To: linux-fsdevel
  Cc: linux-ext4, linux-xfs, Jan Kara, Christoph Hellwig, Andreas Gruenbacher

Add a new IOMAP_F_DATA_INLINE flag to indicate that a mapping is in a
disk area that contains data as well as metadata.  In iomap_fiemap, map
this flag to FIEMAP_EXTENT_DATA_INLINE.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/iomap.c            | 2 ++
 include/linux/iomap.h | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index 7bc1797c2292..f0a263482a0e 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -510,6 +510,8 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
 		flags |= FIEMAP_EXTENT_MERGED;
 	if (iomap->flags & IOMAP_F_SHARED)
 		flags |= FIEMAP_EXTENT_SHARED;
+	if (iomap->flags & IOMAP_F_DATA_INLINE)
+		flags |= FIEMAP_EXTENT_DATA_INLINE;
 
 	return fiemap_fill_next_extent(fi, iomap->offset,
 			iomap->addr != IOMAP_NULL_ADDR ? iomap->addr : 0,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 7b8a615fa021..2b0790dbd6ea 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -26,8 +26,9 @@ struct vm_fault;
 /*
  * Flags that only need to be reported for IOMAP_REPORT requests:
  */
-#define IOMAP_F_MERGED	0x10	/* contains multiple blocks/extents */
-#define IOMAP_F_SHARED	0x20	/* block shared with another file */
+#define IOMAP_F_MERGED		0x10	/* contains multiple blocks/extents */
+#define IOMAP_F_SHARED		0x20	/* block shared with another file */
+#define IOMAP_F_DATA_INLINE	0x40	/* data inline in the inode */
 
 /*
  * Magic value for addr:
-- 
2.13.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/4] ext4: Add iomap support for inline data
  2017-08-29 14:29 [PATCH 0/4] ext4: SEEK_HOLE / SEEK_DATA via iomap Andreas Gruenbacher
  2017-08-29 14:29 ` [PATCH 1/4] iomap: Switch from blkno to disk offset Andreas Gruenbacher
  2017-08-29 14:29 ` [PATCH 2/4] iomap: Add IOMAP_F_DATA_INLINE flag Andreas Gruenbacher
@ 2017-08-29 14:29 ` Andreas Gruenbacher
  2017-08-30 12:47   ` Jan Kara
  2017-08-30 12:54   ` Jan Kara
  2017-08-29 14:29 ` [PATCH 4/4] ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA Andreas Gruenbacher
  3 siblings, 2 replies; 14+ messages in thread
From: Andreas Gruenbacher @ 2017-08-29 14:29 UTC (permalink / raw)
  To: linux-fsdevel
  Cc: linux-ext4, linux-xfs, Jan Kara, Christoph Hellwig, Andreas Gruenbacher

Report inline data as an IOMAP_F_DATA_INLINE mapping.  This allows to
switch to iomap_seek_hole and iomap_seek_data in ext4_llseek, and will
make switching to iomap_fiemap in ext4_fiemap easier as well.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/ext4/ext4.h   |  4 ++++
 fs/ext4/inline.c | 33 +++++++++++++++++++++++++++++++++
 fs/ext4/inode.c  | 10 ++++++++--
 3 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a2bb7d2870e4..017e55942a49 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3048,6 +3048,10 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
 extern int ext4_inline_data_fiemap(struct inode *inode,
 				   struct fiemap_extent_info *fieinfo,
 				   int *has_inline, __u64 start, __u64 len);
+
+struct iomap;
+extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap);
+
 extern int ext4_try_to_evict_inline_data(handle_t *handle,
 					 struct inode *inode,
 					 int needed);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 28c5c3abddb3..21a078fef80f 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -12,6 +12,7 @@
  * GNU General Public License for more details.
  */
 
+#include <linux/iomap.h>
 #include <linux/fiemap.h>
 
 #include "ext4_jbd2.h"
@@ -1827,6 +1828,38 @@ int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
 	return ret;
 }
 
+int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap)
+{
+	__u64 addr;
+	int error = -ENOENT;
+	struct ext4_iloc iloc;
+
+	down_read(&EXT4_I(inode)->xattr_sem);
+	if (!ext4_has_inline_data(inode))
+		goto out;
+
+	error = ext4_get_inode_loc(inode, &iloc);
+	if (error)
+		goto out;
+
+	addr = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits;
+	addr += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
+	addr += offsetof(struct ext4_inode, i_block);
+
+	brelse(iloc.bh);
+
+	iomap->addr = addr;
+	iomap->offset = 0;
+	iomap->length = min_t(loff_t, ext4_get_inline_size(inode),
+			      i_size_read(inode));
+	iomap->type = 0;
+	iomap->flags = IOMAP_F_DATA_INLINE;
+
+out:
+	up_read(&EXT4_I(inode)->xattr_sem);
+	return error;
+}
+
 int ext4_inline_data_fiemap(struct inode *inode,
 			    struct fiemap_extent_info *fieinfo,
 			    int *has_inline, __u64 start, __u64 len)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 429f0afde9f2..ab6ab835255e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3411,8 +3411,14 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	struct ext4_map_blocks map;
 	int ret;
 
-	if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
-		return -ERANGE;
+	if ((flags & IOMAP_REPORT) && ext4_has_inline_data(inode)) {
+		ret = ext4_inline_data_iomap(inode, iomap);
+		if (ret != -ENOENT) {
+			if (ret == 0 && offset >= iomap->length)
+				ret = -ENOENT;
+			return ret;
+		}
+	}
 
 	map.m_lblk = first_block;
 	map.m_len = last_block - first_block + 1;
-- 
2.13.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/4] ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA
  2017-08-29 14:29 [PATCH 0/4] ext4: SEEK_HOLE / SEEK_DATA via iomap Andreas Gruenbacher
                   ` (2 preceding siblings ...)
  2017-08-29 14:29 ` [PATCH 3/4] ext4: Add iomap support for inline data Andreas Gruenbacher
@ 2017-08-29 14:29 ` Andreas Gruenbacher
  2017-08-30 14:59   ` Jan Kara
  3 siblings, 1 reply; 14+ messages in thread
From: Andreas Gruenbacher @ 2017-08-29 14:29 UTC (permalink / raw)
  To: linux-fsdevel
  Cc: linux-ext4, linux-xfs, Jan Kara, Christoph Hellwig, Andreas Gruenbacher

From: Christoph Hellwig <hch@lst.de>

Switch to the iomap_seek_hole and iomap_seek_data helpers for
implementing lseek SEEK_HOLE / SEEK_DATA, and remove all the code that
isn't needed any more.

Note that with this patch, ext4 will now always depend on the iomap code
instead of only when CONFIG_DAX is enabled, and it requires adding a
call into the extent status tree for iomap_begin as well to properly
deal with delalloc extents.

Signed-off-by: Christoph Hellwig <hch@lst.de>
[Minor fixes and cleanups by Andreas]
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/ext4/Kconfig |   1 +
 fs/ext4/ext4.h  |   3 -
 fs/ext4/file.c  | 271 +++-----------------------------------------------------
 fs/ext4/inode.c | 100 ++++++---------------
 4 files changed, 43 insertions(+), 332 deletions(-)

diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index e38039fd96ff..73b850f5659c 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -37,6 +37,7 @@ config EXT4_FS
 	select CRC16
 	select CRYPTO
 	select CRYPTO_CRC32C
+	select FS_IOMAP
 	help
 	  This is the next generation of the ext3 filesystem.
 
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 017e55942a49..f295fce7a9c8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2515,9 +2515,6 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
 					int used, int quota_claim);
 extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
 			      ext4_fsblk_t pblk, ext4_lblk_t len);
-extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
-				unsigned int map_len,
-				struct extent_status *result);
 
 /* indirect.c */
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 0d7cf0cc9b87..bc7a901c8b3f 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -20,6 +20,7 @@
 
 #include <linux/time.h>
 #include <linux/fs.h>
+#include <linux/iomap.h>
 #include <linux/mount.h>
 #include <linux/path.h>
 #include <linux/dax.h>
@@ -455,256 +456,6 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 }
 
 /*
- * Here we use ext4_map_blocks() to get a block mapping for a extent-based
- * file rather than ext4_ext_walk_space() because we can introduce
- * SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped file at the same
- * function.  When extent status tree has been fully implemented, it will
- * track all extent status for a file and we can directly use it to
- * retrieve the offset for SEEK_DATA/SEEK_HOLE.
- */
-
-/*
- * When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we would need to
- * lookup page cache to check whether or not there has some data between
- * [startoff, endoff] because, if this range contains an unwritten extent,
- * we determine this extent as a data or a hole according to whether the
- * page cache has data or not.
- */
-static int ext4_find_unwritten_pgoff(struct inode *inode,
-				     int whence,
-				     ext4_lblk_t end_blk,
-				     loff_t *offset)
-{
-	struct pagevec pvec;
-	unsigned int blkbits;
-	pgoff_t index;
-	pgoff_t end;
-	loff_t endoff;
-	loff_t startoff;
-	loff_t lastoff;
-	int found = 0;
-
-	blkbits = inode->i_sb->s_blocksize_bits;
-	startoff = *offset;
-	lastoff = startoff;
-	endoff = (loff_t)end_blk << blkbits;
-
-	index = startoff >> PAGE_SHIFT;
-	end = (endoff - 1) >> PAGE_SHIFT;
-
-	pagevec_init(&pvec, 0);
-	do {
-		int i, num;
-		unsigned long nr_pages;
-
-		num = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
-		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
-					  (pgoff_t)num);
-		if (nr_pages == 0)
-			break;
-
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
-			struct buffer_head *bh, *head;
-
-			/*
-			 * If current offset is smaller than the page offset,
-			 * there is a hole at this offset.
-			 */
-			if (whence == SEEK_HOLE && lastoff < endoff &&
-			    lastoff < page_offset(pvec.pages[i])) {
-				found = 1;
-				*offset = lastoff;
-				goto out;
-			}
-
-			if (page->index > end)
-				goto out;
-
-			lock_page(page);
-
-			if (unlikely(page->mapping != inode->i_mapping)) {
-				unlock_page(page);
-				continue;
-			}
-
-			if (!page_has_buffers(page)) {
-				unlock_page(page);
-				continue;
-			}
-
-			if (page_has_buffers(page)) {
-				lastoff = page_offset(page);
-				bh = head = page_buffers(page);
-				do {
-					if (lastoff + bh->b_size <= startoff)
-						goto next;
-					if (buffer_uptodate(bh) ||
-					    buffer_unwritten(bh)) {
-						if (whence == SEEK_DATA)
-							found = 1;
-					} else {
-						if (whence == SEEK_HOLE)
-							found = 1;
-					}
-					if (found) {
-						*offset = max_t(loff_t,
-							startoff, lastoff);
-						unlock_page(page);
-						goto out;
-					}
-next:
-					lastoff += bh->b_size;
-					bh = bh->b_this_page;
-				} while (bh != head);
-			}
-
-			lastoff = page_offset(page) + PAGE_SIZE;
-			unlock_page(page);
-		}
-
-		/* The no. of pages is less than our desired, we are done. */
-		if (nr_pages < num)
-			break;
-
-		index = pvec.pages[i - 1]->index + 1;
-		pagevec_release(&pvec);
-	} while (index <= end);
-
-	if (whence == SEEK_HOLE && lastoff < endoff) {
-		found = 1;
-		*offset = lastoff;
-	}
-out:
-	pagevec_release(&pvec);
-	return found;
-}
-
-/*
- * ext4_seek_data() retrieves the offset for SEEK_DATA.
- */
-static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
-{
-	struct inode *inode = file->f_mapping->host;
-	struct extent_status es;
-	ext4_lblk_t start, last, end;
-	loff_t dataoff, isize;
-	int blkbits;
-	int ret;
-
-	inode_lock(inode);
-
-	isize = i_size_read(inode);
-	if (offset >= isize) {
-		inode_unlock(inode);
-		return -ENXIO;
-	}
-
-	blkbits = inode->i_sb->s_blocksize_bits;
-	start = offset >> blkbits;
-	last = start;
-	end = isize >> blkbits;
-	dataoff = offset;
-
-	do {
-		ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
-		if (ret <= 0) {
-			/* No extent found -> no data */
-			if (ret == 0)
-				ret = -ENXIO;
-			inode_unlock(inode);
-			return ret;
-		}
-
-		last = es.es_lblk;
-		if (last != start)
-			dataoff = (loff_t)last << blkbits;
-		if (!ext4_es_is_unwritten(&es))
-			break;
-
-		/*
-		 * If there is a unwritten extent at this offset,
-		 * it will be as a data or a hole according to page
-		 * cache that has data or not.
-		 */
-		if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
-					      es.es_lblk + es.es_len, &dataoff))
-			break;
-		last += es.es_len;
-		dataoff = (loff_t)last << blkbits;
-		cond_resched();
-	} while (last <= end);
-
-	inode_unlock(inode);
-
-	if (dataoff > isize)
-		return -ENXIO;
-
-	return vfs_setpos(file, dataoff, maxsize);
-}
-
-/*
- * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
- */
-static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
-{
-	struct inode *inode = file->f_mapping->host;
-	struct extent_status es;
-	ext4_lblk_t start, last, end;
-	loff_t holeoff, isize;
-	int blkbits;
-	int ret;
-
-	inode_lock(inode);
-
-	isize = i_size_read(inode);
-	if (offset >= isize) {
-		inode_unlock(inode);
-		return -ENXIO;
-	}
-
-	blkbits = inode->i_sb->s_blocksize_bits;
-	start = offset >> blkbits;
-	last = start;
-	end = isize >> blkbits;
-	holeoff = offset;
-
-	do {
-		ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
-		if (ret < 0) {
-			inode_unlock(inode);
-			return ret;
-		}
-		/* Found a hole? */
-		if (ret == 0 || es.es_lblk > last) {
-			if (last != start)
-				holeoff = (loff_t)last << blkbits;
-			break;
-		}
-		/*
-		 * If there is a unwritten extent at this offset,
-		 * it will be as a data or a hole according to page
-		 * cache that has data or not.
-		 */
-		if (ext4_es_is_unwritten(&es) &&
-		    ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
-					      last + es.es_len, &holeoff))
-			break;
-
-		last += es.es_len;
-		holeoff = (loff_t)last << blkbits;
-		cond_resched();
-	} while (last <= end);
-
-	inode_unlock(inode);
-
-	if (holeoff > isize)
-		holeoff = isize;
-
-	return vfs_setpos(file, holeoff, maxsize);
-}
-
-/*
  * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
  * by calling generic_file_llseek_size() with the appropriate maxbytes
  * value for each.
@@ -720,18 +471,24 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
 		maxbytes = inode->i_sb->s_maxbytes;
 
 	switch (whence) {
-	case SEEK_SET:
-	case SEEK_CUR:
-	case SEEK_END:
+	default:
 		return generic_file_llseek_size(file, offset, whence,
 						maxbytes, i_size_read(inode));
-	case SEEK_DATA:
-		return ext4_seek_data(file, offset, maxbytes);
 	case SEEK_HOLE:
-		return ext4_seek_hole(file, offset, maxbytes);
+		inode_lock_shared(inode);
+		offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
+		inode_unlock_shared(inode);
+		break;
+	case SEEK_DATA:
+		inode_lock_shared(inode);
+		offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
+		inode_unlock_shared(inode);
+		break;
 	}
 
-	return -EINVAL;
+	if (offset < 0)
+		return offset;
+	return vfs_setpos(file, offset, maxbytes);
 }
 
 const struct file_operations ext4_file_operations = {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ab6ab835255e..f5895ea6ac70 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3400,7 +3400,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 		return try_to_free_buffers(page);
 }
 
-#ifdef CONFIG_FS_DAX
 static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 			    unsigned flags, struct iomap *iomap)
 {
@@ -3409,6 +3408,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	unsigned long first_block = offset >> blkbits;
 	unsigned long last_block = (offset + length - 1) >> blkbits;
 	struct ext4_map_blocks map;
+	bool delalloc = false;
 	int ret;
 
 	if ((flags & IOMAP_REPORT) && ext4_has_inline_data(inode)) {
@@ -3425,6 +3425,29 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 
 	if (!(flags & IOMAP_WRITE)) {
 		ret = ext4_map_blocks(NULL, inode, &map, 0);
+		if (ret < 0)
+			return ret;
+		if (!ret) {
+			struct extent_status es = {};
+
+			ext4_es_find_delayed_extent_range(inode, map.m_lblk,
+					map.m_lblk + map.m_len - 1, &es);
+			/* Is delalloc data before next block in extent tree? */
+			if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
+				ext4_lblk_t offs = 0;
+
+				if (es.es_lblk < map.m_lblk)
+					offs = map.m_lblk - es.es_lblk;
+				map.m_lblk = es.es_lblk + offs;
+				map.m_pblk = ext4_es_pblock(&es) + offs;
+				map.m_len = es.es_len - offs;
+				if (ext4_es_is_unwritten(&es))
+					map.m_flags |= EXT4_MAP_UNWRITTEN;
+				if (ext4_es_is_delayed(&es))
+					delalloc = true;
+				ret = 1;
+			}
+		}
 	} else {
 		int dio_credits;
 		handle_t *handle;
@@ -3486,11 +3509,14 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	else
 		iomap->dax_dev = NULL;
 	iomap->offset = first_block << blkbits;
+	iomap->length = (u64)map.m_len << blkbits;
 
 	if (ret == 0) {
 		iomap->type = IOMAP_HOLE;
 		iomap->addr = IOMAP_NULL_ADDR;
-		iomap->length = (u64)map.m_len << blkbits;
+	} else if (delalloc) {
+		iomap->type = IOMAP_DELALLOC;
+		iomap->addr = IOMAP_NULL_ADDR;
 	} else {
 		if (map.m_flags & EXT4_MAP_MAPPED) {
 			iomap->type = IOMAP_MAPPED;
@@ -3501,7 +3527,6 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 			return -EIO;
 		}
 		iomap->addr = (u64)map.m_pblk << blkbits;
-		iomap->length = (u64)map.m_len << blkbits;
 	}
 
 	if (map.m_flags & EXT4_MAP_NEW)
@@ -3567,8 +3592,6 @@ const struct iomap_ops ext4_iomap_ops = {
 	.iomap_end		= ext4_iomap_end,
 };
 
-#endif
-
 static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 			    ssize_t size, void *private)
 {
@@ -6132,70 +6155,3 @@ int ext4_filemap_fault(struct vm_fault *vmf)
 
 	return err;
 }
-
-/*
- * Find the first extent at or after @lblk in an inode that is not a hole.
- * Search for @map_len blocks at most. The extent is returned in @result.
- *
- * The function returns 1 if we found an extent. The function returns 0 in
- * case there is no extent at or after @lblk and in that case also sets
- * @result->es_len to 0. In case of error, the error code is returned.
- */
-int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
-			 unsigned int map_len, struct extent_status *result)
-{
-	struct ext4_map_blocks map;
-	struct extent_status es = {};
-	int ret;
-
-	map.m_lblk = lblk;
-	map.m_len = map_len;
-
-	/*
-	 * For non-extent based files this loop may iterate several times since
-	 * we do not determine full hole size.
-	 */
-	while (map.m_len > 0) {
-		ret = ext4_map_blocks(NULL, inode, &map, 0);
-		if (ret < 0)
-			return ret;
-		/* There's extent covering m_lblk? Just return it. */
-		if (ret > 0) {
-			int status;
-
-			ext4_es_store_pblock(result, map.m_pblk);
-			result->es_lblk = map.m_lblk;
-			result->es_len = map.m_len;
-			if (map.m_flags & EXT4_MAP_UNWRITTEN)
-				status = EXTENT_STATUS_UNWRITTEN;
-			else
-				status = EXTENT_STATUS_WRITTEN;
-			ext4_es_store_status(result, status);
-			return 1;
-		}
-		ext4_es_find_delayed_extent_range(inode, map.m_lblk,
-						  map.m_lblk + map.m_len - 1,
-						  &es);
-		/* Is delalloc data before next block in extent tree? */
-		if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
-			ext4_lblk_t offset = 0;
-
-			if (es.es_lblk < lblk)
-				offset = lblk - es.es_lblk;
-			result->es_lblk = es.es_lblk + offset;
-			ext4_es_store_pblock(result,
-					     ext4_es_pblock(&es) + offset);
-			result->es_len = es.es_len - offset;
-			ext4_es_store_status(result, ext4_es_status(&es));
-
-			return 1;
-		}
-		/* There's a hole at m_lblk, advance us after it */
-		map.m_lblk += map.m_len;
-		map_len -= map.m_len;
-		map.m_len = map_len;
-		cond_resched();
-	}
-	result->es_len = 0;
-	return 0;
-}
-- 
2.13.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] iomap: Switch from blkno to disk offset
  2017-08-29 14:29 ` [PATCH 1/4] iomap: Switch from blkno to disk offset Andreas Gruenbacher
@ 2017-08-29 16:11   ` Darrick J. Wong
  2017-08-31 21:32     ` Darrick J. Wong
  2017-08-30 15:00   ` Jan Kara
  1 sibling, 1 reply; 14+ messages in thread
From: Darrick J. Wong @ 2017-08-29 16:11 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: linux-fsdevel, linux-ext4, linux-xfs, Jan Kara, Christoph Hellwig

On Tue, Aug 29, 2017 at 04:29:39PM +0200, Andreas Gruenbacher wrote:
> Replace iomap->blkno, the sector number, with iomap->addr, the disk
> offset in bytes.  For invalid disk offsets, use the special value
> IOMAP_NULL_ADDR instead of IOMAP_NULL_BLOCK.  This allows to use iomap
> for mappings which are not block aligned, such as inline data on ext4
> and stuffed inodes on gfs2.
> 
> Tested on xfs and ext4 with xfstests; seems to be working correctly.
> 
> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
> ---
>  fs/buffer.c           |  4 ++--
>  fs/dax.c              |  2 +-
>  fs/ext2/inode.c       |  4 ++--
>  fs/ext4/inode.c       |  4 ++--
>  fs/iomap.c            | 11 +++++------
>  fs/nfsd/blocklayout.c |  4 ++--
>  fs/xfs/xfs_iomap.c    |  6 +++---
>  include/linux/iomap.h | 10 +++++-----
>  8 files changed, 22 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 5715dac7821f..b9d20076bb8f 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -1979,8 +1979,8 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
>  	case IOMAP_MAPPED:
>  		if (offset >= i_size_read(inode))
>  			set_buffer_new(bh);
> -		bh->b_blocknr = (iomap->blkno >> (inode->i_blkbits - 9)) +
> -				((offset - iomap->offset) >> inode->i_blkbits);
> +		bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
> +				inode->i_blkbits;
>  		set_buffer_mapped(bh);
>  		break;
>  	}
> diff --git a/fs/dax.c b/fs/dax.c
> index 865d42c63e23..539aaa0b9c45 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -987,7 +987,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
>  
>  static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
>  {
> -	return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
> +	return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
>  }
>  
>  static loff_t
> diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
> index 30163d007b2f..e7d8925d0140 100644
> --- a/fs/ext2/inode.c
> +++ b/fs/ext2/inode.c
> @@ -824,11 +824,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>  
>  	if (ret == 0) {
>  		iomap->type = IOMAP_HOLE;
> -		iomap->blkno = IOMAP_NULL_BLOCK;
> +		iomap->addr = IOMAP_NULL_ADDR;
>  		iomap->length = 1 << blkbits;
>  	} else {
>  		iomap->type = IOMAP_MAPPED;
> -		iomap->blkno = (sector_t)bno << (blkbits - 9);
> +		iomap->addr = (u64)bno << blkbits;
>  		iomap->length = (u64)ret << blkbits;
>  		iomap->flags |= IOMAP_F_MERGED;
>  	}
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index c774bdc22759..429f0afde9f2 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3483,7 +3483,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>  
>  	if (ret == 0) {
>  		iomap->type = IOMAP_HOLE;
> -		iomap->blkno = IOMAP_NULL_BLOCK;
> +		iomap->addr = IOMAP_NULL_ADDR;
>  		iomap->length = (u64)map.m_len << blkbits;
>  	} else {
>  		if (map.m_flags & EXT4_MAP_MAPPED) {
> @@ -3494,7 +3494,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>  			WARN_ON_ONCE(1);
>  			return -EIO;
>  		}
> -		iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9);
> +		iomap->addr = (u64)map.m_pblk << blkbits;
>  		iomap->length = (u64)map.m_len << blkbits;
>  	}
>  
> diff --git a/fs/iomap.c b/fs/iomap.c
> index 59cc98ad7577..7bc1797c2292 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -350,8 +350,8 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
>  static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
>  		struct iomap *iomap)
>  {
> -	sector_t sector = iomap->blkno +
> -		(((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9);
> +	sector_t sector = (iomap->addr +
> +			   (pos & PAGE_MASK) - iomap->offset) >> 9;
>  
>  	return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, sector,
>  			offset, bytes);
> @@ -512,9 +512,8 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
>  		flags |= FIEMAP_EXTENT_SHARED;
>  
>  	return fiemap_fill_next_extent(fi, iomap->offset,
> -			iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0,
> +			iomap->addr != IOMAP_NULL_ADDR ? iomap->addr : 0,
>  			iomap->length, flags);
> -
>  }
>  
>  static loff_t
> @@ -807,7 +806,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
>  	bio = bio_alloc(GFP_KERNEL, 1);
>  	bio->bi_bdev = iomap->bdev;
>  	bio->bi_iter.bi_sector =
> -		iomap->blkno + ((pos - iomap->offset) >> 9);
> +		(iomap->addr + pos - iomap->offset) >> 9;
>  	bio->bi_private = dio;
>  	bio->bi_end_io = iomap_dio_bio_end_io;
>  
> @@ -886,7 +885,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
>  		bio = bio_alloc(GFP_KERNEL, nr_pages);
>  		bio->bi_bdev = iomap->bdev;
>  		bio->bi_iter.bi_sector =
> -			iomap->blkno + ((pos - iomap->offset) >> 9);
> +			(iomap->addr + pos - iomap->offset) >> 9;
>  		bio->bi_write_hint = dio->iocb->ki_hint;
>  		bio->bi_private = dio;
>  		bio->bi_end_io = iomap_dio_bio_end_io;
> diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> index c862c2489df0..2d1d37b27dc7 100644
> --- a/fs/nfsd/blocklayout.c
> +++ b/fs/nfsd/blocklayout.c
> @@ -65,7 +65,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>  			bex->es = PNFS_BLOCK_READ_DATA;
>  		else
>  			bex->es = PNFS_BLOCK_READWRITE_DATA;
> -		bex->soff = (iomap.blkno << 9);
> +		bex->soff = iomap.addr;
>  		break;
>  	case IOMAP_UNWRITTEN:
>  		if (seg->iomode & IOMODE_RW) {
> @@ -78,7 +78,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
>  			}
>  
>  			bex->es = PNFS_BLOCK_INVALID_DATA;
> -			bex->soff = (iomap.blkno << 9);
> +			bex->soff = iomap.addr;
>  			break;
>  		}
>  		/*FALLTHRU*/
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 813394c62849..1bcc5d0373a5 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -54,13 +54,13 @@ xfs_bmbt_to_iomap(
>  	struct xfs_mount	*mp = ip->i_mount;
>  
>  	if (imap->br_startblock == HOLESTARTBLOCK) {
> -		iomap->blkno = IOMAP_NULL_BLOCK;
> +		iomap->addr = IOMAP_NULL_ADDR;
>  		iomap->type = IOMAP_HOLE;
>  	} else if (imap->br_startblock == DELAYSTARTBLOCK) {
> -		iomap->blkno = IOMAP_NULL_BLOCK;
> +		iomap->addr = IOMAP_NULL_ADDR;
>  		iomap->type = IOMAP_DELALLOC;
>  	} else {
> -		iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock);
> +		iomap->addr = (u64)xfs_fsb_to_db(ip, imap->br_startblock) << 9;

iomap->addr = BBTOB(xfs_fsb_to_db(...));

Same machine-level transformation, but please help us maintain
consistent unit conversion code; the 'BBTOB' name makes the units
analysis of a given piece of code more straightforward.

With that fixed,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

for the iomap and XFS parts.

--D

>  		if (imap->br_state == XFS_EXT_UNWRITTEN)
>  			iomap->type = IOMAP_UNWRITTEN;
>  		else
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index f64dc6ce5161..7b8a615fa021 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -15,8 +15,8 @@ struct vm_fault;
>   */
>  #define IOMAP_HOLE	0x01	/* no blocks allocated, need allocation */
>  #define IOMAP_DELALLOC	0x02	/* delayed allocation blocks */
> -#define IOMAP_MAPPED	0x03	/* blocks allocated @blkno */
> -#define IOMAP_UNWRITTEN	0x04	/* blocks allocated @blkno in unwritten state */
> +#define IOMAP_MAPPED	0x03	/* blocks allocated at @addr */
> +#define IOMAP_UNWRITTEN	0x04	/* blocks allocated at @addr in unwritten state */
>  
>  /*
>   * Flags for all iomap mappings:
> @@ -30,12 +30,12 @@ struct vm_fault;
>  #define IOMAP_F_SHARED	0x20	/* block shared with another file */
>  
>  /*
> - * Magic value for blkno:
> + * Magic value for addr:
>   */
> -#define IOMAP_NULL_BLOCK -1LL	/* blkno is not valid */
> +#define IOMAP_NULL_ADDR -1ULL	/* addr is not valid */
>  
>  struct iomap {
> -	sector_t		blkno;	/* 1st sector of mapping, 512b units */
> +	u64			addr; /* disk offset of mapping, bytes */
>  	loff_t			offset;	/* file offset of mapping, bytes */
>  	u64			length;	/* length of mapping, bytes */
>  	u16			type;	/* type of mapping */
> -- 
> 2.13.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] ext4: Add iomap support for inline data
  2017-08-29 14:29 ` [PATCH 3/4] ext4: Add iomap support for inline data Andreas Gruenbacher
@ 2017-08-30 12:47   ` Jan Kara
  2017-08-30 12:54   ` Jan Kara
  1 sibling, 0 replies; 14+ messages in thread
From: Jan Kara @ 2017-08-30 12:47 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: linux-fsdevel, linux-ext4, linux-xfs, Jan Kara, Christoph Hellwig

On Tue 29-08-17 16:29:41, Andreas Gruenbacher wrote:
> Report inline data as an IOMAP_F_DATA_INLINE mapping.  This allows to
> switch to iomap_seek_hole and iomap_seek_data in ext4_llseek, and will
> make switching to iomap_fiemap in ext4_fiemap easier as well.
> 
> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>

Just one nit and one comment below. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>


> ---
>  fs/ext4/ext4.h   |  4 ++++
>  fs/ext4/inline.c | 33 +++++++++++++++++++++++++++++++++
>  fs/ext4/inode.c  | 10 ++++++++--
>  3 files changed, 45 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index a2bb7d2870e4..017e55942a49 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -3048,6 +3048,10 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
>  extern int ext4_inline_data_fiemap(struct inode *inode,
>  				   struct fiemap_extent_info *fieinfo,
>  				   int *has_inline, __u64 start, __u64 len);
> +
> +struct iomap;
> +extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap);
> +
>  extern int ext4_try_to_evict_inline_data(handle_t *handle,
>  					 struct inode *inode,
>  					 int needed);
> diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
> index 28c5c3abddb3..21a078fef80f 100644
> --- a/fs/ext4/inline.c
> +++ b/fs/ext4/inline.c
> @@ -12,6 +12,7 @@
>   * GNU General Public License for more details.
>   */
>  
> +#include <linux/iomap.h>
>  #include <linux/fiemap.h>
>  
>  #include "ext4_jbd2.h"
> @@ -1827,6 +1828,38 @@ int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
>  	return ret;
>  }
>  
> +int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap)
> +{
> +	__u64 addr;
> +	int error = -ENOENT;
> +	struct ext4_iloc iloc;
> +
> +	down_read(&EXT4_I(inode)->xattr_sem);
> +	if (!ext4_has_inline_data(inode))
> +		goto out;

Hum, ENOENT looks rather arbitrary for a lost race with inode conversion.
Maybe EAGAIN would be better? We use it in some other place as well to
indicate that inode has been converted...

> +
> +	error = ext4_get_inode_loc(inode, &iloc);
> +	if (error)
> +		goto out;

Using ext4_get_inode_loc() just to get block + offset of the inode is a bit
overkill since it will also allocate buffer_head, load the block with inode
from disk, etc. But since this is not performance critical and the buffer
is likely to be in cache anyway, I can live with this I guess.

								Honza

-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] ext4: Add iomap support for inline data
  2017-08-29 14:29 ` [PATCH 3/4] ext4: Add iomap support for inline data Andreas Gruenbacher
  2017-08-30 12:47   ` Jan Kara
@ 2017-08-30 12:54   ` Jan Kara
  1 sibling, 0 replies; 14+ messages in thread
From: Jan Kara @ 2017-08-30 12:54 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: linux-fsdevel, linux-ext4, linux-xfs, Jan Kara, Christoph Hellwig

On Tue 29-08-17 16:29:41, Andreas Gruenbacher wrote:
> Report inline data as an IOMAP_F_DATA_INLINE mapping.  This allows to
> switch to iomap_seek_hole and iomap_seek_data in ext4_llseek, and will
> make switching to iomap_fiemap in ext4_fiemap easier as well.
> 
> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>

...

> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 429f0afde9f2..ab6ab835255e 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3411,8 +3411,14 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>  	struct ext4_map_blocks map;
>  	int ret;
>  
> -	if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
> -		return -ERANGE;
> +	if ((flags & IOMAP_REPORT) && ext4_has_inline_data(inode)) {
> +		ret = ext4_inline_data_iomap(inode, iomap);
> +		if (ret != -ENOENT) {
> +			if (ret == 0 && offset >= iomap->length)
> +				ret = -ENOENT;
> +			return ret;
> +		}
> +	}

One more thing: Please keep that WARN_ON and bail out for !IOMAP_REPORT
cases. Thanks!

								Honza

-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/4] ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA
  2017-08-29 14:29 ` [PATCH 4/4] ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA Andreas Gruenbacher
@ 2017-08-30 14:59   ` Jan Kara
  2017-09-14  9:17     ` Andreas Gruenbacher
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Kara @ 2017-08-30 14:59 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: linux-fsdevel, linux-ext4, linux-xfs, Jan Kara, Christoph Hellwig

[-- Attachment #1: Type: text/plain, Size: 3217 bytes --]

On Tue 29-08-17 16:29:42, Andreas Gruenbacher wrote:
> From: Christoph Hellwig <hch@lst.de>
> 
> Switch to the iomap_seek_hole and iomap_seek_data helpers for
> implementing lseek SEEK_HOLE / SEEK_DATA, and remove all the code that
> isn't needed any more.
> 
> Note that with this patch, ext4 will now always depend on the iomap code
> instead of only when CONFIG_DAX is enabled, and it requires adding a
> call into the extent status tree for iomap_begin as well to properly
> deal with delalloc extents.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> [Minor fixes and cleanups by Andreas]
> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>

...

> @@ -3425,6 +3425,29 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>  
>  	if (!(flags & IOMAP_WRITE)) {
>  		ret = ext4_map_blocks(NULL, inode, &map, 0);
> +		if (ret < 0)
> +			return ret;

One general question about IOMAP_REPORT: When there is unwritten extent, we
use page_cache_seek_hole_data() to determine what is actually a hole and
what is data. We could use exactly the same function to determine what is a
hole and what is data in an IOMAP_HOLE extent, couldn't we? Now I
understand that a filesystem is supposed to return IOMAP_DELALLOC extent if
there is delayed allocation data covering queried offset so probably it's
not a great idea to use that however it still seems a bit like an
unnecessary duplication...

> +		if (!ret) {

Please go to this branch only for IOMAP_REPORT case to avoid unnecessary
overhead for DAX mappings...

> +			struct extent_status es = {};
> +
> +			ext4_es_find_delayed_extent_range(inode, map.m_lblk,
> +					map.m_lblk + map.m_len - 1, &es);
> +			/* Is delalloc data before next block in extent tree? */
> +			if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {

And this is still wrong - I have actually verified that with attached patch
that disables caching of extents (so it is equivalent to *very* aggresive
slab reclaim happening). With that patch applied you'll get:

kvm0:~ # rm /mnt/file; xfs_io -f -c "pwrite 4096 4096" -c "seek -a 0"
/mnt/file
wrote 4096/4096 bytes at offset 4096
4 KiB, 1 ops; 0.0000 sec (16.693 MiB/sec and 4273.5043 ops/sec)
Whence	Result
DATA	0
HOLE	8192

Which is obviously wrong - hole should be 0, data should be 4096.

And the reason why this is wrong is that when we are asked to map things at
first_block and there is a hole at that offset, we need to just truncate
the hole extent returned by ext4_map_blocks() by possibly following
delalloc allocation. But we still need to return that there *is a hole* at
first_block. But your patch seems to try to return the delalloc extent
instead which is wrong.

> +				ext4_lblk_t offs = 0;
> +
> +				if (es.es_lblk < map.m_lblk)
> +					offs = map.m_lblk - es.es_lblk;
> +				map.m_lblk = es.es_lblk + offs;
> +				map.m_pblk = ext4_es_pblock(&es) + offs;
> +				map.m_len = es.es_len - offs;
> +				if (ext4_es_is_unwritten(&es))
> +					map.m_flags |= EXT4_MAP_UNWRITTEN;
> +				if (ext4_es_is_delayed(&es))
> +					delalloc = true;
> +				ret = 1;
> +			}
> +		}
>  	} else {
>  		int dio_credits;
>  		handle_t *handle;

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

[-- Attachment #2: extent_status_disable.diff --]
[-- Type: text/x-patch, Size: 467 bytes --]

diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index e7f12a204cbc..32b55cdf9b82 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -698,7 +698,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 	es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
 		 lblk, len, pblk, status, inode->i_ino);
 
-	if (!len)
+	if (!len || !(status & EXTENT_STATUS_DELAYED))
 		return 0;
 
 	BUG_ON(end < lblk);

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] iomap: Switch from blkno to disk offset
  2017-08-29 14:29 ` [PATCH 1/4] iomap: Switch from blkno to disk offset Andreas Gruenbacher
  2017-08-29 16:11   ` Darrick J. Wong
@ 2017-08-30 15:00   ` Jan Kara
  1 sibling, 0 replies; 14+ messages in thread
From: Jan Kara @ 2017-08-30 15:00 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: linux-fsdevel, linux-ext4, linux-xfs, Jan Kara, Christoph Hellwig

On Tue 29-08-17 16:29:39, Andreas Gruenbacher wrote:
> Replace iomap->blkno, the sector number, with iomap->addr, the disk
> offset in bytes.  For invalid disk offsets, use the special value
> IOMAP_NULL_ADDR instead of IOMAP_NULL_BLOCK.  This allows to use iomap
> for mappings which are not block aligned, such as inline data on ext4
> and stuffed inodes on gfs2.
> 
> Tested on xfs and ext4 with xfstests; seems to be working correctly.
> 
> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>

The patch looks good to me. You can add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/4] iomap: Add IOMAP_F_DATA_INLINE flag
  2017-08-29 14:29 ` [PATCH 2/4] iomap: Add IOMAP_F_DATA_INLINE flag Andreas Gruenbacher
@ 2017-08-30 15:03   ` Jan Kara
  0 siblings, 0 replies; 14+ messages in thread
From: Jan Kara @ 2017-08-30 15:03 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: linux-fsdevel, linux-ext4, linux-xfs, Jan Kara, Christoph Hellwig

On Tue 29-08-17 16:29:40, Andreas Gruenbacher wrote:
> Add a new IOMAP_F_DATA_INLINE flag to indicate that a mapping is in a
> disk area that contains data as well as metadata.  In iomap_fiemap, map
> this flag to FIEMAP_EXTENT_DATA_INLINE.
> 
> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>

Looks good to me. You can add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/iomap.c            | 2 ++
>  include/linux/iomap.h | 5 +++--
>  2 files changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/iomap.c b/fs/iomap.c
> index 7bc1797c2292..f0a263482a0e 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -510,6 +510,8 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
>  		flags |= FIEMAP_EXTENT_MERGED;
>  	if (iomap->flags & IOMAP_F_SHARED)
>  		flags |= FIEMAP_EXTENT_SHARED;
> +	if (iomap->flags & IOMAP_F_DATA_INLINE)
> +		flags |= FIEMAP_EXTENT_DATA_INLINE;
>  
>  	return fiemap_fill_next_extent(fi, iomap->offset,
>  			iomap->addr != IOMAP_NULL_ADDR ? iomap->addr : 0,
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 7b8a615fa021..2b0790dbd6ea 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -26,8 +26,9 @@ struct vm_fault;
>  /*
>   * Flags that only need to be reported for IOMAP_REPORT requests:
>   */
> -#define IOMAP_F_MERGED	0x10	/* contains multiple blocks/extents */
> -#define IOMAP_F_SHARED	0x20	/* block shared with another file */
> +#define IOMAP_F_MERGED		0x10	/* contains multiple blocks/extents */
> +#define IOMAP_F_SHARED		0x20	/* block shared with another file */
> +#define IOMAP_F_DATA_INLINE	0x40	/* data inline in the inode */
>  
>  /*
>   * Magic value for addr:
> -- 
> 2.13.3
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] iomap: Switch from blkno to disk offset
  2017-08-29 16:11   ` Darrick J. Wong
@ 2017-08-31 21:32     ` Darrick J. Wong
  2017-08-31 21:37       ` Andreas Grünbacher
  0 siblings, 1 reply; 14+ messages in thread
From: Darrick J. Wong @ 2017-08-31 21:32 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: linux-fsdevel, linux-ext4, linux-xfs, Jan Kara,
	Christoph Hellwig, Theodore Ts'o

On Tue, Aug 29, 2017 at 09:11:15AM -0700, Darrick J. Wong wrote:
> On Tue, Aug 29, 2017 at 04:29:39PM +0200, Andreas Gruenbacher wrote:
> > Replace iomap->blkno, the sector number, with iomap->addr, the disk
> > offset in bytes.  For invalid disk offsets, use the special value
> > IOMAP_NULL_ADDR instead of IOMAP_NULL_BLOCK.  This allows to use iomap
> > for mappings which are not block aligned, such as inline data on ext4
> > and stuffed inodes on gfs2.
> > 
> > Tested on xfs and ext4 with xfstests; seems to be working correctly.
> > 
> > Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
> > ---
> >  fs/buffer.c           |  4 ++--
> >  fs/dax.c              |  2 +-
> >  fs/ext2/inode.c       |  4 ++--
> >  fs/ext4/inode.c       |  4 ++--
> >  fs/iomap.c            | 11 +++++------
> >  fs/nfsd/blocklayout.c |  4 ++--
> >  fs/xfs/xfs_iomap.c    |  6 +++---
> >  include/linux/iomap.h | 10 +++++-----
> >  8 files changed, 22 insertions(+), 23 deletions(-)
> > 
> > diff --git a/fs/buffer.c b/fs/buffer.c
> > index 5715dac7821f..b9d20076bb8f 100644
> > --- a/fs/buffer.c
> > +++ b/fs/buffer.c
> > @@ -1979,8 +1979,8 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
> >  	case IOMAP_MAPPED:
> >  		if (offset >= i_size_read(inode))
> >  			set_buffer_new(bh);
> > -		bh->b_blocknr = (iomap->blkno >> (inode->i_blkbits - 9)) +
> > -				((offset - iomap->offset) >> inode->i_blkbits);
> > +		bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
> > +				inode->i_blkbits;
> >  		set_buffer_mapped(bh);
> >  		break;
> >  	}
> > diff --git a/fs/dax.c b/fs/dax.c
> > index 865d42c63e23..539aaa0b9c45 100644
> > --- a/fs/dax.c
> > +++ b/fs/dax.c
> > @@ -987,7 +987,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
> >  
> >  static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
> >  {
> > -	return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
> > +	return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
> >  }
> >  
> >  static loff_t
> > diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
> > index 30163d007b2f..e7d8925d0140 100644
> > --- a/fs/ext2/inode.c
> > +++ b/fs/ext2/inode.c
> > @@ -824,11 +824,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> >  
> >  	if (ret == 0) {
> >  		iomap->type = IOMAP_HOLE;
> > -		iomap->blkno = IOMAP_NULL_BLOCK;
> > +		iomap->addr = IOMAP_NULL_ADDR;
> >  		iomap->length = 1 << blkbits;
> >  	} else {
> >  		iomap->type = IOMAP_MAPPED;
> > -		iomap->blkno = (sector_t)bno << (blkbits - 9);
> > +		iomap->addr = (u64)bno << blkbits;
> >  		iomap->length = (u64)ret << blkbits;
> >  		iomap->flags |= IOMAP_F_MERGED;
> >  	}
> > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> > index c774bdc22759..429f0afde9f2 100644
> > --- a/fs/ext4/inode.c
> > +++ b/fs/ext4/inode.c
> > @@ -3483,7 +3483,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> >  
> >  	if (ret == 0) {
> >  		iomap->type = IOMAP_HOLE;
> > -		iomap->blkno = IOMAP_NULL_BLOCK;
> > +		iomap->addr = IOMAP_NULL_ADDR;
> >  		iomap->length = (u64)map.m_len << blkbits;
> >  	} else {
> >  		if (map.m_flags & EXT4_MAP_MAPPED) {
> > @@ -3494,7 +3494,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> >  			WARN_ON_ONCE(1);
> >  			return -EIO;
> >  		}
> > -		iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9);
> > +		iomap->addr = (u64)map.m_pblk << blkbits;
> >  		iomap->length = (u64)map.m_len << blkbits;
> >  	}
> >  
> > diff --git a/fs/iomap.c b/fs/iomap.c
> > index 59cc98ad7577..7bc1797c2292 100644
> > --- a/fs/iomap.c
> > +++ b/fs/iomap.c
> > @@ -350,8 +350,8 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
> >  static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
> >  		struct iomap *iomap)
> >  {
> > -	sector_t sector = iomap->blkno +
> > -		(((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9);
> > +	sector_t sector = (iomap->addr +
> > +			   (pos & PAGE_MASK) - iomap->offset) >> 9;
> >  
> >  	return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, sector,
> >  			offset, bytes);
> > @@ -512,9 +512,8 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
> >  		flags |= FIEMAP_EXTENT_SHARED;
> >  
> >  	return fiemap_fill_next_extent(fi, iomap->offset,
> > -			iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0,
> > +			iomap->addr != IOMAP_NULL_ADDR ? iomap->addr : 0,
> >  			iomap->length, flags);
> > -
> >  }
> >  
> >  static loff_t
> > @@ -807,7 +806,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
> >  	bio = bio_alloc(GFP_KERNEL, 1);
> >  	bio->bi_bdev = iomap->bdev;
> >  	bio->bi_iter.bi_sector =
> > -		iomap->blkno + ((pos - iomap->offset) >> 9);
> > +		(iomap->addr + pos - iomap->offset) >> 9;
> >  	bio->bi_private = dio;
> >  	bio->bi_end_io = iomap_dio_bio_end_io;
> >  
> > @@ -886,7 +885,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
> >  		bio = bio_alloc(GFP_KERNEL, nr_pages);
> >  		bio->bi_bdev = iomap->bdev;
> >  		bio->bi_iter.bi_sector =
> > -			iomap->blkno + ((pos - iomap->offset) >> 9);
> > +			(iomap->addr + pos - iomap->offset) >> 9;
> >  		bio->bi_write_hint = dio->iocb->ki_hint;
> >  		bio->bi_private = dio;
> >  		bio->bi_end_io = iomap_dio_bio_end_io;
> > diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> > index c862c2489df0..2d1d37b27dc7 100644
> > --- a/fs/nfsd/blocklayout.c
> > +++ b/fs/nfsd/blocklayout.c
> > @@ -65,7 +65,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> >  			bex->es = PNFS_BLOCK_READ_DATA;
> >  		else
> >  			bex->es = PNFS_BLOCK_READWRITE_DATA;
> > -		bex->soff = (iomap.blkno << 9);
> > +		bex->soff = iomap.addr;
> >  		break;
> >  	case IOMAP_UNWRITTEN:
> >  		if (seg->iomode & IOMODE_RW) {
> > @@ -78,7 +78,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
> >  			}
> >  
> >  			bex->es = PNFS_BLOCK_INVALID_DATA;
> > -			bex->soff = (iomap.blkno << 9);
> > +			bex->soff = iomap.addr;
> >  			break;
> >  		}
> >  		/*FALLTHRU*/
> > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> > index 813394c62849..1bcc5d0373a5 100644
> > --- a/fs/xfs/xfs_iomap.c
> > +++ b/fs/xfs/xfs_iomap.c
> > @@ -54,13 +54,13 @@ xfs_bmbt_to_iomap(
> >  	struct xfs_mount	*mp = ip->i_mount;
> >  
> >  	if (imap->br_startblock == HOLESTARTBLOCK) {
> > -		iomap->blkno = IOMAP_NULL_BLOCK;
> > +		iomap->addr = IOMAP_NULL_ADDR;
> >  		iomap->type = IOMAP_HOLE;
> >  	} else if (imap->br_startblock == DELAYSTARTBLOCK) {
> > -		iomap->blkno = IOMAP_NULL_BLOCK;
> > +		iomap->addr = IOMAP_NULL_ADDR;
> >  		iomap->type = IOMAP_DELALLOC;
> >  	} else {
> > -		iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock);
> > +		iomap->addr = (u64)xfs_fsb_to_db(ip, imap->br_startblock) << 9;
> 
> iomap->addr = BBTOB(xfs_fsb_to_db(...));
> 
> Same machine-level transformation, but please help us maintain
> consistent unit conversion code; the 'BBTOB' name makes the units
> analysis of a given piece of code more straightforward.
> 
> With that fixed,
> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
> 
> for the iomap and XFS parts.

FWIW I still want the BBTOB thing fixed in whatever patch lands
upstream, but I tested the first two patches in this series and nothing
blew up so I guess everything works ok.

--D

> 
> --D
> 
> >  		if (imap->br_state == XFS_EXT_UNWRITTEN)
> >  			iomap->type = IOMAP_UNWRITTEN;
> >  		else
> > diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> > index f64dc6ce5161..7b8a615fa021 100644
> > --- a/include/linux/iomap.h
> > +++ b/include/linux/iomap.h
> > @@ -15,8 +15,8 @@ struct vm_fault;
> >   */
> >  #define IOMAP_HOLE	0x01	/* no blocks allocated, need allocation */
> >  #define IOMAP_DELALLOC	0x02	/* delayed allocation blocks */
> > -#define IOMAP_MAPPED	0x03	/* blocks allocated @blkno */
> > -#define IOMAP_UNWRITTEN	0x04	/* blocks allocated @blkno in unwritten state */
> > +#define IOMAP_MAPPED	0x03	/* blocks allocated at @addr */
> > +#define IOMAP_UNWRITTEN	0x04	/* blocks allocated at @addr in unwritten state */
> >  
> >  /*
> >   * Flags for all iomap mappings:
> > @@ -30,12 +30,12 @@ struct vm_fault;
> >  #define IOMAP_F_SHARED	0x20	/* block shared with another file */
> >  
> >  /*
> > - * Magic value for blkno:
> > + * Magic value for addr:
> >   */
> > -#define IOMAP_NULL_BLOCK -1LL	/* blkno is not valid */
> > +#define IOMAP_NULL_ADDR -1ULL	/* addr is not valid */
> >  
> >  struct iomap {
> > -	sector_t		blkno;	/* 1st sector of mapping, 512b units */
> > +	u64			addr; /* disk offset of mapping, bytes */
> >  	loff_t			offset;	/* file offset of mapping, bytes */
> >  	u64			length;	/* length of mapping, bytes */
> >  	u16			type;	/* type of mapping */
> > -- 
> > 2.13.3
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] iomap: Switch from blkno to disk offset
  2017-08-31 21:32     ` Darrick J. Wong
@ 2017-08-31 21:37       ` Andreas Grünbacher
  0 siblings, 0 replies; 14+ messages in thread
From: Andreas Grünbacher @ 2017-08-31 21:37 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: Andreas Gruenbacher, Linux FS-devel Mailing List, linux-ext4,
	linux-xfs, Jan Kara, Christoph Hellwig, Theodore Ts'o

2017-08-31 23:32 GMT+02:00 Darrick J. Wong <darrick.wong@oracle.com>:
> FWIW I still want the BBTOB thing fixed in whatever patch lands
> upstream, but I tested the first two patches in this series and nothing
> blew up so I guess everything works ok.

Sure, I'll repost the series once I have the ext4 part working
properly. Thanks for the review and testing.

Andreas

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/4] ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA
  2017-08-30 14:59   ` Jan Kara
@ 2017-09-14  9:17     ` Andreas Gruenbacher
  0 siblings, 0 replies; 14+ messages in thread
From: Andreas Gruenbacher @ 2017-09-14  9:17 UTC (permalink / raw)
  To: Jan Kara; +Cc: linux-fsdevel, linux-ext4, linux-xfs, Christoph Hellwig

Jan,

On Wed, Aug 30, 2017 at 4:59 PM, Jan Kara <jack@suse.cz> wrote:
> On Tue 29-08-17 16:29:42, Andreas Gruenbacher wrote:
>> From: Christoph Hellwig <hch@lst.de>
>>
>> Switch to the iomap_seek_hole and iomap_seek_data helpers for
>> implementing lseek SEEK_HOLE / SEEK_DATA, and remove all the code that
>> isn't needed any more.
>>
>> Note that with this patch, ext4 will now always depend on the iomap code
>> instead of only when CONFIG_DAX is enabled, and it requires adding a
>> call into the extent status tree for iomap_begin as well to properly
>> deal with delalloc extents.
>>
>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>> [Minor fixes and cleanups by Andreas]
>> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
>
> ...
>
>> @@ -3425,6 +3425,29 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>>
>>       if (!(flags & IOMAP_WRITE)) {
>>               ret = ext4_map_blocks(NULL, inode, &map, 0);
>> +             if (ret < 0)
>> +                     return ret;
>
> One general question about IOMAP_REPORT: When there is unwritten extent, we
> use page_cache_seek_hole_data() to determine what is actually a hole and
> what is data. We could use exactly the same function to determine what is a
> hole and what is data in an IOMAP_HOLE extent, couldn't we? Now I
> understand that a filesystem is supposed to return IOMAP_DELALLOC extent if
> there is delayed allocation data covering queried offset so probably it's
> not a great idea to use that however it still seems a bit like an
> unnecessary duplication...

There is no point in scanning the page cache on filesystems that don't
support delayed allocation, so it does make sense to distinguish the
two types of mappings.

>> +             if (!ret) {
>
> Please go to this branch only for IOMAP_REPORT case to avoid unnecessary
> overhead for DAX mappings...
>
>> +                     struct extent_status es = {};
>> +
>> +                     ext4_es_find_delayed_extent_range(inode, map.m_lblk,
>> +                                     map.m_lblk + map.m_len - 1, &es);
>> +                     /* Is delalloc data before next block in extent tree? */
>> +                     if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
>
> And this is still wrong - I have actually verified that with attached patch
> that disables caching of extents (so it is equivalent to *very* aggresive
> slab reclaim happening). With that patch applied you'll get:
>
> kvm0:~ # rm /mnt/file; xfs_io -f -c "pwrite 4096 4096" -c "seek -a 0"
> /mnt/file
> wrote 4096/4096 bytes at offset 4096
> 4 KiB, 1 ops; 0.0000 sec (16.693 MiB/sec and 4273.5043 ops/sec)
> Whence  Result
> DATA    0
> HOLE    8192
>
> Which is obviously wrong - hole should be 0, data should be 4096.
>
> And the reason why this is wrong is that when we are asked to map things at
> first_block and there is a hole at that offset, we need to just truncate
> the hole extent returned by ext4_map_blocks() by possibly following
> delalloc allocation. But we still need to return that there *is a hole* at
> first_block. But your patch seems to try to return the delalloc extent
> instead which is wrong.

Got it, thanks for the debug patch. I'll send a fixed version of the series.

>> +                             ext4_lblk_t offs = 0;
>> +
>> +                             if (es.es_lblk < map.m_lblk)
>> +                                     offs = map.m_lblk - es.es_lblk;
>> +                             map.m_lblk = es.es_lblk + offs;
>> +                             map.m_pblk = ext4_es_pblock(&es) + offs;
>> +                             map.m_len = es.es_len - offs;
>> +                             if (ext4_es_is_unwritten(&es))
>> +                                     map.m_flags |= EXT4_MAP_UNWRITTEN;
>> +                             if (ext4_es_is_delayed(&es))
>> +                                     delalloc = true;
>> +                             ret = 1;
>> +                     }
>> +             }
>>       } else {
>>               int dio_credits;
>>               handle_t *handle;

Thanks,
Andreas

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-09-14  9:17 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-29 14:29 [PATCH 0/4] ext4: SEEK_HOLE / SEEK_DATA via iomap Andreas Gruenbacher
2017-08-29 14:29 ` [PATCH 1/4] iomap: Switch from blkno to disk offset Andreas Gruenbacher
2017-08-29 16:11   ` Darrick J. Wong
2017-08-31 21:32     ` Darrick J. Wong
2017-08-31 21:37       ` Andreas Grünbacher
2017-08-30 15:00   ` Jan Kara
2017-08-29 14:29 ` [PATCH 2/4] iomap: Add IOMAP_F_DATA_INLINE flag Andreas Gruenbacher
2017-08-30 15:03   ` Jan Kara
2017-08-29 14:29 ` [PATCH 3/4] ext4: Add iomap support for inline data Andreas Gruenbacher
2017-08-30 12:47   ` Jan Kara
2017-08-30 12:54   ` Jan Kara
2017-08-29 14:29 ` [PATCH 4/4] ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA Andreas Gruenbacher
2017-08-30 14:59   ` Jan Kara
2017-09-14  9:17     ` Andreas Gruenbacher

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.