All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andreas Gruenbacher <agruenba@redhat.com>
To: linux-fsdevel@vger.kernel.org
Cc: linux-ext4@vger.kernel.org, linux-xfs@vger.kernel.org,
	Jan Kara <jack@suse.cz>, Christoph Hellwig <hch@lst.de>,
	Andreas Gruenbacher <agruenba@redhat.com>
Subject: [PATCH 4/4] ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA
Date: Tue, 29 Aug 2017 16:29:42 +0200	[thread overview]
Message-ID: <20170829142942.21594-5-agruenba@redhat.com> (raw)
In-Reply-To: <20170829142942.21594-1-agruenba@redhat.com>

From: Christoph Hellwig <hch@lst.de>

Switch to the iomap_seek_hole and iomap_seek_data helpers for
implementing lseek SEEK_HOLE / SEEK_DATA, and remove all the code that
isn't needed any more.

Note that with this patch, ext4 will now always depend on the iomap code
instead of only when CONFIG_DAX is enabled, and it requires adding a
call into the extent status tree for iomap_begin as well to properly
deal with delalloc extents.

Signed-off-by: Christoph Hellwig <hch@lst.de>
[Minor fixes and cleanups by Andreas]
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/ext4/Kconfig |   1 +
 fs/ext4/ext4.h  |   3 -
 fs/ext4/file.c  | 271 +++-----------------------------------------------------
 fs/ext4/inode.c | 100 ++++++---------------
 4 files changed, 43 insertions(+), 332 deletions(-)

diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index e38039fd96ff..73b850f5659c 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -37,6 +37,7 @@ config EXT4_FS
 	select CRC16
 	select CRYPTO
 	select CRYPTO_CRC32C
+	select FS_IOMAP
 	help
 	  This is the next generation of the ext3 filesystem.
 
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 017e55942a49..f295fce7a9c8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2515,9 +2515,6 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
 					int used, int quota_claim);
 extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
 			      ext4_fsblk_t pblk, ext4_lblk_t len);
-extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
-				unsigned int map_len,
-				struct extent_status *result);
 
 /* indirect.c */
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 0d7cf0cc9b87..bc7a901c8b3f 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -20,6 +20,7 @@
 
 #include <linux/time.h>
 #include <linux/fs.h>
+#include <linux/iomap.h>
 #include <linux/mount.h>
 #include <linux/path.h>
 #include <linux/dax.h>
@@ -455,256 +456,6 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 }
 
 /*
- * Here we use ext4_map_blocks() to get a block mapping for a extent-based
- * file rather than ext4_ext_walk_space() because we can introduce
- * SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped file at the same
- * function.  When extent status tree has been fully implemented, it will
- * track all extent status for a file and we can directly use it to
- * retrieve the offset for SEEK_DATA/SEEK_HOLE.
- */
-
-/*
- * When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we would need to
- * lookup page cache to check whether or not there has some data between
- * [startoff, endoff] because, if this range contains an unwritten extent,
- * we determine this extent as a data or a hole according to whether the
- * page cache has data or not.
- */
-static int ext4_find_unwritten_pgoff(struct inode *inode,
-				     int whence,
-				     ext4_lblk_t end_blk,
-				     loff_t *offset)
-{
-	struct pagevec pvec;
-	unsigned int blkbits;
-	pgoff_t index;
-	pgoff_t end;
-	loff_t endoff;
-	loff_t startoff;
-	loff_t lastoff;
-	int found = 0;
-
-	blkbits = inode->i_sb->s_blocksize_bits;
-	startoff = *offset;
-	lastoff = startoff;
-	endoff = (loff_t)end_blk << blkbits;
-
-	index = startoff >> PAGE_SHIFT;
-	end = (endoff - 1) >> PAGE_SHIFT;
-
-	pagevec_init(&pvec, 0);
-	do {
-		int i, num;
-		unsigned long nr_pages;
-
-		num = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
-		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
-					  (pgoff_t)num);
-		if (nr_pages == 0)
-			break;
-
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
-			struct buffer_head *bh, *head;
-
-			/*
-			 * If current offset is smaller than the page offset,
-			 * there is a hole at this offset.
-			 */
-			if (whence == SEEK_HOLE && lastoff < endoff &&
-			    lastoff < page_offset(pvec.pages[i])) {
-				found = 1;
-				*offset = lastoff;
-				goto out;
-			}
-
-			if (page->index > end)
-				goto out;
-
-			lock_page(page);
-
-			if (unlikely(page->mapping != inode->i_mapping)) {
-				unlock_page(page);
-				continue;
-			}
-
-			if (!page_has_buffers(page)) {
-				unlock_page(page);
-				continue;
-			}
-
-			if (page_has_buffers(page)) {
-				lastoff = page_offset(page);
-				bh = head = page_buffers(page);
-				do {
-					if (lastoff + bh->b_size <= startoff)
-						goto next;
-					if (buffer_uptodate(bh) ||
-					    buffer_unwritten(bh)) {
-						if (whence == SEEK_DATA)
-							found = 1;
-					} else {
-						if (whence == SEEK_HOLE)
-							found = 1;
-					}
-					if (found) {
-						*offset = max_t(loff_t,
-							startoff, lastoff);
-						unlock_page(page);
-						goto out;
-					}
-next:
-					lastoff += bh->b_size;
-					bh = bh->b_this_page;
-				} while (bh != head);
-			}
-
-			lastoff = page_offset(page) + PAGE_SIZE;
-			unlock_page(page);
-		}
-
-		/* The no. of pages is less than our desired, we are done. */
-		if (nr_pages < num)
-			break;
-
-		index = pvec.pages[i - 1]->index + 1;
-		pagevec_release(&pvec);
-	} while (index <= end);
-
-	if (whence == SEEK_HOLE && lastoff < endoff) {
-		found = 1;
-		*offset = lastoff;
-	}
-out:
-	pagevec_release(&pvec);
-	return found;
-}
-
-/*
- * ext4_seek_data() retrieves the offset for SEEK_DATA.
- */
-static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
-{
-	struct inode *inode = file->f_mapping->host;
-	struct extent_status es;
-	ext4_lblk_t start, last, end;
-	loff_t dataoff, isize;
-	int blkbits;
-	int ret;
-
-	inode_lock(inode);
-
-	isize = i_size_read(inode);
-	if (offset >= isize) {
-		inode_unlock(inode);
-		return -ENXIO;
-	}
-
-	blkbits = inode->i_sb->s_blocksize_bits;
-	start = offset >> blkbits;
-	last = start;
-	end = isize >> blkbits;
-	dataoff = offset;
-
-	do {
-		ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
-		if (ret <= 0) {
-			/* No extent found -> no data */
-			if (ret == 0)
-				ret = -ENXIO;
-			inode_unlock(inode);
-			return ret;
-		}
-
-		last = es.es_lblk;
-		if (last != start)
-			dataoff = (loff_t)last << blkbits;
-		if (!ext4_es_is_unwritten(&es))
-			break;
-
-		/*
-		 * If there is a unwritten extent at this offset,
-		 * it will be as a data or a hole according to page
-		 * cache that has data or not.
-		 */
-		if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
-					      es.es_lblk + es.es_len, &dataoff))
-			break;
-		last += es.es_len;
-		dataoff = (loff_t)last << blkbits;
-		cond_resched();
-	} while (last <= end);
-
-	inode_unlock(inode);
-
-	if (dataoff > isize)
-		return -ENXIO;
-
-	return vfs_setpos(file, dataoff, maxsize);
-}
-
-/*
- * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
- */
-static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
-{
-	struct inode *inode = file->f_mapping->host;
-	struct extent_status es;
-	ext4_lblk_t start, last, end;
-	loff_t holeoff, isize;
-	int blkbits;
-	int ret;
-
-	inode_lock(inode);
-
-	isize = i_size_read(inode);
-	if (offset >= isize) {
-		inode_unlock(inode);
-		return -ENXIO;
-	}
-
-	blkbits = inode->i_sb->s_blocksize_bits;
-	start = offset >> blkbits;
-	last = start;
-	end = isize >> blkbits;
-	holeoff = offset;
-
-	do {
-		ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
-		if (ret < 0) {
-			inode_unlock(inode);
-			return ret;
-		}
-		/* Found a hole? */
-		if (ret == 0 || es.es_lblk > last) {
-			if (last != start)
-				holeoff = (loff_t)last << blkbits;
-			break;
-		}
-		/*
-		 * If there is a unwritten extent at this offset,
-		 * it will be as a data or a hole according to page
-		 * cache that has data or not.
-		 */
-		if (ext4_es_is_unwritten(&es) &&
-		    ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
-					      last + es.es_len, &holeoff))
-			break;
-
-		last += es.es_len;
-		holeoff = (loff_t)last << blkbits;
-		cond_resched();
-	} while (last <= end);
-
-	inode_unlock(inode);
-
-	if (holeoff > isize)
-		holeoff = isize;
-
-	return vfs_setpos(file, holeoff, maxsize);
-}
-
-/*
  * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
  * by calling generic_file_llseek_size() with the appropriate maxbytes
  * value for each.
@@ -720,18 +471,24 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
 		maxbytes = inode->i_sb->s_maxbytes;
 
 	switch (whence) {
-	case SEEK_SET:
-	case SEEK_CUR:
-	case SEEK_END:
+	default:
 		return generic_file_llseek_size(file, offset, whence,
 						maxbytes, i_size_read(inode));
-	case SEEK_DATA:
-		return ext4_seek_data(file, offset, maxbytes);
 	case SEEK_HOLE:
-		return ext4_seek_hole(file, offset, maxbytes);
+		inode_lock_shared(inode);
+		offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
+		inode_unlock_shared(inode);
+		break;
+	case SEEK_DATA:
+		inode_lock_shared(inode);
+		offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
+		inode_unlock_shared(inode);
+		break;
 	}
 
-	return -EINVAL;
+	if (offset < 0)
+		return offset;
+	return vfs_setpos(file, offset, maxbytes);
 }
 
 const struct file_operations ext4_file_operations = {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ab6ab835255e..f5895ea6ac70 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3400,7 +3400,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 		return try_to_free_buffers(page);
 }
 
-#ifdef CONFIG_FS_DAX
 static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 			    unsigned flags, struct iomap *iomap)
 {
@@ -3409,6 +3408,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	unsigned long first_block = offset >> blkbits;
 	unsigned long last_block = (offset + length - 1) >> blkbits;
 	struct ext4_map_blocks map;
+	bool delalloc = false;
 	int ret;
 
 	if ((flags & IOMAP_REPORT) && ext4_has_inline_data(inode)) {
@@ -3425,6 +3425,29 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 
 	if (!(flags & IOMAP_WRITE)) {
 		ret = ext4_map_blocks(NULL, inode, &map, 0);
+		if (ret < 0)
+			return ret;
+		if (!ret) {
+			struct extent_status es = {};
+
+			ext4_es_find_delayed_extent_range(inode, map.m_lblk,
+					map.m_lblk + map.m_len - 1, &es);
+			/* Is delalloc data before next block in extent tree? */
+			if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
+				ext4_lblk_t offs = 0;
+
+				if (es.es_lblk < map.m_lblk)
+					offs = map.m_lblk - es.es_lblk;
+				map.m_lblk = es.es_lblk + offs;
+				map.m_pblk = ext4_es_pblock(&es) + offs;
+				map.m_len = es.es_len - offs;
+				if (ext4_es_is_unwritten(&es))
+					map.m_flags |= EXT4_MAP_UNWRITTEN;
+				if (ext4_es_is_delayed(&es))
+					delalloc = true;
+				ret = 1;
+			}
+		}
 	} else {
 		int dio_credits;
 		handle_t *handle;
@@ -3486,11 +3509,14 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	else
 		iomap->dax_dev = NULL;
 	iomap->offset = first_block << blkbits;
+	iomap->length = (u64)map.m_len << blkbits;
 
 	if (ret == 0) {
 		iomap->type = IOMAP_HOLE;
 		iomap->addr = IOMAP_NULL_ADDR;
-		iomap->length = (u64)map.m_len << blkbits;
+	} else if (delalloc) {
+		iomap->type = IOMAP_DELALLOC;
+		iomap->addr = IOMAP_NULL_ADDR;
 	} else {
 		if (map.m_flags & EXT4_MAP_MAPPED) {
 			iomap->type = IOMAP_MAPPED;
@@ -3501,7 +3527,6 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 			return -EIO;
 		}
 		iomap->addr = (u64)map.m_pblk << blkbits;
-		iomap->length = (u64)map.m_len << blkbits;
 	}
 
 	if (map.m_flags & EXT4_MAP_NEW)
@@ -3567,8 +3592,6 @@ const struct iomap_ops ext4_iomap_ops = {
 	.iomap_end		= ext4_iomap_end,
 };
 
-#endif
-
 static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 			    ssize_t size, void *private)
 {
@@ -6132,70 +6155,3 @@ int ext4_filemap_fault(struct vm_fault *vmf)
 
 	return err;
 }
-
-/*
- * Find the first extent at or after @lblk in an inode that is not a hole.
- * Search for @map_len blocks at most. The extent is returned in @result.
- *
- * The function returns 1 if we found an extent. The function returns 0 in
- * case there is no extent at or after @lblk and in that case also sets
- * @result->es_len to 0. In case of error, the error code is returned.
- */
-int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
-			 unsigned int map_len, struct extent_status *result)
-{
-	struct ext4_map_blocks map;
-	struct extent_status es = {};
-	int ret;
-
-	map.m_lblk = lblk;
-	map.m_len = map_len;
-
-	/*
-	 * For non-extent based files this loop may iterate several times since
-	 * we do not determine full hole size.
-	 */
-	while (map.m_len > 0) {
-		ret = ext4_map_blocks(NULL, inode, &map, 0);
-		if (ret < 0)
-			return ret;
-		/* There's extent covering m_lblk? Just return it. */
-		if (ret > 0) {
-			int status;
-
-			ext4_es_store_pblock(result, map.m_pblk);
-			result->es_lblk = map.m_lblk;
-			result->es_len = map.m_len;
-			if (map.m_flags & EXT4_MAP_UNWRITTEN)
-				status = EXTENT_STATUS_UNWRITTEN;
-			else
-				status = EXTENT_STATUS_WRITTEN;
-			ext4_es_store_status(result, status);
-			return 1;
-		}
-		ext4_es_find_delayed_extent_range(inode, map.m_lblk,
-						  map.m_lblk + map.m_len - 1,
-						  &es);
-		/* Is delalloc data before next block in extent tree? */
-		if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
-			ext4_lblk_t offset = 0;
-
-			if (es.es_lblk < lblk)
-				offset = lblk - es.es_lblk;
-			result->es_lblk = es.es_lblk + offset;
-			ext4_es_store_pblock(result,
-					     ext4_es_pblock(&es) + offset);
-			result->es_len = es.es_len - offset;
-			ext4_es_store_status(result, ext4_es_status(&es));
-
-			return 1;
-		}
-		/* There's a hole at m_lblk, advance us after it */
-		map.m_lblk += map.m_len;
-		map_len -= map.m_len;
-		map.m_len = map_len;
-		cond_resched();
-	}
-	result->es_len = 0;
-	return 0;
-}
-- 
2.13.3

  parent reply	other threads:[~2017-08-29 14:29 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-29 14:29 [PATCH 0/4] ext4: SEEK_HOLE / SEEK_DATA via iomap Andreas Gruenbacher
2017-08-29 14:29 ` [PATCH 1/4] iomap: Switch from blkno to disk offset Andreas Gruenbacher
2017-08-29 16:11   ` Darrick J. Wong
2017-08-31 21:32     ` Darrick J. Wong
2017-08-31 21:37       ` Andreas Grünbacher
2017-08-30 15:00   ` Jan Kara
2017-08-29 14:29 ` [PATCH 2/4] iomap: Add IOMAP_F_DATA_INLINE flag Andreas Gruenbacher
2017-08-30 15:03   ` Jan Kara
2017-08-29 14:29 ` [PATCH 3/4] ext4: Add iomap support for inline data Andreas Gruenbacher
2017-08-30 12:47   ` Jan Kara
2017-08-30 12:54   ` Jan Kara
2017-08-29 14:29 ` Andreas Gruenbacher [this message]
2017-08-30 14:59   ` [PATCH 4/4] ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA Jan Kara
2017-09-14  9:17     ` Andreas Gruenbacher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170829142942.21594-5-agruenba@redhat.com \
    --to=agruenba@redhat.com \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.