linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andreas Gruenbacher <agruenba@redhat.com>
To: cluster-devel@redhat.com, Christoph Hellwig <hch@lst.de>
Cc: linux-fsdevel@vger.kernel.org, Andreas Gruenbacher <agruenba@redhat.com>
Subject: [PATCH v4 05/11] gfs2: Iomap cleanups and improvements
Date: Mon, 14 May 2018 17:36:18 +0200	[thread overview]
Message-ID: <20180514153624.29598-6-agruenba@redhat.com> (raw)
In-Reply-To: <20180514153624.29598-1-agruenba@redhat.com>

Clean up gfs2_iomap_alloc and gfs2_iomap_get.  Document how
gfs2_iomap_alloc works: it now needs to be called separately after
gfs2_iomap_get where necessary; this will be used later by iomap write.
Move gfs2_iomap_ops into bmap.c.

Introduce a new gfs2_iomap_get_alloc helper and use it in
fallocate_chunk: gfs2_iomap_begin will become unsuitable for fallocate
with proper iomap write support.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/bmap.c  | 202 ++++++++++++++++++++++++++++--------------------
 fs/gfs2/bmap.h  |   6 +-
 fs/gfs2/file.c  |   4 +-
 fs/gfs2/inode.c |   4 -
 4 files changed, 124 insertions(+), 92 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 0026c15d64bc..725a2736681b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -436,22 +436,6 @@ static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __b
 	return (ptr - first);
 }
 
-static inline void bmap_lock(struct gfs2_inode *ip, int create)
-{
-	if (create)
-		down_write(&ip->i_rw_mutex);
-	else
-		down_read(&ip->i_rw_mutex);
-}
-
-static inline void bmap_unlock(struct gfs2_inode *ip, int create)
-{
-	if (create)
-		up_write(&ip->i_rw_mutex);
-	else
-		up_read(&ip->i_rw_mutex);
-}
-
 static inline __be64 *gfs2_indirect_init(struct metapath *mp,
 					 struct gfs2_glock *gl, unsigned int i,
 					 unsigned offset, u64 bn)
@@ -478,15 +462,11 @@ enum alloc_state {
 };
 
 /**
- * gfs2_bmap_alloc - Build a metadata tree of the requested height
+ * gfs2_iomap_alloc - Build a metadata tree of the requested height
  * @inode: The GFS2 inode
- * @lblock: The logical starting block of the extent
- * @bh_map: This is used to return the mapping details
- * @zero_new: True if newly allocated blocks should be zeroed
+ * @iomap: The iomap structure
+ * @flags: iomap flags
  * @mp: The metapath, with proper height information calculated
- * @maxlen: The max number of data blocks to alloc
- * @dblock: Pointer to return the resulting new block
- * @dblks: Pointer to return the number of blocks allocated
  *
  * In this routine we may have to alloc:
  *   i) Indirect blocks to grow the metadata tree height
@@ -499,6 +479,13 @@ enum alloc_state {
  * blocks are available, there will only be one request per bmap call)
  * and uses the state machine to initialise the blocks in order.
  *
+ * Right now, this function will allocate at most one indirect block
+ * worth of data -- with a default block size of 4K, that's slightly
+ * less than 2M.  If this limitation is ever removed to allow huge
+ * allocations, we would probably still want to limit the iomap size we
+ * return to avoid stalling other tasks during huge writes; the next
+ * iomap iteration would then find the blocks already allocated.
+ *
  * Returns: errno on error
  */
 
@@ -513,6 +500,7 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
 	unsigned dblks = 0;
 	unsigned ptrs_per_blk;
 	const unsigned end_of_metadata = mp->mp_fheight - 1;
+	int ret;
 	enum alloc_state state;
 	__be64 *ptr;
 	__be64 zero_bn = 0;
@@ -523,6 +511,8 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
 
 	gfs2_trans_add_meta(ip->i_gl, dibh);
 
+	down_write(&ip->i_rw_mutex);
+
 	if (mp->mp_fheight == mp->mp_aheight) {
 		struct buffer_head *bh;
 		int eob;
@@ -558,11 +548,10 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
 	blks = dblks + iblks;
 	i = mp->mp_aheight;
 	do {
-		int error;
 		n = blks - alloced;
-		error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
-		if (error)
-			return error;
+		ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
+		if (ret)
+			goto out;
 		alloced += n;
 		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
 			gfs2_trans_add_unrevoke(sdp, bn, n);
@@ -618,7 +607,7 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
 			dblks = n;
 			ptr = metapointer(end_of_metadata, mp);
 			iomap->addr = bn << inode->i_blkbits;
-			iomap->flags |= IOMAP_F_NEW;
+			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
 			while (n-- > 0)
 				*ptr++ = cpu_to_be64(bn++);
 			break;
@@ -628,8 +617,10 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
 	iomap->length = (u64)dblks << inode->i_blkbits;
 	ip->i_height = mp->mp_fheight;
 	gfs2_add_inode_blocks(&ip->i_inode, alloced);
-	gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
-	return 0;
+	gfs2_dinode_out(ip, dibh->b_data);
+out:
+	up_write(&ip->i_rw_mutex);
+	return ret;
 }
 
 typedef const __be64 *(*gfs2_metadata_walker)(
@@ -781,109 +772,130 @@ static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
 }
 
 /**
- * gfs2_iomap_begin - Map blocks from an inode to disk blocks
+ * gfs2_iomap_get - Map blocks from an inode to disk blocks
  * @inode: The inode
  * @pos: Starting position in bytes
  * @length: Length to map, in bytes
  * @flags: iomap flags
  * @iomap: The iomap structure
+ * @mp: The metapath
  *
  * Returns: errno
  */
-int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
-		     unsigned flags, struct iomap *iomap)
+static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
+			  unsigned flags, struct iomap *iomap,
+			  struct metapath *mp)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct metapath mp = { .mp_aheight = 1, };
 	__be64 *ptr;
 	sector_t lblock;
-	sector_t lend;
-	int ret = 0;
+	sector_t lblock_stop;
+	int ret;
 	int eob;
-	unsigned int len;
+	u64 len;
 	struct buffer_head *bh;
 	u8 height;
 
-	trace_gfs2_iomap_start(ip, pos, length, flags);
-	if (!length) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (!length)
+		return -EINVAL;
 
 	if (gfs2_is_stuffed(ip)) {
 		if (flags & IOMAP_REPORT) {
+			if (pos >= i_size_read(inode))
+				return -ENOENT;
 			gfs2_stuffed_iomap(inode, iomap);
-			if (pos >= iomap->length)
-				ret = -ENOENT;
-			goto out;
+			return 0;
 		}
 		BUG_ON(!(flags & IOMAP_WRITE));
 	}
-
 	lblock = pos >> inode->i_blkbits;
-	lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits;
-
 	iomap->offset = lblock << inode->i_blkbits;
-	iomap->addr = IOMAP_NULL_ADDR;
-	iomap->type = IOMAP_HOLE;
-	iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
-	iomap->flags = IOMAP_F_MERGED;
-	bmap_lock(ip, flags & IOMAP_WRITE);
+	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
+	len = lblock_stop - lblock + 1;
+
+	down_read(&ip->i_rw_mutex);
 
-	ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
+	ret = gfs2_meta_inode_buffer(ip, &mp->mp_bh[0]);
 	if (ret)
-		goto out_release;
+		goto unlock;
 
 	height = ip->i_height;
 	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
 		height++;
-	find_metapath(sdp, lblock, &mp, height);
+	find_metapath(sdp, lblock, mp, height);
 	if (height > ip->i_height || gfs2_is_stuffed(ip))
 		goto do_alloc;
 
-	ret = lookup_metapath(ip, &mp);
+	ret = lookup_metapath(ip, mp);
 	if (ret)
-		goto out_release;
+		goto unlock;
 
-	if (mp.mp_aheight != ip->i_height)
+	if (mp->mp_aheight != ip->i_height)
 		goto do_alloc;
 
-	ptr = metapointer(ip->i_height - 1, &mp);
+	ptr = metapointer(ip->i_height - 1, mp);
 	if (*ptr == 0)
 		goto do_alloc;
 
-	iomap->type = IOMAP_MAPPED;
-	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
+	bh = mp->mp_bh[ip->i_height - 1];
+	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, len, &eob);
 
-	bh = mp.mp_bh[ip->i_height - 1];
-	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
+	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
+	iomap->length = len << inode->i_blkbits;
+	iomap->type = IOMAP_MAPPED;
+	iomap->flags = IOMAP_F_MERGED;
 	if (eob)
 		iomap->flags |= IOMAP_F_BOUNDARY;
-	iomap->length = (u64)len << inode->i_blkbits;
 
-out_release:
-	release_metapath(&mp);
-	bmap_unlock(ip, flags & IOMAP_WRITE);
 out:
-	trace_gfs2_iomap_end(ip, iomap, ret);
+	iomap->bdev = inode->i_sb->s_bdev;
+unlock:
+	up_read(&ip->i_rw_mutex);
 	return ret;
 
 do_alloc:
-	if (flags & IOMAP_WRITE) {
-		ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
-	} else if (flags & IOMAP_REPORT) {
+	iomap->addr = IOMAP_NULL_ADDR;
+	iomap->length = len << inode->i_blkbits;
+	iomap->type = IOMAP_HOLE;
+	iomap->flags = 0;
+	if (flags & IOMAP_REPORT) {
 		loff_t size = i_size_read(inode);
 		if (pos >= size)
 			ret = -ENOENT;
-		else if (height <= ip->i_height)
-			ret = gfs2_hole_size(inode, lblock, len, &mp, iomap);
+		else if (height == ip->i_height)
+			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
 		else
 			iomap->length = size - pos;
 	}
-	goto out_release;
+	goto out;
 }
 
+int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
+		     unsigned flags, struct iomap *iomap)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct metapath mp = { .mp_aheight = 1, };
+	int ret;
+
+	trace_gfs2_iomap_start(ip, pos, length, flags);
+	if (flags & IOMAP_WRITE) {
+		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+		if (!ret && iomap->type == IOMAP_HOLE)
+			ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+		release_metapath(&mp);
+	} else {
+		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+		release_metapath(&mp);
+	}
+	trace_gfs2_iomap_end(ip, iomap, ret);
+	return ret;
+}
+
+const struct iomap_ops gfs2_iomap_ops = {
+	.iomap_begin = gfs2_iomap_begin,
+};
+
 /**
  * gfs2_block_map - Map one or more blocks of an inode to a disk block
  * @inode: The inode
@@ -909,25 +921,34 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
 		   struct buffer_head *bh_map, int create)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
+	loff_t pos = (loff_t)lblock << inode->i_blkbits;
+	loff_t length = bh_map->b_size;
+	struct metapath mp = { .mp_aheight = 1, };
 	struct iomap iomap;
-	int ret, flags = 0;
+	int ret;
 
 	clear_buffer_mapped(bh_map);
 	clear_buffer_new(bh_map);
 	clear_buffer_boundary(bh_map);
 	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
 
-	if (create)
-		flags |= IOMAP_WRITE;
-	ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
-			       bh_map->b_size, flags, &iomap);
-	if (ret) {
-		if (!create && ret == -ENOENT) {
-			/* Return unmapped buffer beyond the end of file.  */
+	if (create) {
+		ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
+		if (!ret && iomap.type == IOMAP_HOLE)
+			ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
+		release_metapath(&mp);
+	} else {
+		ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
+		release_metapath(&mp);
+
+		/* Return unmapped buffer beyond the end of file. */
+		if (ret == -ENOENT) {
 			ret = 0;
+			goto out;
 		}
-		goto out;
 	}
+	if (ret)
+		goto out;
 
 	if (iomap.length > bh_map->b_size) {
 		iomap.length = bh_map->b_size;
@@ -1142,6 +1163,19 @@ static int trunc_start(struct inode *inode, u64 newsize)
 	return error;
 }
 
+int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
+			 struct iomap *iomap)
+{
+	struct metapath mp = { .mp_aheight = 1, };
+	int ret;
+
+	ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
+	if (!ret && iomap->type == IOMAP_HOLE)
+		ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
+	release_metapath(&mp);
+	return ret;
+}
+
 /**
  * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
  * @ip: inode
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index c3402fe00653..6b18fb323f0a 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -46,11 +46,13 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
 	}
 }
 
+extern const struct iomap_ops gfs2_iomap_ops;
+
 extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
 extern int gfs2_block_map(struct inode *inode, sector_t lblock,
 			  struct buffer_head *bh, int create);
-extern int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
-			    unsigned flags, struct iomap *iomap);
+extern int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
+				struct iomap *iomap);
 extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new,
 			   u64 *dblock, unsigned *extlen);
 extern int gfs2_setattr_size(struct inode *inode, u64 size);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4b71f021a9e2..1a168e4eac97 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -749,8 +749,8 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 	}
 
 	while (offset < end) {
-		error = gfs2_iomap_begin(inode, offset, end - offset,
-					 IOMAP_WRITE, &iomap);
+		error = gfs2_iomap_get_alloc(inode, offset, end - offset,
+					     &iomap);
 		if (error)
 			goto out;
 		offset = iomap.offset + iomap.length;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8700eb815638..feda55f67050 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -2006,10 +2006,6 @@ static int gfs2_getattr(const struct path *path, struct kstat *stat,
 	return 0;
 }
 
-const struct iomap_ops gfs2_iomap_ops = {
-	.iomap_begin = gfs2_iomap_begin,
-};
-
 static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		       u64 start, u64 len)
 {
-- 
2.17.0

  parent reply	other threads:[~2018-05-14 15:36 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-14 15:36 [PATCH v4 00/11] gfs2 iomap write support Andreas Gruenbacher
2018-05-14 15:36 ` [PATCH v4 01/11] gfs2: Update find_metapath comment Andreas Gruenbacher
2018-05-14 15:36 ` [PATCH v4 02/11] gfs2: hole_size improvement Andreas Gruenbacher
2018-05-14 15:36 ` [PATCH v4 03/11] gfs2: gfs2_stuffed_write_end cleanup Andreas Gruenbacher
2018-05-14 15:36 ` [PATCH v4 04/11] gfs2: Remove ordered write mode handling from gfs2_trans_add_data Andreas Gruenbacher
2018-05-14 15:36 ` Andreas Gruenbacher [this message]
2018-05-14 15:36 ` [PATCH v4 06/11] iomap: Add write_{begin,end} iomap operations Andreas Gruenbacher
2018-05-15  1:11   ` Dave Chinner
2018-05-15  7:22     ` Christoph Hellwig
2018-05-15  8:16       ` Andreas Gruenbacher
2018-05-18 16:04         ` Christoph Hellwig
2018-05-25 17:58           ` Andreas Grünbacher
2018-05-28 13:02             ` Christoph Hellwig
2018-05-14 15:36 ` [PATCH v4 07/11] gfs2: iomap buffered write support Andreas Gruenbacher
2018-05-14 15:36 ` [PATCH v4 08/11] gfs2: gfs2_extent_length cleanup Andreas Gruenbacher
2018-05-14 15:36 ` [PATCH v4 09/11] gfs2: iomap direct I/O support Andreas Gruenbacher
2018-05-15  7:31   ` Christoph Hellwig
2018-05-16 20:36     ` Andreas Gruenbacher
2018-05-14 15:36 ` [PATCH v4 10/11] gfs2: Remove gfs2_write_{begin,end} Andreas Gruenbacher
2018-05-14 15:36 ` [PATCH v4 11/11] iomap: Complete partial direct I/O writes synchronously Andreas Gruenbacher
2018-05-15  7:24   ` Christoph Hellwig
2018-05-16 20:27     ` Andreas Gruenbacher
2018-05-18 15:56       ` Christoph Hellwig
2018-05-18 18:35         ` Andreas Grünbacher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180514153624.29598-6-agruenba@redhat.com \
    --to=agruenba@redhat.com \
    --cc=cluster-devel@redhat.com \
    --cc=hch@lst.de \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).