All of lore.kernel.org
 help / color / mirror / Atom feed
* getbmap refactor V2
@ 2017-09-18 15:26 Christoph Hellwig
  2017-09-18 15:26 ` [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers Christoph Hellwig
  2017-09-18 15:26 ` [PATCH 2/2] xfs: simplify the xfs_getbmap interface Christoph Hellwig
  0 siblings, 2 replies; 16+ messages in thread
From: Christoph Hellwig @ 2017-09-18 15:26 UTC (permalink / raw)
  To: linux-xfs

Hi all,

this series refactors the getbmap support to not sit on top of xfs_bmapi_read,
and to remove the callbacks for formatting the userspace buffers.

Changes since V1:
 - fix setting of BMV_OF_LAST

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers
  2017-09-18 15:26 getbmap refactor V2 Christoph Hellwig
@ 2017-09-18 15:26 ` Christoph Hellwig
  2017-09-20 13:23   ` Brian Foster
  2017-09-20 23:00   ` Darrick J. Wong
  2017-09-18 15:26 ` [PATCH 2/2] xfs: simplify the xfs_getbmap interface Christoph Hellwig
  1 sibling, 2 replies; 16+ messages in thread
From: Christoph Hellwig @ 2017-09-18 15:26 UTC (permalink / raw)
  To: linux-xfs

Currently getbmap uses xfs_bmapi_read to query the extent map, and then
fixes up various bits that are eventually reported to userspace.

This patch instead rewrites it to use xfs_iext_lookup_extent and
xfs_iext_get_extent to iteratively process the extent map.  This not
only avoids the need to allocate a map for the returned xfs_bmbt_irec
structures but also greatly simplified the code.

There are two intentional behavior changes compared to the old code:

 - the current code reports unwritten extents that don't directly border
   a written one as unwritten even when not passing the BMV_IF_PREALLOC
   option, contrary to the documentation.  The new code requires the
   BMV_IF_PREALLOC flag to report the unwrittent extent bit.
 - The new code does never merges consecutive extents, unlike the old
   code that sometimes does it based on the boundaries of the
   xfs_bmapi_read calls.  Note that the extent merging behavior was
   entirely undocumented.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_bmap_util.c | 525 ++++++++++++++++++++-----------------------------
 1 file changed, 208 insertions(+), 317 deletions(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index cd9a5400ba4f..a87d05978c92 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -403,125 +403,103 @@ xfs_bmap_count_blocks(
 	return 0;
 }
 
-/*
- * returns 1 for success, 0 if we failed to map the extent.
- */
-STATIC int
-xfs_getbmapx_fix_eof_hole(
-	xfs_inode_t		*ip,		/* xfs incore inode pointer */
-	int			whichfork,
-	struct getbmapx		*out,		/* output structure */
-	int			prealloced,	/* this is a file with
-						 * preallocated data space */
-	int64_t			end,		/* last block requested */
-	xfs_fsblock_t		startblock,
-	bool			moretocome)
+static int
+xfs_getbmap_report_one(
+	struct xfs_inode	*ip,
+	struct getbmapx		*bmv,
+	struct getbmapx		*out,
+	int64_t			bmv_end,
+	struct xfs_bmbt_irec	*got)
 {
-	int64_t			fixlen;
-	xfs_mount_t		*mp;		/* file system mount point */
-	xfs_ifork_t		*ifp;		/* inode fork pointer */
-	xfs_extnum_t		lastx;		/* last extent pointer */
-	xfs_fileoff_t		fileblock;
-
-	if (startblock == HOLESTARTBLOCK) {
-		mp = ip->i_mount;
-		out->bmv_block = -1;
-		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
-		fixlen -= out->bmv_offset;
-		if (prealloced && out->bmv_offset + out->bmv_length == end) {
-			/* Came to hole at EOF. Trim it. */
-			if (fixlen <= 0)
-				return 0;
-			out->bmv_length = fixlen;
-		}
+	struct getbmapx		*p = out + bmv->bmv_entries;
+	bool			shared = false, trimmed = false;
+	int			error;
+
+	error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
+	if (error)
+		return error;
+
+	if (isnullstartblock(got->br_startblock) ||
+	    got->br_startblock == DELAYSTARTBLOCK) {
+		/*
+		 * Delalloc extents that start beyond EOF can occur due to
+		 * speculative EOF allocation when the delalloc extent is larger
+		 * than the largest freespace extent at conversion time.  These
+		 * extents cannot be converted by data writeback, so can exist
+		 * here even if we are not supposed to be finding delalloc
+		 * extents.
+		 */
+		if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
+			ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
+
+		p->bmv_oflags |= BMV_OF_DELALLOC;
+		p->bmv_block = -2;
 	} else {
-		if (startblock == DELAYSTARTBLOCK)
-			out->bmv_block = -2;
-		else
-			out->bmv_block = xfs_fsb_to_db(ip, startblock);
-		fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
-		ifp = XFS_IFORK_PTR(ip, whichfork);
-		if (!moretocome &&
-		    xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
-		   (lastx == xfs_iext_count(ifp) - 1))
-			out->bmv_oflags |= BMV_OF_LAST;
+		p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
 	}
 
-	return 1;
+	if (got->br_state == XFS_EXT_UNWRITTEN &&
+	    (bmv->bmv_iflags & BMV_IF_PREALLOC))
+		p->bmv_oflags |= BMV_OF_PREALLOC;
+
+	if (shared)
+		p->bmv_oflags |= BMV_OF_SHARED;
+
+	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
+	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);
+
+	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
+	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
+	bmv->bmv_entries++;
+	return 0;
 }
 
-/* Adjust the reported bmap around shared/unshared extent transitions. */
-STATIC int
-xfs_getbmap_adjust_shared(
-	struct xfs_inode		*ip,
-	int				whichfork,
-	struct xfs_bmbt_irec		*map,
-	struct getbmapx			*out,
-	struct xfs_bmbt_irec		*next_map)
+static void
+xfs_getbmap_report_hole(
+	struct xfs_inode	*ip,
+	struct getbmapx		*bmv,
+	struct getbmapx		*out,
+	int64_t			bmv_end,
+	xfs_fileoff_t		bno,
+	xfs_fileoff_t		end)
 {
-	struct xfs_mount		*mp = ip->i_mount;
-	xfs_agnumber_t			agno;
-	xfs_agblock_t			agbno;
-	xfs_agblock_t			ebno;
-	xfs_extlen_t			elen;
-	xfs_extlen_t			nlen;
-	int				error;
+	struct getbmapx		*p = out + bmv->bmv_entries;
 
-	next_map->br_startblock = NULLFSBLOCK;
-	next_map->br_startoff = NULLFILEOFF;
-	next_map->br_blockcount = 0;
+	if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
+		return;
 
-	/* Only written data blocks can be shared. */
-	if (!xfs_is_reflink_inode(ip) ||
-	    whichfork != XFS_DATA_FORK ||
-	    !xfs_bmap_is_real_extent(map))
-		return 0;
+	p->bmv_block = -1;
+	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
+	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);
 
-	agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
-	agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
-	error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
-			map->br_blockcount, &ebno, &elen, true);
-	if (error)
-		return error;
+	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
+	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
+	bmv->bmv_entries++;
+}
 
-	if (ebno == NULLAGBLOCK) {
-		/* No shared blocks at all. */
-		return 0;
-	} else if (agbno == ebno) {
-		/*
-		 * Shared extent at (agbno, elen).  Shrink the reported
-		 * extent length and prepare to move the start of map[i]
-		 * to agbno+elen, with the aim of (re)formatting the new
-		 * map[i] the next time through the inner loop.
-		 */
-		out->bmv_length = XFS_FSB_TO_BB(mp, elen);
-		out->bmv_oflags |= BMV_OF_SHARED;
-		if (elen != map->br_blockcount) {
-			*next_map = *map;
-			next_map->br_startblock += elen;
-			next_map->br_startoff += elen;
-			next_map->br_blockcount -= elen;
-		}
-		map->br_blockcount -= elen;
-	} else {
-		/*
-		 * There's an unshared extent (agbno, ebno - agbno)
-		 * followed by shared extent at (ebno, elen).  Shrink
-		 * the reported extent length to cover only the unshared
-		 * extent and prepare to move up the start of map[i] to
-		 * ebno, with the aim of (re)formatting the new map[i]
-		 * the next time through the inner loop.
-		 */
-		*next_map = *map;
-		nlen = ebno - agbno;
-		out->bmv_length = XFS_FSB_TO_BB(mp, nlen);
-		next_map->br_startblock += nlen;
-		next_map->br_startoff += nlen;
-		next_map->br_blockcount -= nlen;
-		map->br_blockcount -= nlen;
-	}
+static inline bool
+xfs_getbmap_full(
+	struct getbmapx		*bmv)
+{
+	return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
+}
 
-	return 0;
+static bool
+xfs_getbmap_next_rec(
+	struct xfs_bmbt_irec	*rec,
+	xfs_fileoff_t		total_end)
+{
+	xfs_fileoff_t		end = rec->br_startoff + rec->br_blockcount;
+
+	if (end == total_end)
+		return false;
+
+	rec->br_startoff += rec->br_blockcount;
+	if (!isnullstartblock(rec->br_startblock) &&
+	    rec->br_startblock != DELAYSTARTBLOCK)
+		rec->br_startblock += rec->br_blockcount;
+	rec->br_blockcount = total_end - end;
+	return true;
 }
 
 /*
@@ -538,119 +516,72 @@ xfs_getbmap(
 	xfs_bmap_format_t	formatter,	/* format to user */
 	void			*arg)		/* formatter arg */
 {
-	int64_t			bmvend;		/* last block requested */
-	int			error = 0;	/* return value */
-	int64_t			fixlen;		/* length for -1 case */
-	int			i;		/* extent number */
-	int			lock;		/* lock state */
-	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
-	xfs_mount_t		*mp;		/* file system mount point */
-	int			nex;		/* # of user extents can do */
-	int			subnex;		/* # of bmapi's can do */
-	int			nmap;		/* number of map entries */
-	struct getbmapx		*out;		/* output structure */
-	int			whichfork;	/* data or attr fork */
-	int			prealloced;	/* this is a file with
-						 * preallocated data space */
-	int			iflags;		/* interface flags */
-	int			bmapi_flags;	/* flags for xfs_bmapi */
-	int			cur_ext = 0;
-	struct xfs_bmbt_irec	inject_map;
-
-	mp = ip->i_mount;
-	iflags = bmv->bmv_iflags;
+	struct xfs_mount	*mp = ip->i_mount;
+	int			iflags = bmv->bmv_iflags;
+	int			whichfork, lock, i, error = 0;
+	int64_t			bmv_end, max_len;
+	xfs_fileoff_t		bno, first_bno;
+	struct xfs_ifork	*ifp;
+	struct getbmapx		*out;
+	struct xfs_bmbt_irec	got, rec;
+	xfs_filblks_t		len;
+	xfs_extnum_t		idx;
 
 #ifndef DEBUG
 	/* Only allow CoW fork queries if we're debugging. */
 	if (iflags & BMV_IF_COWFORK)
 		return -EINVAL;
 #endif
+
 	if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
 		return -EINVAL;
 
+	if (bmv->bmv_count <= 1)
+		return -EINVAL;
+	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
+		return -ENOMEM;
+
+	if (bmv->bmv_length < -1)
+		return -EINVAL;
+
+	bmv->bmv_entries = 0;
+	if (bmv->bmv_length == 0)
+		return 0;
+
+	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
+	if (!out)
+		return -ENOMEM;
+
 	if (iflags & BMV_IF_ATTRFORK)
 		whichfork = XFS_ATTR_FORK;
 	else if (iflags & BMV_IF_COWFORK)
 		whichfork = XFS_COW_FORK;
 	else
 		whichfork = XFS_DATA_FORK;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
 
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	switch (whichfork) {
 	case XFS_ATTR_FORK:
-		if (XFS_IFORK_Q(ip)) {
-			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
-			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
-			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
-				return -EINVAL;
-		} else if (unlikely(
-			   ip->i_d.di_aformat != 0 &&
-			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
-			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
-					 ip->i_mount);
-			return -EFSCORRUPTED;
-		}
+		if (!XFS_IFORK_Q(ip))
+			goto out_unlock_iolock;
 
-		prealloced = 0;
-		fixlen = 1LL << 32;
+		max_len = 1LL << 32;
+		lock = xfs_ilock_attr_map_shared(ip);
 		break;
 	case XFS_COW_FORK:
-		if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS)
-			return -EINVAL;
+		/* No CoW fork? Just return */
+		if (!ifp)
+			goto out_unlock_iolock;
 
-		if (xfs_get_cowextsz_hint(ip)) {
-			prealloced = 1;
-			fixlen = mp->m_super->s_maxbytes;
-		} else {
-			prealloced = 0;
-			fixlen = XFS_ISIZE(ip);
-		}
-		break;
-	default:
-		/* Local format data forks report no extents. */
-		if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-			bmv->bmv_entries = 0;
-			return 0;
-		}
-		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
-		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
-			return -EINVAL;
+		if (xfs_get_cowextsz_hint(ip))
+			max_len = mp->m_super->s_maxbytes;
+		else
+			max_len = XFS_ISIZE(ip);
 
-		if (xfs_get_extsz_hint(ip) ||
-		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
-			prealloced = 1;
-			fixlen = mp->m_super->s_maxbytes;
-		} else {
-			prealloced = 0;
-			fixlen = XFS_ISIZE(ip);
-		}
+		lock = XFS_ILOCK_SHARED;
+		xfs_ilock(ip, lock);
 		break;
-	}
-
-	if (bmv->bmv_length == -1) {
-		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
-		bmv->bmv_length =
-			max_t(int64_t, fixlen - bmv->bmv_offset, 0);
-	} else if (bmv->bmv_length == 0) {
-		bmv->bmv_entries = 0;
-		return 0;
-	} else if (bmv->bmv_length < 0) {
-		return -EINVAL;
-	}
-
-	nex = bmv->bmv_count - 1;
-	if (nex <= 0)
-		return -EINVAL;
-	bmvend = bmv->bmv_offset + bmv->bmv_length;
-
-
-	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
-		return -ENOMEM;
-	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
-	if (!out)
-		return -ENOMEM;
-
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-	switch (whichfork) {
 	case XFS_DATA_FORK:
 		if (!(iflags & BMV_IF_DELALLOC) &&
 		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
@@ -668,147 +599,107 @@ xfs_getbmap(
 			 */
 		}
 
+		if (xfs_get_extsz_hint(ip) ||
+		    (ip->i_d.di_flags &
+		     (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
+			max_len = mp->m_super->s_maxbytes;
+		else
+			max_len = XFS_ISIZE(ip);
+
 		lock = xfs_ilock_data_map_shared(ip);
 		break;
-	case XFS_COW_FORK:
-		lock = XFS_ILOCK_SHARED;
-		xfs_ilock(ip, lock);
-		break;
-	case XFS_ATTR_FORK:
-		lock = xfs_ilock_attr_map_shared(ip);
-		break;
 	}
 
-	/*
-	 * Don't let nex be bigger than the number of extents
-	 * we can have assuming alternating holes and real extents.
-	 */
-	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
-		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
-
-	bmapi_flags = xfs_bmapi_aflag(whichfork);
-	if (!(iflags & BMV_IF_PREALLOC))
-		bmapi_flags |= XFS_BMAPI_IGSTATE;
-
-	/*
-	 * Allocate enough space to handle "subnex" maps at a time.
-	 */
-	error = -ENOMEM;
-	subnex = 16;
-	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
-	if (!map)
+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+	case XFS_DINODE_FMT_EXTENTS:
+	case XFS_DINODE_FMT_BTREE:
+		break;
+	case XFS_DINODE_FMT_LOCAL:
+		/* Local format inode forks report no extents. */
 		goto out_unlock_ilock;
+	default:
+		error = -EINVAL;
+		goto out_unlock_ilock;
+	}
 
-	bmv->bmv_entries = 0;
-
-	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
-	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
-		error = 0;
-		goto out_free_map;
+	if (bmv->bmv_length == -1) {
+		max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
+		bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
 	}
 
-	do {
-		nmap = (nex> subnex) ? subnex : nex;
-		error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
-				       XFS_BB_TO_FSB(mp, bmv->bmv_length),
-				       map, &nmap, bmapi_flags);
-		if (error)
-			goto out_free_map;
-		ASSERT(nmap <= subnex);
-
-		for (i = 0; i < nmap && bmv->bmv_length &&
-				cur_ext < bmv->bmv_count - 1; i++) {
-			out[cur_ext].bmv_oflags = 0;
-			if (map[i].br_state == XFS_EXT_UNWRITTEN)
-				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
-			else if (map[i].br_startblock == DELAYSTARTBLOCK)
-				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
-			out[cur_ext].bmv_offset =
-				XFS_FSB_TO_BB(mp, map[i].br_startoff);
-			out[cur_ext].bmv_length =
-				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
-			out[cur_ext].bmv_unused1 = 0;
-			out[cur_ext].bmv_unused2 = 0;
+	bmv_end = bmv->bmv_offset + bmv->bmv_length;
 
-			/*
-			 * delayed allocation extents that start beyond EOF can
-			 * occur due to speculative EOF allocation when the
-			 * delalloc extent is larger than the largest freespace
-			 * extent at conversion time. These extents cannot be
-			 * converted by data writeback, so can exist here even
-			 * if we are not supposed to be finding delalloc
-			 * extents.
-			 */
-			if (map[i].br_startblock == DELAYSTARTBLOCK &&
-			    map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
-				ASSERT((iflags & BMV_IF_DELALLOC) != 0);
-
-                        if (map[i].br_startblock == HOLESTARTBLOCK &&
-			    whichfork == XFS_ATTR_FORK) {
-				/* came to the end of attribute fork */
-				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
-				goto out_free_map;
-			}
+	first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
+	len = XFS_BB_TO_FSB(mp, bmv->bmv_length);
 
-			/* Is this a shared block? */
-			error = xfs_getbmap_adjust_shared(ip, whichfork,
-					&map[i], &out[cur_ext], &inject_map);
-			if (error)
-				goto out_free_map;
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(NULL, ip, whichfork);
+		if (error)
+			goto out_unlock_ilock;
+	}
 
-			if (!xfs_getbmapx_fix_eof_hole(ip, whichfork,
-					&out[cur_ext], prealloced, bmvend,
-					map[i].br_startblock,
-					inject_map.br_startblock != NULLFSBLOCK))
-				goto out_free_map;
+	if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) {
+		/*
+		 * Report a whole-file hole if the delalloc flag is set to
+		 * stay compatible with the old implementation.
+		 */
+		if (iflags & BMV_IF_DELALLOC)
+			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
+					XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
+		goto out_unlock_ilock;
+	}
 
-			bmv->bmv_offset =
-				out[cur_ext].bmv_offset +
-				out[cur_ext].bmv_length;
-			bmv->bmv_length =
-				max_t(int64_t, 0, bmvend - bmv->bmv_offset);
+	while (!xfs_getbmap_full(bmv)) {
+		xfs_trim_extent(&got, first_bno, len);
 
-			/*
-			 * In case we don't want to return the hole,
-			 * don't increase cur_ext so that we can reuse
-			 * it in the next loop.
-			 */
-			if ((iflags & BMV_IF_NO_HOLES) &&
-			    map[i].br_startblock == HOLESTARTBLOCK) {
-				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
-				continue;
-			}
+		/*
+		 * Report an entry for a hole if this extent doesn't directly
+		 * follow the previous one.
+		 */
+		if (got.br_startoff > bno) {
+			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
+					got.br_startoff);
+			if (xfs_getbmap_full(bmv))
+				break;
+		}
 
-			/*
-			 * In order to report shared extents accurately,
-			 * we report each distinct shared/unshared part
-			 * of a single bmbt record using multiple bmap
-			 * extents.  To make that happen, we iterate the
-			 * same map array item multiple times, each
-			 * time trimming out the subextent that we just
-			 * reported.
-			 *
-			 * Because of this, we must check the out array
-			 * index (cur_ext) directly against bmv_count-1
-			 * to avoid overflows.
-			 */
-			if (inject_map.br_startblock != NULLFSBLOCK) {
-				map[i] = inject_map;
-				i--;
+		/*
+		 * In order to report shared extents accurately, we report each
+		 * distinct shared / unshared part of a single bmbt record with
+		 * an individual getbmapx record.
+		 */
+		bno = got.br_startoff + got.br_blockcount;
+		rec = got;
+		do {
+			error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
+					&rec);
+			if (error || xfs_getbmap_full(bmv))
+				goto out_unlock_ilock;
+		} while (xfs_getbmap_next_rec(&rec, bno));
+
+		if (!xfs_iext_get_extent(ifp, ++idx, &got)) {
+			xfs_fileoff_t	end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
+
+			out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;
+
+			if (whichfork != XFS_ATTR_FORK && bno < end &&
+			    !xfs_getbmap_full(bmv)) {
+				xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
+						bno, end);
 			}
-			bmv->bmv_entries++;
-			cur_ext++;
+			break;
 		}
-	} while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
 
- out_free_map:
-	kmem_free(map);
- out_unlock_ilock:
+		if (bno >= first_bno + len)
+			break;
+	}
+
+out_unlock_ilock:
 	xfs_iunlock(ip, lock);
- out_unlock_iolock:
+out_unlock_iolock:
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
-	for (i = 0; i < cur_ext; i++) {
+	for (i = 0; i < bmv->bmv_entries; i++) {
 		/* format results & advance arg */
 		error = formatter(&arg, &out[i]);
 		if (error)
-- 
2.14.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/2] xfs: simplify the xfs_getbmap interface
  2017-09-18 15:26 getbmap refactor V2 Christoph Hellwig
  2017-09-18 15:26 ` [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers Christoph Hellwig
@ 2017-09-18 15:26 ` Christoph Hellwig
  2017-09-20 23:08   ` Darrick J. Wong
  1 sibling, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2017-09-18 15:26 UTC (permalink / raw)
  To: linux-xfs

Instead of passing in a formatter callback allocate the bmap buffer
in the caller and process the entries there.  Additionally replace
the in-kernel buffer with a new much smaller structure, and unify
the implementation of the different ioctls in a single function.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/xfs_bmap_util.c |  38 ++++-----------
 fs/xfs/xfs_bmap_util.h |  10 ++--
 fs/xfs/xfs_ioctl.c     | 122 ++++++++++++++++++++++++-------------------------
 3 files changed, 75 insertions(+), 95 deletions(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index a87d05978c92..b540ac65b8b3 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -407,11 +407,11 @@ static int
 xfs_getbmap_report_one(
 	struct xfs_inode	*ip,
 	struct getbmapx		*bmv,
-	struct getbmapx		*out,
+	struct kgetbmap		*out,
 	int64_t			bmv_end,
 	struct xfs_bmbt_irec	*got)
 {
-	struct getbmapx		*p = out + bmv->bmv_entries;
+	struct kgetbmap		*p = out + bmv->bmv_entries;
 	bool			shared = false, trimmed = false;
 	int			error;
 
@@ -458,12 +458,12 @@ static void
 xfs_getbmap_report_hole(
 	struct xfs_inode	*ip,
 	struct getbmapx		*bmv,
-	struct getbmapx		*out,
+	struct kgetbmap		*out,
 	int64_t			bmv_end,
 	xfs_fileoff_t		bno,
 	xfs_fileoff_t		end)
 {
-	struct getbmapx		*p = out + bmv->bmv_entries;
+	struct kgetbmap		*p = out + bmv->bmv_entries;
 
 	if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
 		return;
@@ -511,47 +511,36 @@ xfs_getbmap_next_rec(
  */
 int						/* error code */
 xfs_getbmap(
-	xfs_inode_t		*ip,
+	struct xfs_inode	*ip,
 	struct getbmapx		*bmv,		/* user bmap structure */
-	xfs_bmap_format_t	formatter,	/* format to user */
-	void			*arg)		/* formatter arg */
+	struct kgetbmap		*out)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	int			iflags = bmv->bmv_iflags;
-	int			whichfork, lock, i, error = 0;
+	int			whichfork, lock, error = 0;
 	int64_t			bmv_end, max_len;
 	xfs_fileoff_t		bno, first_bno;
 	struct xfs_ifork	*ifp;
-	struct getbmapx		*out;
 	struct xfs_bmbt_irec	got, rec;
 	xfs_filblks_t		len;
 	xfs_extnum_t		idx;
 
+	if (bmv->bmv_iflags & ~BMV_IF_VALID)
+		return -EINVAL;
 #ifndef DEBUG
 	/* Only allow CoW fork queries if we're debugging. */
 	if (iflags & BMV_IF_COWFORK)
 		return -EINVAL;
 #endif
-
 	if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
 		return -EINVAL;
 
-	if (bmv->bmv_count <= 1)
-		return -EINVAL;
-	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
-		return -ENOMEM;
-
 	if (bmv->bmv_length < -1)
 		return -EINVAL;
-
 	bmv->bmv_entries = 0;
 	if (bmv->bmv_length == 0)
 		return 0;
 
-	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
-	if (!out)
-		return -ENOMEM;
-
 	if (iflags & BMV_IF_ATTRFORK)
 		whichfork = XFS_ATTR_FORK;
 	else if (iflags & BMV_IF_COWFORK)
@@ -698,15 +687,6 @@ xfs_getbmap(
 	xfs_iunlock(ip, lock);
 out_unlock_iolock:
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
-	for (i = 0; i < bmv->bmv_entries; i++) {
-		/* format results & advance arg */
-		error = formatter(&arg, &out[i]);
-		if (error)
-			break;
-	}
-
-	kmem_free(out);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 0eaa81dc49be..6cfe747cb142 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -34,10 +34,14 @@ int	xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
 int	xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
 		xfs_fileoff_t start_fsb, xfs_fileoff_t length);
 
-/* bmap to userspace formatter - copy to user & advance pointer */
-typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *);
+struct kgetbmap {
+	__s64		bmv_offset;	/* file offset of segment in blocks */
+	__s64		bmv_block;	/* starting block (64-bit daddr_t)  */
+	__s64		bmv_length;	/* length of segment, blocks	    */
+	__s32		bmv_oflags;	/* output flags */
+};
 int	xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
-		xfs_bmap_format_t formatter, void *arg);
+		struct kgetbmap *out);
 
 /* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */
 int	xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp,
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 5049e8ab6e30..8e1ab254aa19 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1539,17 +1539,26 @@ xfs_ioc_setxflags(
 	return error;
 }
 
-STATIC int
-xfs_getbmap_format(void **ap, struct getbmapx *bmv)
+static bool
+xfs_getbmap_format(
+	struct kgetbmap		*p,
+	struct getbmapx __user	*u,
+	size_t			recsize)
 {
-	struct getbmap __user	*base = (struct getbmap __user *)*ap;
-
-	/* copy only getbmap portion (not getbmapx) */
-	if (copy_to_user(base, bmv, sizeof(struct getbmap)))
-		return -EFAULT;
-
-	*ap += sizeof(struct getbmap);
-	return 0;
+	if (put_user(p->bmv_offset, &u->bmv_offset) ||
+	    put_user(p->bmv_block, &u->bmv_block) ||
+	    put_user(p->bmv_length, &u->bmv_length) ||
+	    put_user(0, &u->bmv_count) ||
+	    put_user(0, &u->bmv_entries))
+		return false;
+	if (recsize < sizeof(struct getbmapx))
+		return true;
+	if (put_user(0, &u->bmv_iflags) ||
+	    put_user(p->bmv_oflags, &u->bmv_oflags) ||
+	    put_user(0, &u->bmv_unused1) ||
+	    put_user(0, &u->bmv_unused2))
+		return false;
+	return true;
 }
 
 STATIC int
@@ -1559,68 +1568,57 @@ xfs_ioc_getbmap(
 	void			__user *arg)
 {
 	struct getbmapx		bmx = { 0 };
-	int			error;
+	struct kgetbmap		*buf;
+	size_t			recsize;
+	int			error, i;
 
-	/* struct getbmap is a strict subset of struct getbmapx. */
-	if (copy_from_user(&bmx, arg, offsetof(struct getbmapx, bmv_iflags)))
-		return -EFAULT;
-
-	if (bmx.bmv_count < 2)
+	switch (cmd) {
+	case XFS_IOC_GETBMAPA:
+		bmx.bmv_iflags = BMV_IF_ATTRFORK;
+		/*FALLTHRU*/
+	case XFS_IOC_GETBMAP:
+		if (file->f_mode & FMODE_NOCMTIME)
+			bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
+		/* struct getbmap is a strict subset of struct getbmapx. */
+		recsize = sizeof(struct getbmap);
+		break;
+	case XFS_IOC_GETBMAPX:
+		recsize = sizeof(struct getbmapx);
+		break;
+	default:
 		return -EINVAL;
+	}
 
-	bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
-	if (file->f_mode & FMODE_NOCMTIME)
-		bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
-
-	error = xfs_getbmap(XFS_I(file_inode(file)), &bmx, xfs_getbmap_format,
-			    (__force struct getbmap *)arg+1);
-	if (error)
-		return error;
-
-	/* copy back header - only size of getbmap */
-	if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
-		return -EFAULT;
-	return 0;
-}
-
-STATIC int
-xfs_getbmapx_format(void **ap, struct getbmapx *bmv)
-{
-	struct getbmapx __user	*base = (struct getbmapx __user *)*ap;
-
-	if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
-		return -EFAULT;
-
-	*ap += sizeof(struct getbmapx);
-	return 0;
-}
-
-STATIC int
-xfs_ioc_getbmapx(
-	struct xfs_inode	*ip,
-	void			__user *arg)
-{
-	struct getbmapx		bmx;
-	int			error;
-
-	if (copy_from_user(&bmx, arg, sizeof(bmx)))
+	if (copy_from_user(&bmx, arg, recsize))
 		return -EFAULT;
 
 	if (bmx.bmv_count < 2)
 		return -EINVAL;
+	if (bmx.bmv_count > ULONG_MAX / recsize)
+		return -ENOMEM;
 
-	if (bmx.bmv_iflags & (~BMV_IF_VALID))
-		return -EINVAL;
+	buf = kmem_zalloc_large(bmx.bmv_count * sizeof(*buf), 0);
+	if (!buf)
+		return -ENOMEM;
 
-	error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
-			    (__force struct getbmapx *)arg+1);
+	error = xfs_getbmap(XFS_I(file_inode(file)), &bmx, buf);
 	if (error)
-		return error;
+		goto out_free_buf;
 
-	/* copy back header */
-	if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
-		return -EFAULT;
+	error = -EFAULT;
+	if (copy_to_user(arg, &bmx, recsize))
+		goto out_free_buf;
+	arg += recsize;
+
+	for (i = 0; i < bmx.bmv_entries; i++) {
+		if (!xfs_getbmap_format(buf + i, arg, recsize))
+			goto out_free_buf;
+		arg += recsize;
+	}
 
+	error = 0;
+out_free_buf:
+	kmem_free(buf);
 	return 0;
 }
 
@@ -1877,10 +1875,8 @@ xfs_file_ioctl(
 
 	case XFS_IOC_GETBMAP:
 	case XFS_IOC_GETBMAPA:
-		return xfs_ioc_getbmap(filp, cmd, arg);
-
 	case XFS_IOC_GETBMAPX:
-		return xfs_ioc_getbmapx(ip, arg);
+		return xfs_ioc_getbmap(filp, cmd, arg);
 
 	case FS_IOC_GETFSMAP:
 		return xfs_ioc_getfsmap(ip, arg);
-- 
2.14.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers
  2017-09-18 15:26 ` [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers Christoph Hellwig
@ 2017-09-20 13:23   ` Brian Foster
  2017-09-20 14:41     ` Christoph Hellwig
  2017-09-20 23:00   ` Darrick J. Wong
  1 sibling, 1 reply; 16+ messages in thread
From: Brian Foster @ 2017-09-20 13:23 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Mon, Sep 18, 2017 at 08:26:29AM -0700, Christoph Hellwig wrote:
> Currently getbmap uses xfs_bmapi_read to query the extent map, and then
> fixes up various bits that are eventually reported to userspace.
> 
> This patch instead rewrites it to use xfs_iext_lookup_extent and
> xfs_iext_get_extent to iteratively process the extent map.  This not
> only avoids the need to allocate a map for the returned xfs_bmbt_irec
> structures but also greatly simplified the code.
> 
> There are two intentional behavior changes compared to the old code:
> 
>  - the current code reports unwritten extents that don't directly border
>    a written one as unwritten even when not passing the BMV_IF_PREALLOC
>    option, contrary to the documentation.  The new code requires the
>    BMV_IF_PREALLOC flag to report the unwrittent extent bit.
>  - The new code does never merges consecutive extents, unlike the old
>    code that sometimes does it based on the boundaries of the
>    xfs_bmapi_read calls.  Note that the extent merging behavior was
>    entirely undocumented.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_bmap_util.c | 525 ++++++++++++++++++++-----------------------------
>  1 file changed, 208 insertions(+), 317 deletions(-)
> 
> diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> index cd9a5400ba4f..a87d05978c92 100644
> --- a/fs/xfs/xfs_bmap_util.c
> +++ b/fs/xfs/xfs_bmap_util.c
...
> @@ -668,147 +599,107 @@ xfs_getbmap(
...
> +		if (!xfs_iext_get_extent(ifp, ++idx, &got)) {
> +			xfs_fileoff_t	end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
> +
> +			out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;
> +

I couldn't quite tell from your previous response, so just to be sure...
is the expectation to set BMV_OF_LAST on the last real extent of the
file, even though we might report a hole entry just below the file ends
with a hole? If so, this seems fine:

Reviewed-by: Brian Foster <bfoster@redhat.com>

> +			if (whichfork != XFS_ATTR_FORK && bno < end &&
> +			    !xfs_getbmap_full(bmv)) {
> +				xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
> +						bno, end);
>  			}
> -			bmv->bmv_entries++;
> -			cur_ext++;
> +			break;
>  		}
> -	} while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
>  
> - out_free_map:
> -	kmem_free(map);
> - out_unlock_ilock:
> +		if (bno >= first_bno + len)
> +			break;
> +	}
> +
> +out_unlock_ilock:
>  	xfs_iunlock(ip, lock);
> - out_unlock_iolock:
> +out_unlock_iolock:
>  	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
>  
> -	for (i = 0; i < cur_ext; i++) {
> +	for (i = 0; i < bmv->bmv_entries; i++) {
>  		/* format results & advance arg */
>  		error = formatter(&arg, &out[i]);
>  		if (error)
> -- 
> 2.14.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers
  2017-09-20 13:23   ` Brian Foster
@ 2017-09-20 14:41     ` Christoph Hellwig
  2017-09-20 17:03       ` Darrick J. Wong
  0 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2017-09-20 14:41 UTC (permalink / raw)
  To: Brian Foster; +Cc: Christoph Hellwig, linux-xfs

On Wed, Sep 20, 2017 at 09:23:18AM -0400, Brian Foster wrote:
> I couldn't quite tell from your previous response, so just to be sure...
> is the expectation to set BMV_OF_LAST on the last real extent of the
> file, even though we might report a hole entry just below the file ends
> with a hole? If so, this seems fine:

Yes.  That's what the old code does.  Note that there isn't anything
in xfsprogs or xfstests that ever looks at BMV_OF_LAST to start with.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers
  2017-09-20 14:41     ` Christoph Hellwig
@ 2017-09-20 17:03       ` Darrick J. Wong
  0 siblings, 0 replies; 16+ messages in thread
From: Darrick J. Wong @ 2017-09-20 17:03 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Brian Foster, linux-xfs

On Wed, Sep 20, 2017 at 04:41:30PM +0200, Christoph Hellwig wrote:
> On Wed, Sep 20, 2017 at 09:23:18AM -0400, Brian Foster wrote:
> > I couldn't quite tell from your previous response, so just to be sure...
> > is the expectation to set BMV_OF_LAST on the last real extent of the
> > file, even though we might report a hole entry just below the file ends
> > with a hole? If so, this seems fine:
> 
> Yes.  That's what the old code does.  Note that there isn't anything
> in xfsprogs or xfstests that ever looks at BMV_OF_LAST to start with.

FWIW xfs_scrub tool has a phase that reads all the file data blocks
(having sorted them in disk order) to look for read errors, and uses
nftw+bmap to map bad blocks back to (file path, offset).  So long as
there are never any real extents returned after the BMV_OF_LAST record,
this semantic is fine...

(Will try to review this series and the one before it today.)

--D

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers
  2017-09-18 15:26 ` [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers Christoph Hellwig
  2017-09-20 13:23   ` Brian Foster
@ 2017-09-20 23:00   ` Darrick J. Wong
  2017-09-20 23:08     ` Darrick J. Wong
  2017-09-21 13:35     ` Christoph Hellwig
  1 sibling, 2 replies; 16+ messages in thread
From: Darrick J. Wong @ 2017-09-20 23:00 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Mon, Sep 18, 2017 at 08:26:29AM -0700, Christoph Hellwig wrote:
> Currently getbmap uses xfs_bmapi_read to query the extent map, and then
> fixes up various bits that are eventually reported to userspace.
> 
> This patch instead rewrites it to use xfs_iext_lookup_extent and
> xfs_iext_get_extent to iteratively process the extent map.  This not
> only avoids the need to allocate a map for the returned xfs_bmbt_irec
> structures but also greatly simplified the code.
> 
> There are two intentional behavior changes compared to the old code:
> 
>  - the current code reports unwritten extents that don't directly border
>    a written one as unwritten even when not passing the BMV_IF_PREALLOC
>    option, contrary to the documentation.  The new code requires the
>    BMV_IF_PREALLOC flag to report the unwrittent extent bit.
>  - The new code does never merges consecutive extents, unlike the old
>    code that sometimes does it based on the boundaries of the
>    xfs_bmapi_read calls.  Note that the extent merging behavior was
>    entirely undocumented.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_bmap_util.c | 525 ++++++++++++++++++++-----------------------------
>  1 file changed, 208 insertions(+), 317 deletions(-)
> 
> diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> index cd9a5400ba4f..a87d05978c92 100644
> --- a/fs/xfs/xfs_bmap_util.c
> +++ b/fs/xfs/xfs_bmap_util.c
> @@ -403,125 +403,103 @@ xfs_bmap_count_blocks(
>  	return 0;
>  }
>  
> -/*
> - * returns 1 for success, 0 if we failed to map the extent.
> - */
> -STATIC int
> -xfs_getbmapx_fix_eof_hole(
> -	xfs_inode_t		*ip,		/* xfs incore inode pointer */
> -	int			whichfork,
> -	struct getbmapx		*out,		/* output structure */
> -	int			prealloced,	/* this is a file with
> -						 * preallocated data space */
> -	int64_t			end,		/* last block requested */
> -	xfs_fsblock_t		startblock,
> -	bool			moretocome)
> +static int
> +xfs_getbmap_report_one(
> +	struct xfs_inode	*ip,
> +	struct getbmapx		*bmv,
> +	struct getbmapx		*out,
> +	int64_t			bmv_end,
> +	struct xfs_bmbt_irec	*got)
>  {
> -	int64_t			fixlen;
> -	xfs_mount_t		*mp;		/* file system mount point */
> -	xfs_ifork_t		*ifp;		/* inode fork pointer */
> -	xfs_extnum_t		lastx;		/* last extent pointer */
> -	xfs_fileoff_t		fileblock;
> -
> -	if (startblock == HOLESTARTBLOCK) {
> -		mp = ip->i_mount;
> -		out->bmv_block = -1;
> -		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
> -		fixlen -= out->bmv_offset;
> -		if (prealloced && out->bmv_offset + out->bmv_length == end) {
> -			/* Came to hole at EOF. Trim it. */
> -			if (fixlen <= 0)
> -				return 0;
> -			out->bmv_length = fixlen;
> -		}
> +	struct getbmapx		*p = out + bmv->bmv_entries;
> +	bool			shared = false, trimmed = false;
> +	int			error;
> +
> +	error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
> +	if (error)
> +		return error;
> +
> +	if (isnullstartblock(got->br_startblock) ||
> +	    got->br_startblock == DELAYSTARTBLOCK) {
> +		/*
> +		 * Delalloc extents that start beyond EOF can occur due to
> +		 * speculative EOF allocation when the delalloc extent is larger
> +		 * than the largest freespace extent at conversion time.  These
> +		 * extents cannot be converted by data writeback, so can exist
> +		 * here even if we are not supposed to be finding delalloc
> +		 * extents.
> +		 */
> +		if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
> +			ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
> +
> +		p->bmv_oflags |= BMV_OF_DELALLOC;
> +		p->bmv_block = -2;

Could you please turn the special bmv_block values (-2 for delayed
allocation, -1 for hole) into defined constants in xfs_fs.h?

I'm particularly cranky about bmv_block == -1 since there isn't even a
BMV_OF_ flag for holes.

>  	} else {
> -		if (startblock == DELAYSTARTBLOCK)
> -			out->bmv_block = -2;
> -		else
> -			out->bmv_block = xfs_fsb_to_db(ip, startblock);
> -		fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
> -		ifp = XFS_IFORK_PTR(ip, whichfork);
> -		if (!moretocome &&
> -		    xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
> -		   (lastx == xfs_iext_count(ifp) - 1))
> -			out->bmv_oflags |= BMV_OF_LAST;
> +		p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
>  	}
>  
> -	return 1;
> +	if (got->br_state == XFS_EXT_UNWRITTEN &&
> +	    (bmv->bmv_iflags & BMV_IF_PREALLOC))
> +		p->bmv_oflags |= BMV_OF_PREALLOC;

Am I the only one who thought (from the xfs_bmap manpage) that you're
supposed to BMV_IF_PREALLOC if you want the output to contain prealloc
extents, and omit the flag if you don't want them?

Versus what the kernel actually does, which seems to be to merge extents
together if you don't pass the flag:

$ xfs_io -c 'bmap -vvvv' moo
moo:
 EXT: FILE-OFFSET      BLOCK-RANGE          AG AG-OFFSET        TOTAL
   0: [0..39]:         335288488..335288527  7 (736424..736463)    40

$ xfs_io -c 'bmap -vvvv -p' moo
moo:
 EXT: FILE-OFFSET      BLOCK-RANGE          AG AG-OFFSET        TOTAL FLAGS
   0: [0..7]:          335288488..335288495  7 (736424..736431)     8 000000
   1: [8..39]:         335288496..335288527  7 (736432..736463)    32 010000

Eh.  I guess the old code would report prealloc extents, it just doesn't
flag them, so this is ok.

> +
> +	if (shared)
> +		p->bmv_oflags |= BMV_OF_SHARED;
> +
> +	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
> +	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);
> +
> +	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
> +	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
> +	bmv->bmv_entries++;
> +	return 0;
>  }
>  
> -/* Adjust the reported bmap around shared/unshared extent transitions. */
> -STATIC int
> -xfs_getbmap_adjust_shared(
> -	struct xfs_inode		*ip,
> -	int				whichfork,
> -	struct xfs_bmbt_irec		*map,
> -	struct getbmapx			*out,
> -	struct xfs_bmbt_irec		*next_map)
> +static void
> +xfs_getbmap_report_hole(
> +	struct xfs_inode	*ip,
> +	struct getbmapx		*bmv,
> +	struct getbmapx		*out,
> +	int64_t			bmv_end,
> +	xfs_fileoff_t		bno,
> +	xfs_fileoff_t		end)
>  {
> -	struct xfs_mount		*mp = ip->i_mount;
> -	xfs_agnumber_t			agno;
> -	xfs_agblock_t			agbno;
> -	xfs_agblock_t			ebno;
> -	xfs_extlen_t			elen;
> -	xfs_extlen_t			nlen;
> -	int				error;
> +	struct getbmapx		*p = out + bmv->bmv_entries;
>  
> -	next_map->br_startblock = NULLFSBLOCK;
> -	next_map->br_startoff = NULLFILEOFF;
> -	next_map->br_blockcount = 0;
> +	if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
> +		return;
>  
> -	/* Only written data blocks can be shared. */
> -	if (!xfs_is_reflink_inode(ip) ||
> -	    whichfork != XFS_DATA_FORK ||
> -	    !xfs_bmap_is_real_extent(map))
> -		return 0;
> +	p->bmv_block = -1;
> +	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
> +	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);
>  
> -	agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
> -	agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
> -	error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
> -			map->br_blockcount, &ebno, &elen, true);
> -	if (error)
> -		return error;
> +	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
> +	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
> +	bmv->bmv_entries++;
> +}
>  
> -	if (ebno == NULLAGBLOCK) {
> -		/* No shared blocks at all. */
> -		return 0;
> -	} else if (agbno == ebno) {
> -		/*
> -		 * Shared extent at (agbno, elen).  Shrink the reported
> -		 * extent length and prepare to move the start of map[i]
> -		 * to agbno+elen, with the aim of (re)formatting the new
> -		 * map[i] the next time through the inner loop.
> -		 */
> -		out->bmv_length = XFS_FSB_TO_BB(mp, elen);
> -		out->bmv_oflags |= BMV_OF_SHARED;
> -		if (elen != map->br_blockcount) {
> -			*next_map = *map;
> -			next_map->br_startblock += elen;
> -			next_map->br_startoff += elen;
> -			next_map->br_blockcount -= elen;
> -		}
> -		map->br_blockcount -= elen;
> -	} else {
> -		/*
> -		 * There's an unshared extent (agbno, ebno - agbno)
> -		 * followed by shared extent at (ebno, elen).  Shrink
> -		 * the reported extent length to cover only the unshared
> -		 * extent and prepare to move up the start of map[i] to
> -		 * ebno, with the aim of (re)formatting the new map[i]
> -		 * the next time through the inner loop.
> -		 */
> -		*next_map = *map;
> -		nlen = ebno - agbno;
> -		out->bmv_length = XFS_FSB_TO_BB(mp, nlen);
> -		next_map->br_startblock += nlen;
> -		next_map->br_startoff += nlen;
> -		next_map->br_blockcount -= nlen;
> -		map->br_blockcount -= nlen;
> -	}
> +static inline bool
> +xfs_getbmap_full(
> +	struct getbmapx		*bmv)
> +{
> +	return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
> +}
>  
> -	return 0;
> +static bool
> +xfs_getbmap_next_rec(
> +	struct xfs_bmbt_irec	*rec,
> +	xfs_fileoff_t		total_end)
> +{
> +	xfs_fileoff_t		end = rec->br_startoff + rec->br_blockcount;
> +
> +	if (end == total_end)
> +		return false;
> +
> +	rec->br_startoff += rec->br_blockcount;
> +	if (!isnullstartblock(rec->br_startblock) &&
> +	    rec->br_startblock != DELAYSTARTBLOCK)
> +		rec->br_startblock += rec->br_blockcount;
> +	rec->br_blockcount = total_end - end;
> +	return true;
>  }
>  
>  /*
> @@ -538,119 +516,72 @@ xfs_getbmap(
>  	xfs_bmap_format_t	formatter,	/* format to user */
>  	void			*arg)		/* formatter arg */
>  {
> -	int64_t			bmvend;		/* last block requested */
> -	int			error = 0;	/* return value */
> -	int64_t			fixlen;		/* length for -1 case */
> -	int			i;		/* extent number */
> -	int			lock;		/* lock state */
> -	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
> -	xfs_mount_t		*mp;		/* file system mount point */
> -	int			nex;		/* # of user extents can do */
> -	int			subnex;		/* # of bmapi's can do */
> -	int			nmap;		/* number of map entries */
> -	struct getbmapx		*out;		/* output structure */
> -	int			whichfork;	/* data or attr fork */
> -	int			prealloced;	/* this is a file with
> -						 * preallocated data space */
> -	int			iflags;		/* interface flags */
> -	int			bmapi_flags;	/* flags for xfs_bmapi */
> -	int			cur_ext = 0;
> -	struct xfs_bmbt_irec	inject_map;
> -
> -	mp = ip->i_mount;
> -	iflags = bmv->bmv_iflags;
> +	struct xfs_mount	*mp = ip->i_mount;
> +	int			iflags = bmv->bmv_iflags;
> +	int			whichfork, lock, i, error = 0;
> +	int64_t			bmv_end, max_len;
> +	xfs_fileoff_t		bno, first_bno;
> +	struct xfs_ifork	*ifp;
> +	struct getbmapx		*out;
> +	struct xfs_bmbt_irec	got, rec;
> +	xfs_filblks_t		len;
> +	xfs_extnum_t		idx;
>  
>  #ifndef DEBUG
>  	/* Only allow CoW fork queries if we're debugging. */
>  	if (iflags & BMV_IF_COWFORK)
>  		return -EINVAL;
>  #endif
> +
>  	if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
>  		return -EINVAL;
>  
> +	if (bmv->bmv_count <= 1)
> +		return -EINVAL;
> +	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
> +		return -ENOMEM;
> +
> +	if (bmv->bmv_length < -1)
> +		return -EINVAL;
> +
> +	bmv->bmv_entries = 0;
> +	if (bmv->bmv_length == 0)
> +		return 0;
> +
> +	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);

I'm still wondering why we allocate a potentially large getbmapx buffer,
fill it out, and only then format the results to userspace?  I think
getbmap (the ioctl) is now the only user of these functions, so can't
we just call the formatter directly from _getbmap_report_one and
_getbmap_report_hole, like what getfsmap does?

(I also feel like I've asked this before, so apologies if I'm merely
forgetting the answer.)

--D

> +	if (!out)
> +		return -ENOMEM;
> +
>  	if (iflags & BMV_IF_ATTRFORK)
>  		whichfork = XFS_ATTR_FORK;
>  	else if (iflags & BMV_IF_COWFORK)
>  		whichfork = XFS_COW_FORK;
>  	else
>  		whichfork = XFS_DATA_FORK;
> +	ifp = XFS_IFORK_PTR(ip, whichfork);
>  
> +	xfs_ilock(ip, XFS_IOLOCK_SHARED);
>  	switch (whichfork) {
>  	case XFS_ATTR_FORK:
> -		if (XFS_IFORK_Q(ip)) {
> -			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
> -			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
> -			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
> -				return -EINVAL;
> -		} else if (unlikely(
> -			   ip->i_d.di_aformat != 0 &&
> -			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
> -			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
> -					 ip->i_mount);
> -			return -EFSCORRUPTED;
> -		}
> +		if (!XFS_IFORK_Q(ip))
> +			goto out_unlock_iolock;
>  
> -		prealloced = 0;
> -		fixlen = 1LL << 32;
> +		max_len = 1LL << 32;
> +		lock = xfs_ilock_attr_map_shared(ip);
>  		break;
>  	case XFS_COW_FORK:
> -		if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS)
> -			return -EINVAL;
> +		/* No CoW fork? Just return */
> +		if (!ifp)
> +			goto out_unlock_iolock;
>  
> -		if (xfs_get_cowextsz_hint(ip)) {
> -			prealloced = 1;
> -			fixlen = mp->m_super->s_maxbytes;
> -		} else {
> -			prealloced = 0;
> -			fixlen = XFS_ISIZE(ip);
> -		}
> -		break;
> -	default:
> -		/* Local format data forks report no extents. */
> -		if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
> -			bmv->bmv_entries = 0;
> -			return 0;
> -		}
> -		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
> -		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
> -			return -EINVAL;
> +		if (xfs_get_cowextsz_hint(ip))
> +			max_len = mp->m_super->s_maxbytes;
> +		else
> +			max_len = XFS_ISIZE(ip);
>  
> -		if (xfs_get_extsz_hint(ip) ||
> -		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
> -			prealloced = 1;
> -			fixlen = mp->m_super->s_maxbytes;
> -		} else {
> -			prealloced = 0;
> -			fixlen = XFS_ISIZE(ip);
> -		}
> +		lock = XFS_ILOCK_SHARED;
> +		xfs_ilock(ip, lock);
>  		break;
> -	}
> -
> -	if (bmv->bmv_length == -1) {
> -		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
> -		bmv->bmv_length =
> -			max_t(int64_t, fixlen - bmv->bmv_offset, 0);
> -	} else if (bmv->bmv_length == 0) {
> -		bmv->bmv_entries = 0;
> -		return 0;
> -	} else if (bmv->bmv_length < 0) {
> -		return -EINVAL;
> -	}
> -
> -	nex = bmv->bmv_count - 1;
> -	if (nex <= 0)
> -		return -EINVAL;
> -	bmvend = bmv->bmv_offset + bmv->bmv_length;
> -
> -
> -	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
> -		return -ENOMEM;
> -	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
> -	if (!out)
> -		return -ENOMEM;
> -
> -	xfs_ilock(ip, XFS_IOLOCK_SHARED);
> -	switch (whichfork) {
>  	case XFS_DATA_FORK:
>  		if (!(iflags & BMV_IF_DELALLOC) &&
>  		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
> @@ -668,147 +599,107 @@ xfs_getbmap(
>  			 */
>  		}
>  
> +		if (xfs_get_extsz_hint(ip) ||
> +		    (ip->i_d.di_flags &
> +		     (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
> +			max_len = mp->m_super->s_maxbytes;
> +		else
> +			max_len = XFS_ISIZE(ip);
> +
>  		lock = xfs_ilock_data_map_shared(ip);
>  		break;
> -	case XFS_COW_FORK:
> -		lock = XFS_ILOCK_SHARED;
> -		xfs_ilock(ip, lock);
> -		break;
> -	case XFS_ATTR_FORK:
> -		lock = xfs_ilock_attr_map_shared(ip);
> -		break;
>  	}
>  
> -	/*
> -	 * Don't let nex be bigger than the number of extents
> -	 * we can have assuming alternating holes and real extents.
> -	 */
> -	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
> -		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
> -
> -	bmapi_flags = xfs_bmapi_aflag(whichfork);
> -	if (!(iflags & BMV_IF_PREALLOC))
> -		bmapi_flags |= XFS_BMAPI_IGSTATE;
> -
> -	/*
> -	 * Allocate enough space to handle "subnex" maps at a time.
> -	 */
> -	error = -ENOMEM;
> -	subnex = 16;
> -	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
> -	if (!map)
> +	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
> +	case XFS_DINODE_FMT_EXTENTS:
> +	case XFS_DINODE_FMT_BTREE:
> +		break;
> +	case XFS_DINODE_FMT_LOCAL:
> +		/* Local format inode forks report no extents. */
>  		goto out_unlock_ilock;
> +	default:
> +		error = -EINVAL;
> +		goto out_unlock_ilock;
> +	}
>  
> -	bmv->bmv_entries = 0;
> -
> -	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
> -	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
> -		error = 0;
> -		goto out_free_map;
> +	if (bmv->bmv_length == -1) {
> +		max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
> +		bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
>  	}
>  
> -	do {
> -		nmap = (nex> subnex) ? subnex : nex;
> -		error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
> -				       XFS_BB_TO_FSB(mp, bmv->bmv_length),
> -				       map, &nmap, bmapi_flags);
> -		if (error)
> -			goto out_free_map;
> -		ASSERT(nmap <= subnex);
> -
> -		for (i = 0; i < nmap && bmv->bmv_length &&
> -				cur_ext < bmv->bmv_count - 1; i++) {
> -			out[cur_ext].bmv_oflags = 0;
> -			if (map[i].br_state == XFS_EXT_UNWRITTEN)
> -				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
> -			else if (map[i].br_startblock == DELAYSTARTBLOCK)
> -				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
> -			out[cur_ext].bmv_offset =
> -				XFS_FSB_TO_BB(mp, map[i].br_startoff);
> -			out[cur_ext].bmv_length =
> -				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
> -			out[cur_ext].bmv_unused1 = 0;
> -			out[cur_ext].bmv_unused2 = 0;
> +	bmv_end = bmv->bmv_offset + bmv->bmv_length;
>  
> -			/*
> -			 * delayed allocation extents that start beyond EOF can
> -			 * occur due to speculative EOF allocation when the
> -			 * delalloc extent is larger than the largest freespace
> -			 * extent at conversion time. These extents cannot be
> -			 * converted by data writeback, so can exist here even
> -			 * if we are not supposed to be finding delalloc
> -			 * extents.
> -			 */
> -			if (map[i].br_startblock == DELAYSTARTBLOCK &&
> -			    map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
> -				ASSERT((iflags & BMV_IF_DELALLOC) != 0);
> -
> -                        if (map[i].br_startblock == HOLESTARTBLOCK &&
> -			    whichfork == XFS_ATTR_FORK) {
> -				/* came to the end of attribute fork */
> -				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
> -				goto out_free_map;
> -			}
> +	first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
> +	len = XFS_BB_TO_FSB(mp, bmv->bmv_length);
>  
> -			/* Is this a shared block? */
> -			error = xfs_getbmap_adjust_shared(ip, whichfork,
> -					&map[i], &out[cur_ext], &inject_map);
> -			if (error)
> -				goto out_free_map;
> +	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> +		error = xfs_iread_extents(NULL, ip, whichfork);
> +		if (error)
> +			goto out_unlock_ilock;
> +	}
>  
> -			if (!xfs_getbmapx_fix_eof_hole(ip, whichfork,
> -					&out[cur_ext], prealloced, bmvend,
> -					map[i].br_startblock,
> -					inject_map.br_startblock != NULLFSBLOCK))
> -				goto out_free_map;
> +	if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) {
> +		/*
> +		 * Report a whole-file hole if the delalloc flag is set to
> +		 * stay compatible with the old implementation.
> +		 */
> +		if (iflags & BMV_IF_DELALLOC)
> +			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
> +					XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
> +		goto out_unlock_ilock;
> +	}
>  
> -			bmv->bmv_offset =
> -				out[cur_ext].bmv_offset +
> -				out[cur_ext].bmv_length;
> -			bmv->bmv_length =
> -				max_t(int64_t, 0, bmvend - bmv->bmv_offset);
> +	while (!xfs_getbmap_full(bmv)) {
> +		xfs_trim_extent(&got, first_bno, len);
>  
> -			/*
> -			 * In case we don't want to return the hole,
> -			 * don't increase cur_ext so that we can reuse
> -			 * it in the next loop.
> -			 */
> -			if ((iflags & BMV_IF_NO_HOLES) &&
> -			    map[i].br_startblock == HOLESTARTBLOCK) {
> -				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
> -				continue;
> -			}
> +		/*
> +		 * Report an entry for a hole if this extent doesn't directly
> +		 * follow the previous one.
> +		 */
> +		if (got.br_startoff > bno) {
> +			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
> +					got.br_startoff);
> +			if (xfs_getbmap_full(bmv))
> +				break;
> +		}
>  
> -			/*
> -			 * In order to report shared extents accurately,
> -			 * we report each distinct shared/unshared part
> -			 * of a single bmbt record using multiple bmap
> -			 * extents.  To make that happen, we iterate the
> -			 * same map array item multiple times, each
> -			 * time trimming out the subextent that we just
> -			 * reported.
> -			 *
> -			 * Because of this, we must check the out array
> -			 * index (cur_ext) directly against bmv_count-1
> -			 * to avoid overflows.
> -			 */
> -			if (inject_map.br_startblock != NULLFSBLOCK) {
> -				map[i] = inject_map;
> -				i--;
> +		/*
> +		 * In order to report shared extents accurately, we report each
> +		 * distinct shared / unshared part of a single bmbt record with
> +		 * an individual getbmapx record.
> +		 */
> +		bno = got.br_startoff + got.br_blockcount;
> +		rec = got;
> +		do {
> +			error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
> +					&rec);
> +			if (error || xfs_getbmap_full(bmv))
> +				goto out_unlock_ilock;
> +		} while (xfs_getbmap_next_rec(&rec, bno));
> +
> +		if (!xfs_iext_get_extent(ifp, ++idx, &got)) {
> +			xfs_fileoff_t	end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
> +
> +			out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;
> +
> +			if (whichfork != XFS_ATTR_FORK && bno < end &&
> +			    !xfs_getbmap_full(bmv)) {
> +				xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
> +						bno, end);
>  			}
> -			bmv->bmv_entries++;
> -			cur_ext++;
> +			break;
>  		}
> -	} while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
>  
> - out_free_map:
> -	kmem_free(map);
> - out_unlock_ilock:
> +		if (bno >= first_bno + len)
> +			break;
> +	}
> +
> +out_unlock_ilock:
>  	xfs_iunlock(ip, lock);
> - out_unlock_iolock:
> +out_unlock_iolock:
>  	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
>  
> -	for (i = 0; i < cur_ext; i++) {
> +	for (i = 0; i < bmv->bmv_entries; i++) {
>  		/* format results & advance arg */
>  		error = formatter(&arg, &out[i]);
>  		if (error)
> -- 
> 2.14.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers
  2017-09-20 23:00   ` Darrick J. Wong
@ 2017-09-20 23:08     ` Darrick J. Wong
  2017-09-21 13:36       ` Christoph Hellwig
  2017-09-21 13:35     ` Christoph Hellwig
  1 sibling, 1 reply; 16+ messages in thread
From: Darrick J. Wong @ 2017-09-20 23:08 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, Sep 20, 2017 at 04:00:36PM -0700, Darrick J. Wong wrote:
> On Mon, Sep 18, 2017 at 08:26:29AM -0700, Christoph Hellwig wrote:
> > Currently getbmap uses xfs_bmapi_read to query the extent map, and then
> > fixes up various bits that are eventually reported to userspace.
> > 
> > This patch instead rewrites it to use xfs_iext_lookup_extent and
> > xfs_iext_get_extent to iteratively process the extent map.  This not
> > only avoids the need to allocate a map for the returned xfs_bmbt_irec
> > structures but also greatly simplified the code.
> > 
> > There are two intentional behavior changes compared to the old code:
> > 
> >  - the current code reports unwritten extents that don't directly border
> >    a written one as unwritten even when not passing the BMV_IF_PREALLOC
> >    option, contrary to the documentation.  The new code requires the
> >    BMV_IF_PREALLOC flag to report the unwrittent extent bit.
> >  - The new code does never merges consecutive extents, unlike the old
> >    code that sometimes does it based on the boundaries of the
> >    xfs_bmapi_read calls.  Note that the extent merging behavior was
> >    entirely undocumented.
> > 
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > ---
> >  fs/xfs/xfs_bmap_util.c | 525 ++++++++++++++++++++-----------------------------
> >  1 file changed, 208 insertions(+), 317 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> > index cd9a5400ba4f..a87d05978c92 100644
> > --- a/fs/xfs/xfs_bmap_util.c
> > +++ b/fs/xfs/xfs_bmap_util.c
> > @@ -403,125 +403,103 @@ xfs_bmap_count_blocks(
> >  	return 0;
> >  }
> >  
> > -/*
> > - * returns 1 for success, 0 if we failed to map the extent.
> > - */
> > -STATIC int
> > -xfs_getbmapx_fix_eof_hole(
> > -	xfs_inode_t		*ip,		/* xfs incore inode pointer */
> > -	int			whichfork,
> > -	struct getbmapx		*out,		/* output structure */
> > -	int			prealloced,	/* this is a file with
> > -						 * preallocated data space */
> > -	int64_t			end,		/* last block requested */
> > -	xfs_fsblock_t		startblock,
> > -	bool			moretocome)
> > +static int
> > +xfs_getbmap_report_one(
> > +	struct xfs_inode	*ip,
> > +	struct getbmapx		*bmv,
> > +	struct getbmapx		*out,
> > +	int64_t			bmv_end,
> > +	struct xfs_bmbt_irec	*got)
> >  {
> > -	int64_t			fixlen;
> > -	xfs_mount_t		*mp;		/* file system mount point */
> > -	xfs_ifork_t		*ifp;		/* inode fork pointer */
> > -	xfs_extnum_t		lastx;		/* last extent pointer */
> > -	xfs_fileoff_t		fileblock;
> > -
> > -	if (startblock == HOLESTARTBLOCK) {
> > -		mp = ip->i_mount;
> > -		out->bmv_block = -1;
> > -		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
> > -		fixlen -= out->bmv_offset;
> > -		if (prealloced && out->bmv_offset + out->bmv_length == end) {
> > -			/* Came to hole at EOF. Trim it. */
> > -			if (fixlen <= 0)
> > -				return 0;
> > -			out->bmv_length = fixlen;
> > -		}
> > +	struct getbmapx		*p = out + bmv->bmv_entries;
> > +	bool			shared = false, trimmed = false;
> > +	int			error;
> > +
> > +	error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
> > +	if (error)
> > +		return error;
> > +
> > +	if (isnullstartblock(got->br_startblock) ||
> > +	    got->br_startblock == DELAYSTARTBLOCK) {
> > +		/*
> > +		 * Delalloc extents that start beyond EOF can occur due to
> > +		 * speculative EOF allocation when the delalloc extent is larger
> > +		 * than the largest freespace extent at conversion time.  These
> > +		 * extents cannot be converted by data writeback, so can exist
> > +		 * here even if we are not supposed to be finding delalloc
> > +		 * extents.
> > +		 */
> > +		if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
> > +			ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
> > +
> > +		p->bmv_oflags |= BMV_OF_DELALLOC;
> > +		p->bmv_block = -2;
> 
> Could you please turn the special bmv_block values (-2 for delayed
> allocation, -1 for hole) into defined constants in xfs_fs.h?
> 
> I'm particularly cranky about bmv_block == -1 since there isn't even a
> BMV_OF_ flag for holes.
> 
> >  	} else {
> > -		if (startblock == DELAYSTARTBLOCK)
> > -			out->bmv_block = -2;
> > -		else
> > -			out->bmv_block = xfs_fsb_to_db(ip, startblock);
> > -		fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
> > -		ifp = XFS_IFORK_PTR(ip, whichfork);
> > -		if (!moretocome &&
> > -		    xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
> > -		   (lastx == xfs_iext_count(ifp) - 1))
> > -			out->bmv_oflags |= BMV_OF_LAST;
> > +		p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
> >  	}
> >  
> > -	return 1;
> > +	if (got->br_state == XFS_EXT_UNWRITTEN &&
> > +	    (bmv->bmv_iflags & BMV_IF_PREALLOC))
> > +		p->bmv_oflags |= BMV_OF_PREALLOC;
> 
> Am I the only one who thought (from the xfs_bmap manpage) that you're
> supposed to BMV_IF_PREALLOC if you want the output to contain prealloc
> extents, and omit the flag if you don't want them?
> 
> Versus what the kernel actually does, which seems to be to merge extents
> together if you don't pass the flag:
> 
> $ xfs_io -c 'bmap -vvvv' moo
> moo:
>  EXT: FILE-OFFSET      BLOCK-RANGE          AG AG-OFFSET        TOTAL
>    0: [0..39]:         335288488..335288527  7 (736424..736463)    40
> 
> $ xfs_io -c 'bmap -vvvv -p' moo
> moo:
>  EXT: FILE-OFFSET      BLOCK-RANGE          AG AG-OFFSET        TOTAL FLAGS
>    0: [0..7]:          335288488..335288495  7 (736424..736431)     8 000000
>    1: [8..39]:         335288496..335288527  7 (736432..736463)    32 010000
> 
> Eh.  I guess the old code would report prealloc extents, it just doesn't
> flag them, so this is ok.
> 
> > +
> > +	if (shared)
> > +		p->bmv_oflags |= BMV_OF_SHARED;
> > +
> > +	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
> > +	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);
> > +
> > +	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
> > +	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
> > +	bmv->bmv_entries++;
> > +	return 0;
> >  }
> >  
> > -/* Adjust the reported bmap around shared/unshared extent transitions. */
> > -STATIC int
> > -xfs_getbmap_adjust_shared(
> > -	struct xfs_inode		*ip,
> > -	int				whichfork,
> > -	struct xfs_bmbt_irec		*map,
> > -	struct getbmapx			*out,
> > -	struct xfs_bmbt_irec		*next_map)
> > +static void
> > +xfs_getbmap_report_hole(
> > +	struct xfs_inode	*ip,
> > +	struct getbmapx		*bmv,
> > +	struct getbmapx		*out,
> > +	int64_t			bmv_end,
> > +	xfs_fileoff_t		bno,
> > +	xfs_fileoff_t		end)
> >  {
> > -	struct xfs_mount		*mp = ip->i_mount;
> > -	xfs_agnumber_t			agno;
> > -	xfs_agblock_t			agbno;
> > -	xfs_agblock_t			ebno;
> > -	xfs_extlen_t			elen;
> > -	xfs_extlen_t			nlen;
> > -	int				error;
> > +	struct getbmapx		*p = out + bmv->bmv_entries;
> >  
> > -	next_map->br_startblock = NULLFSBLOCK;
> > -	next_map->br_startoff = NULLFILEOFF;
> > -	next_map->br_blockcount = 0;
> > +	if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
> > +		return;
> >  
> > -	/* Only written data blocks can be shared. */
> > -	if (!xfs_is_reflink_inode(ip) ||
> > -	    whichfork != XFS_DATA_FORK ||
> > -	    !xfs_bmap_is_real_extent(map))
> > -		return 0;
> > +	p->bmv_block = -1;
> > +	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
> > +	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);
> >  
> > -	agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
> > -	agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
> > -	error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
> > -			map->br_blockcount, &ebno, &elen, true);
> > -	if (error)
> > -		return error;
> > +	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
> > +	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
> > +	bmv->bmv_entries++;
> > +}
> >  
> > -	if (ebno == NULLAGBLOCK) {
> > -		/* No shared blocks at all. */
> > -		return 0;
> > -	} else if (agbno == ebno) {
> > -		/*
> > -		 * Shared extent at (agbno, elen).  Shrink the reported
> > -		 * extent length and prepare to move the start of map[i]
> > -		 * to agbno+elen, with the aim of (re)formatting the new
> > -		 * map[i] the next time through the inner loop.
> > -		 */
> > -		out->bmv_length = XFS_FSB_TO_BB(mp, elen);
> > -		out->bmv_oflags |= BMV_OF_SHARED;
> > -		if (elen != map->br_blockcount) {
> > -			*next_map = *map;
> > -			next_map->br_startblock += elen;
> > -			next_map->br_startoff += elen;
> > -			next_map->br_blockcount -= elen;
> > -		}
> > -		map->br_blockcount -= elen;
> > -	} else {
> > -		/*
> > -		 * There's an unshared extent (agbno, ebno - agbno)
> > -		 * followed by shared extent at (ebno, elen).  Shrink
> > -		 * the reported extent length to cover only the unshared
> > -		 * extent and prepare to move up the start of map[i] to
> > -		 * ebno, with the aim of (re)formatting the new map[i]
> > -		 * the next time through the inner loop.
> > -		 */
> > -		*next_map = *map;
> > -		nlen = ebno - agbno;
> > -		out->bmv_length = XFS_FSB_TO_BB(mp, nlen);
> > -		next_map->br_startblock += nlen;
> > -		next_map->br_startoff += nlen;
> > -		next_map->br_blockcount -= nlen;
> > -		map->br_blockcount -= nlen;
> > -	}
> > +static inline bool
> > +xfs_getbmap_full(
> > +	struct getbmapx		*bmv)
> > +{
> > +	return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
> > +}
> >  
> > -	return 0;
> > +static bool
> > +xfs_getbmap_next_rec(
> > +	struct xfs_bmbt_irec	*rec,
> > +	xfs_fileoff_t		total_end)
> > +{
> > +	xfs_fileoff_t		end = rec->br_startoff + rec->br_blockcount;
> > +
> > +	if (end == total_end)
> > +		return false;
> > +
> > +	rec->br_startoff += rec->br_blockcount;
> > +	if (!isnullstartblock(rec->br_startblock) &&
> > +	    rec->br_startblock != DELAYSTARTBLOCK)
> > +		rec->br_startblock += rec->br_blockcount;
> > +	rec->br_blockcount = total_end - end;
> > +	return true;
> >  }
> >  
> >  /*
> > @@ -538,119 +516,72 @@ xfs_getbmap(
> >  	xfs_bmap_format_t	formatter,	/* format to user */
> >  	void			*arg)		/* formatter arg */
> >  {
> > -	int64_t			bmvend;		/* last block requested */
> > -	int			error = 0;	/* return value */
> > -	int64_t			fixlen;		/* length for -1 case */
> > -	int			i;		/* extent number */
> > -	int			lock;		/* lock state */
> > -	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
> > -	xfs_mount_t		*mp;		/* file system mount point */
> > -	int			nex;		/* # of user extents can do */
> > -	int			subnex;		/* # of bmapi's can do */
> > -	int			nmap;		/* number of map entries */
> > -	struct getbmapx		*out;		/* output structure */
> > -	int			whichfork;	/* data or attr fork */
> > -	int			prealloced;	/* this is a file with
> > -						 * preallocated data space */
> > -	int			iflags;		/* interface flags */
> > -	int			bmapi_flags;	/* flags for xfs_bmapi */
> > -	int			cur_ext = 0;
> > -	struct xfs_bmbt_irec	inject_map;
> > -
> > -	mp = ip->i_mount;
> > -	iflags = bmv->bmv_iflags;
> > +	struct xfs_mount	*mp = ip->i_mount;
> > +	int			iflags = bmv->bmv_iflags;
> > +	int			whichfork, lock, i, error = 0;
> > +	int64_t			bmv_end, max_len;
> > +	xfs_fileoff_t		bno, first_bno;
> > +	struct xfs_ifork	*ifp;
> > +	struct getbmapx		*out;
> > +	struct xfs_bmbt_irec	got, rec;
> > +	xfs_filblks_t		len;
> > +	xfs_extnum_t		idx;
> >  
> >  #ifndef DEBUG
> >  	/* Only allow CoW fork queries if we're debugging. */
> >  	if (iflags & BMV_IF_COWFORK)
> >  		return -EINVAL;
> >  #endif
> > +
> >  	if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
> >  		return -EINVAL;
> >  
> > +	if (bmv->bmv_count <= 1)
> > +		return -EINVAL;
> > +	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
> > +		return -ENOMEM;
> > +
> > +	if (bmv->bmv_length < -1)
> > +		return -EINVAL;
> > +
> > +	bmv->bmv_entries = 0;
> > +	if (bmv->bmv_length == 0)
> > +		return 0;
> > +
> > +	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
> 
> I'm still wondering why we allocate a potentially large getbmapx buffer,
> fill it out, and only then format the results to userspace?  I think
> getbmap (the ioctl) is now the only user of these functions, so can't
> we just call the formatter directly from _getbmap_report_one and
> _getbmap_report_hole, like what getfsmap does?
> 
> (I also feel like I've asked this before, so apologies if I'm merely
> forgetting the answer.)

Oh right, it's because we have the inode locked, and copying things to
userspace could incur a page fault, which we can't risk with the inode
locked because some malicious person could create a fragmented file with
a bmap request header at the start of the file, mmap the file, and call
bmap on the fragmented file with the pointer being the mmap region.

Sorry for the noise.  Crankiness about -1 and -2 still apply.

--D

> 
> --D
> 
> > +	if (!out)
> > +		return -ENOMEM;
> > +
> >  	if (iflags & BMV_IF_ATTRFORK)
> >  		whichfork = XFS_ATTR_FORK;
> >  	else if (iflags & BMV_IF_COWFORK)
> >  		whichfork = XFS_COW_FORK;
> >  	else
> >  		whichfork = XFS_DATA_FORK;
> > +	ifp = XFS_IFORK_PTR(ip, whichfork);
> >  
> > +	xfs_ilock(ip, XFS_IOLOCK_SHARED);
> >  	switch (whichfork) {
> >  	case XFS_ATTR_FORK:
> > -		if (XFS_IFORK_Q(ip)) {
> > -			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
> > -			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
> > -			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
> > -				return -EINVAL;
> > -		} else if (unlikely(
> > -			   ip->i_d.di_aformat != 0 &&
> > -			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
> > -			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
> > -					 ip->i_mount);
> > -			return -EFSCORRUPTED;
> > -		}
> > +		if (!XFS_IFORK_Q(ip))
> > +			goto out_unlock_iolock;
> >  
> > -		prealloced = 0;
> > -		fixlen = 1LL << 32;
> > +		max_len = 1LL << 32;
> > +		lock = xfs_ilock_attr_map_shared(ip);
> >  		break;
> >  	case XFS_COW_FORK:
> > -		if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS)
> > -			return -EINVAL;
> > +		/* No CoW fork? Just return */
> > +		if (!ifp)
> > +			goto out_unlock_iolock;
> >  
> > -		if (xfs_get_cowextsz_hint(ip)) {
> > -			prealloced = 1;
> > -			fixlen = mp->m_super->s_maxbytes;
> > -		} else {
> > -			prealloced = 0;
> > -			fixlen = XFS_ISIZE(ip);
> > -		}
> > -		break;
> > -	default:
> > -		/* Local format data forks report no extents. */
> > -		if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
> > -			bmv->bmv_entries = 0;
> > -			return 0;
> > -		}
> > -		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
> > -		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
> > -			return -EINVAL;
> > +		if (xfs_get_cowextsz_hint(ip))
> > +			max_len = mp->m_super->s_maxbytes;
> > +		else
> > +			max_len = XFS_ISIZE(ip);
> >  
> > -		if (xfs_get_extsz_hint(ip) ||
> > -		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
> > -			prealloced = 1;
> > -			fixlen = mp->m_super->s_maxbytes;
> > -		} else {
> > -			prealloced = 0;
> > -			fixlen = XFS_ISIZE(ip);
> > -		}
> > +		lock = XFS_ILOCK_SHARED;
> > +		xfs_ilock(ip, lock);
> >  		break;
> > -	}
> > -
> > -	if (bmv->bmv_length == -1) {
> > -		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
> > -		bmv->bmv_length =
> > -			max_t(int64_t, fixlen - bmv->bmv_offset, 0);
> > -	} else if (bmv->bmv_length == 0) {
> > -		bmv->bmv_entries = 0;
> > -		return 0;
> > -	} else if (bmv->bmv_length < 0) {
> > -		return -EINVAL;
> > -	}
> > -
> > -	nex = bmv->bmv_count - 1;
> > -	if (nex <= 0)
> > -		return -EINVAL;
> > -	bmvend = bmv->bmv_offset + bmv->bmv_length;
> > -
> > -
> > -	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
> > -		return -ENOMEM;
> > -	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
> > -	if (!out)
> > -		return -ENOMEM;
> > -
> > -	xfs_ilock(ip, XFS_IOLOCK_SHARED);
> > -	switch (whichfork) {
> >  	case XFS_DATA_FORK:
> >  		if (!(iflags & BMV_IF_DELALLOC) &&
> >  		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
> > @@ -668,147 +599,107 @@ xfs_getbmap(
> >  			 */
> >  		}
> >  
> > +		if (xfs_get_extsz_hint(ip) ||
> > +		    (ip->i_d.di_flags &
> > +		     (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
> > +			max_len = mp->m_super->s_maxbytes;
> > +		else
> > +			max_len = XFS_ISIZE(ip);
> > +
> >  		lock = xfs_ilock_data_map_shared(ip);
> >  		break;
> > -	case XFS_COW_FORK:
> > -		lock = XFS_ILOCK_SHARED;
> > -		xfs_ilock(ip, lock);
> > -		break;
> > -	case XFS_ATTR_FORK:
> > -		lock = xfs_ilock_attr_map_shared(ip);
> > -		break;
> >  	}
> >  
> > -	/*
> > -	 * Don't let nex be bigger than the number of extents
> > -	 * we can have assuming alternating holes and real extents.
> > -	 */
> > -	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
> > -		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
> > -
> > -	bmapi_flags = xfs_bmapi_aflag(whichfork);
> > -	if (!(iflags & BMV_IF_PREALLOC))
> > -		bmapi_flags |= XFS_BMAPI_IGSTATE;
> > -
> > -	/*
> > -	 * Allocate enough space to handle "subnex" maps at a time.
> > -	 */
> > -	error = -ENOMEM;
> > -	subnex = 16;
> > -	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
> > -	if (!map)
> > +	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
> > +	case XFS_DINODE_FMT_EXTENTS:
> > +	case XFS_DINODE_FMT_BTREE:
> > +		break;
> > +	case XFS_DINODE_FMT_LOCAL:
> > +		/* Local format inode forks report no extents. */
> >  		goto out_unlock_ilock;
> > +	default:
> > +		error = -EINVAL;
> > +		goto out_unlock_ilock;
> > +	}
> >  
> > -	bmv->bmv_entries = 0;
> > -
> > -	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
> > -	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
> > -		error = 0;
> > -		goto out_free_map;
> > +	if (bmv->bmv_length == -1) {
> > +		max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
> > +		bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
> >  	}
> >  
> > -	do {
> > -		nmap = (nex> subnex) ? subnex : nex;
> > -		error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
> > -				       XFS_BB_TO_FSB(mp, bmv->bmv_length),
> > -				       map, &nmap, bmapi_flags);
> > -		if (error)
> > -			goto out_free_map;
> > -		ASSERT(nmap <= subnex);
> > -
> > -		for (i = 0; i < nmap && bmv->bmv_length &&
> > -				cur_ext < bmv->bmv_count - 1; i++) {
> > -			out[cur_ext].bmv_oflags = 0;
> > -			if (map[i].br_state == XFS_EXT_UNWRITTEN)
> > -				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
> > -			else if (map[i].br_startblock == DELAYSTARTBLOCK)
> > -				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
> > -			out[cur_ext].bmv_offset =
> > -				XFS_FSB_TO_BB(mp, map[i].br_startoff);
> > -			out[cur_ext].bmv_length =
> > -				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
> > -			out[cur_ext].bmv_unused1 = 0;
> > -			out[cur_ext].bmv_unused2 = 0;
> > +	bmv_end = bmv->bmv_offset + bmv->bmv_length;
> >  
> > -			/*
> > -			 * delayed allocation extents that start beyond EOF can
> > -			 * occur due to speculative EOF allocation when the
> > -			 * delalloc extent is larger than the largest freespace
> > -			 * extent at conversion time. These extents cannot be
> > -			 * converted by data writeback, so can exist here even
> > -			 * if we are not supposed to be finding delalloc
> > -			 * extents.
> > -			 */
> > -			if (map[i].br_startblock == DELAYSTARTBLOCK &&
> > -			    map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
> > -				ASSERT((iflags & BMV_IF_DELALLOC) != 0);
> > -
> > -                        if (map[i].br_startblock == HOLESTARTBLOCK &&
> > -			    whichfork == XFS_ATTR_FORK) {
> > -				/* came to the end of attribute fork */
> > -				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
> > -				goto out_free_map;
> > -			}
> > +	first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
> > +	len = XFS_BB_TO_FSB(mp, bmv->bmv_length);
> >  
> > -			/* Is this a shared block? */
> > -			error = xfs_getbmap_adjust_shared(ip, whichfork,
> > -					&map[i], &out[cur_ext], &inject_map);
> > -			if (error)
> > -				goto out_free_map;
> > +	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> > +		error = xfs_iread_extents(NULL, ip, whichfork);
> > +		if (error)
> > +			goto out_unlock_ilock;
> > +	}
> >  
> > -			if (!xfs_getbmapx_fix_eof_hole(ip, whichfork,
> > -					&out[cur_ext], prealloced, bmvend,
> > -					map[i].br_startblock,
> > -					inject_map.br_startblock != NULLFSBLOCK))
> > -				goto out_free_map;
> > +	if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) {
> > +		/*
> > +		 * Report a whole-file hole if the delalloc flag is set to
> > +		 * stay compatible with the old implementation.
> > +		 */
> > +		if (iflags & BMV_IF_DELALLOC)
> > +			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
> > +					XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
> > +		goto out_unlock_ilock;
> > +	}
> >  
> > -			bmv->bmv_offset =
> > -				out[cur_ext].bmv_offset +
> > -				out[cur_ext].bmv_length;
> > -			bmv->bmv_length =
> > -				max_t(int64_t, 0, bmvend - bmv->bmv_offset);
> > +	while (!xfs_getbmap_full(bmv)) {
> > +		xfs_trim_extent(&got, first_bno, len);
> >  
> > -			/*
> > -			 * In case we don't want to return the hole,
> > -			 * don't increase cur_ext so that we can reuse
> > -			 * it in the next loop.
> > -			 */
> > -			if ((iflags & BMV_IF_NO_HOLES) &&
> > -			    map[i].br_startblock == HOLESTARTBLOCK) {
> > -				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
> > -				continue;
> > -			}
> > +		/*
> > +		 * Report an entry for a hole if this extent doesn't directly
> > +		 * follow the previous one.
> > +		 */
> > +		if (got.br_startoff > bno) {
> > +			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
> > +					got.br_startoff);
> > +			if (xfs_getbmap_full(bmv))
> > +				break;
> > +		}
> >  
> > -			/*
> > -			 * In order to report shared extents accurately,
> > -			 * we report each distinct shared/unshared part
> > -			 * of a single bmbt record using multiple bmap
> > -			 * extents.  To make that happen, we iterate the
> > -			 * same map array item multiple times, each
> > -			 * time trimming out the subextent that we just
> > -			 * reported.
> > -			 *
> > -			 * Because of this, we must check the out array
> > -			 * index (cur_ext) directly against bmv_count-1
> > -			 * to avoid overflows.
> > -			 */
> > -			if (inject_map.br_startblock != NULLFSBLOCK) {
> > -				map[i] = inject_map;
> > -				i--;
> > +		/*
> > +		 * In order to report shared extents accurately, we report each
> > +		 * distinct shared / unshared part of a single bmbt record with
> > +		 * an individual getbmapx record.
> > +		 */
> > +		bno = got.br_startoff + got.br_blockcount;
> > +		rec = got;
> > +		do {
> > +			error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
> > +					&rec);
> > +			if (error || xfs_getbmap_full(bmv))
> > +				goto out_unlock_ilock;
> > +		} while (xfs_getbmap_next_rec(&rec, bno));
> > +
> > +		if (!xfs_iext_get_extent(ifp, ++idx, &got)) {
> > +			xfs_fileoff_t	end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
> > +
> > +			out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;
> > +
> > +			if (whichfork != XFS_ATTR_FORK && bno < end &&
> > +			    !xfs_getbmap_full(bmv)) {
> > +				xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
> > +						bno, end);
> >  			}
> > -			bmv->bmv_entries++;
> > -			cur_ext++;
> > +			break;
> >  		}
> > -	} while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
> >  
> > - out_free_map:
> > -	kmem_free(map);
> > - out_unlock_ilock:
> > +		if (bno >= first_bno + len)
> > +			break;
> > +	}
> > +
> > +out_unlock_ilock:
> >  	xfs_iunlock(ip, lock);
> > - out_unlock_iolock:
> > +out_unlock_iolock:
> >  	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
> >  
> > -	for (i = 0; i < cur_ext; i++) {
> > +	for (i = 0; i < bmv->bmv_entries; i++) {
> >  		/* format results & advance arg */
> >  		error = formatter(&arg, &out[i]);
> >  		if (error)
> > -- 
> > 2.14.1
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] xfs: simplify the xfs_getbmap interface
  2017-09-18 15:26 ` [PATCH 2/2] xfs: simplify the xfs_getbmap interface Christoph Hellwig
@ 2017-09-20 23:08   ` Darrick J. Wong
  0 siblings, 0 replies; 16+ messages in thread
From: Darrick J. Wong @ 2017-09-20 23:08 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Mon, Sep 18, 2017 at 08:26:30AM -0700, Christoph Hellwig wrote:
> Instead of passing in a formatter callback allocate the bmap buffer
> in the caller and process the entries there.  Additionally replace
> the in-kernel buffer with a new much smaller structure, and unify
> the implementation of the different ioctls in a single function.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Brian Foster <bfoster@redhat.com>

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

> ---
>  fs/xfs/xfs_bmap_util.c |  38 ++++-----------
>  fs/xfs/xfs_bmap_util.h |  10 ++--
>  fs/xfs/xfs_ioctl.c     | 122 ++++++++++++++++++++++++-------------------------
>  3 files changed, 75 insertions(+), 95 deletions(-)
> 
> diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> index a87d05978c92..b540ac65b8b3 100644
> --- a/fs/xfs/xfs_bmap_util.c
> +++ b/fs/xfs/xfs_bmap_util.c
> @@ -407,11 +407,11 @@ static int
>  xfs_getbmap_report_one(
>  	struct xfs_inode	*ip,
>  	struct getbmapx		*bmv,
> -	struct getbmapx		*out,
> +	struct kgetbmap		*out,
>  	int64_t			bmv_end,
>  	struct xfs_bmbt_irec	*got)
>  {
> -	struct getbmapx		*p = out + bmv->bmv_entries;
> +	struct kgetbmap		*p = out + bmv->bmv_entries;
>  	bool			shared = false, trimmed = false;
>  	int			error;
>  
> @@ -458,12 +458,12 @@ static void
>  xfs_getbmap_report_hole(
>  	struct xfs_inode	*ip,
>  	struct getbmapx		*bmv,
> -	struct getbmapx		*out,
> +	struct kgetbmap		*out,
>  	int64_t			bmv_end,
>  	xfs_fileoff_t		bno,
>  	xfs_fileoff_t		end)
>  {
> -	struct getbmapx		*p = out + bmv->bmv_entries;
> +	struct kgetbmap		*p = out + bmv->bmv_entries;
>  
>  	if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
>  		return;
> @@ -511,47 +511,36 @@ xfs_getbmap_next_rec(
>   */
>  int						/* error code */
>  xfs_getbmap(
> -	xfs_inode_t		*ip,
> +	struct xfs_inode	*ip,
>  	struct getbmapx		*bmv,		/* user bmap structure */
> -	xfs_bmap_format_t	formatter,	/* format to user */
> -	void			*arg)		/* formatter arg */
> +	struct kgetbmap		*out)
>  {
>  	struct xfs_mount	*mp = ip->i_mount;
>  	int			iflags = bmv->bmv_iflags;
> -	int			whichfork, lock, i, error = 0;
> +	int			whichfork, lock, error = 0;
>  	int64_t			bmv_end, max_len;
>  	xfs_fileoff_t		bno, first_bno;
>  	struct xfs_ifork	*ifp;
> -	struct getbmapx		*out;
>  	struct xfs_bmbt_irec	got, rec;
>  	xfs_filblks_t		len;
>  	xfs_extnum_t		idx;
>  
> +	if (bmv->bmv_iflags & ~BMV_IF_VALID)
> +		return -EINVAL;
>  #ifndef DEBUG
>  	/* Only allow CoW fork queries if we're debugging. */
>  	if (iflags & BMV_IF_COWFORK)
>  		return -EINVAL;
>  #endif
> -
>  	if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
>  		return -EINVAL;
>  
> -	if (bmv->bmv_count <= 1)
> -		return -EINVAL;
> -	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
> -		return -ENOMEM;
> -
>  	if (bmv->bmv_length < -1)
>  		return -EINVAL;
> -
>  	bmv->bmv_entries = 0;
>  	if (bmv->bmv_length == 0)
>  		return 0;
>  
> -	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
> -	if (!out)
> -		return -ENOMEM;
> -
>  	if (iflags & BMV_IF_ATTRFORK)
>  		whichfork = XFS_ATTR_FORK;
>  	else if (iflags & BMV_IF_COWFORK)
> @@ -698,15 +687,6 @@ xfs_getbmap(
>  	xfs_iunlock(ip, lock);
>  out_unlock_iolock:
>  	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
> -
> -	for (i = 0; i < bmv->bmv_entries; i++) {
> -		/* format results & advance arg */
> -		error = formatter(&arg, &out[i]);
> -		if (error)
> -			break;
> -	}
> -
> -	kmem_free(out);
>  	return error;
>  }
>  
> diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
> index 0eaa81dc49be..6cfe747cb142 100644
> --- a/fs/xfs/xfs_bmap_util.h
> +++ b/fs/xfs/xfs_bmap_util.h
> @@ -34,10 +34,14 @@ int	xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
>  int	xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
>  		xfs_fileoff_t start_fsb, xfs_fileoff_t length);
>  
> -/* bmap to userspace formatter - copy to user & advance pointer */
> -typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *);
> +struct kgetbmap {
> +	__s64		bmv_offset;	/* file offset of segment in blocks */
> +	__s64		bmv_block;	/* starting block (64-bit daddr_t)  */
> +	__s64		bmv_length;	/* length of segment, blocks	    */
> +	__s32		bmv_oflags;	/* output flags */
> +};
>  int	xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
> -		xfs_bmap_format_t formatter, void *arg);
> +		struct kgetbmap *out);
>  
>  /* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */
>  int	xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp,
> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> index 5049e8ab6e30..8e1ab254aa19 100644
> --- a/fs/xfs/xfs_ioctl.c
> +++ b/fs/xfs/xfs_ioctl.c
> @@ -1539,17 +1539,26 @@ xfs_ioc_setxflags(
>  	return error;
>  }
>  
> -STATIC int
> -xfs_getbmap_format(void **ap, struct getbmapx *bmv)
> +static bool
> +xfs_getbmap_format(
> +	struct kgetbmap		*p,
> +	struct getbmapx __user	*u,
> +	size_t			recsize)
>  {
> -	struct getbmap __user	*base = (struct getbmap __user *)*ap;
> -
> -	/* copy only getbmap portion (not getbmapx) */
> -	if (copy_to_user(base, bmv, sizeof(struct getbmap)))
> -		return -EFAULT;
> -
> -	*ap += sizeof(struct getbmap);
> -	return 0;
> +	if (put_user(p->bmv_offset, &u->bmv_offset) ||
> +	    put_user(p->bmv_block, &u->bmv_block) ||
> +	    put_user(p->bmv_length, &u->bmv_length) ||
> +	    put_user(0, &u->bmv_count) ||
> +	    put_user(0, &u->bmv_entries))
> +		return false;
> +	if (recsize < sizeof(struct getbmapx))
> +		return true;
> +	if (put_user(0, &u->bmv_iflags) ||
> +	    put_user(p->bmv_oflags, &u->bmv_oflags) ||
> +	    put_user(0, &u->bmv_unused1) ||
> +	    put_user(0, &u->bmv_unused2))
> +		return false;
> +	return true;
>  }
>  
>  STATIC int
> @@ -1559,68 +1568,57 @@ xfs_ioc_getbmap(
>  	void			__user *arg)
>  {
>  	struct getbmapx		bmx = { 0 };
> -	int			error;
> +	struct kgetbmap		*buf;
> +	size_t			recsize;
> +	int			error, i;
>  
> -	/* struct getbmap is a strict subset of struct getbmapx. */
> -	if (copy_from_user(&bmx, arg, offsetof(struct getbmapx, bmv_iflags)))
> -		return -EFAULT;
> -
> -	if (bmx.bmv_count < 2)
> +	switch (cmd) {
> +	case XFS_IOC_GETBMAPA:
> +		bmx.bmv_iflags = BMV_IF_ATTRFORK;
> +		/*FALLTHRU*/
> +	case XFS_IOC_GETBMAP:
> +		if (file->f_mode & FMODE_NOCMTIME)
> +			bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
> +		/* struct getbmap is a strict subset of struct getbmapx. */
> +		recsize = sizeof(struct getbmap);
> +		break;
> +	case XFS_IOC_GETBMAPX:
> +		recsize = sizeof(struct getbmapx);
> +		break;
> +	default:
>  		return -EINVAL;
> +	}
>  
> -	bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
> -	if (file->f_mode & FMODE_NOCMTIME)
> -		bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
> -
> -	error = xfs_getbmap(XFS_I(file_inode(file)), &bmx, xfs_getbmap_format,
> -			    (__force struct getbmap *)arg+1);
> -	if (error)
> -		return error;
> -
> -	/* copy back header - only size of getbmap */
> -	if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
> -		return -EFAULT;
> -	return 0;
> -}
> -
> -STATIC int
> -xfs_getbmapx_format(void **ap, struct getbmapx *bmv)
> -{
> -	struct getbmapx __user	*base = (struct getbmapx __user *)*ap;
> -
> -	if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
> -		return -EFAULT;
> -
> -	*ap += sizeof(struct getbmapx);
> -	return 0;
> -}
> -
> -STATIC int
> -xfs_ioc_getbmapx(
> -	struct xfs_inode	*ip,
> -	void			__user *arg)
> -{
> -	struct getbmapx		bmx;
> -	int			error;
> -
> -	if (copy_from_user(&bmx, arg, sizeof(bmx)))
> +	if (copy_from_user(&bmx, arg, recsize))
>  		return -EFAULT;
>  
>  	if (bmx.bmv_count < 2)
>  		return -EINVAL;
> +	if (bmx.bmv_count > ULONG_MAX / recsize)
> +		return -ENOMEM;
>  
> -	if (bmx.bmv_iflags & (~BMV_IF_VALID))
> -		return -EINVAL;
> +	buf = kmem_zalloc_large(bmx.bmv_count * sizeof(*buf), 0);
> +	if (!buf)
> +		return -ENOMEM;
>  
> -	error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
> -			    (__force struct getbmapx *)arg+1);
> +	error = xfs_getbmap(XFS_I(file_inode(file)), &bmx, buf);
>  	if (error)
> -		return error;
> +		goto out_free_buf;
>  
> -	/* copy back header */
> -	if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
> -		return -EFAULT;
> +	error = -EFAULT;
> +	if (copy_to_user(arg, &bmx, recsize))
> +		goto out_free_buf;
> +	arg += recsize;
> +
> +	for (i = 0; i < bmx.bmv_entries; i++) {
> +		if (!xfs_getbmap_format(buf + i, arg, recsize))
> +			goto out_free_buf;
> +		arg += recsize;
> +	}
>  
> +	error = 0;
> +out_free_buf:
> +	kmem_free(buf);
>  	return 0;
>  }
>  
> @@ -1877,10 +1875,8 @@ xfs_file_ioctl(
>  
>  	case XFS_IOC_GETBMAP:
>  	case XFS_IOC_GETBMAPA:
> -		return xfs_ioc_getbmap(filp, cmd, arg);
> -
>  	case XFS_IOC_GETBMAPX:
> -		return xfs_ioc_getbmapx(ip, arg);
> +		return xfs_ioc_getbmap(filp, cmd, arg);
>  
>  	case FS_IOC_GETFSMAP:
>  		return xfs_ioc_getfsmap(ip, arg);
> -- 
> 2.14.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers
  2017-09-20 23:00   ` Darrick J. Wong
  2017-09-20 23:08     ` Darrick J. Wong
@ 2017-09-21 13:35     ` Christoph Hellwig
  2017-09-21 15:40       ` Darrick J. Wong
  1 sibling, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2017-09-21 13:35 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs

On Wed, Sep 20, 2017 at 04:00:36PM -0700, Darrick J. Wong wrote:
> On Mon, Sep 18, 2017 at 08:26:29AM -0700, Christoph Hellwig wrote:
> > Currently getbmap uses xfs_bmapi_read to query the extent map, and then
> > fixes up various bits that are eventually reported to userspace.
> > 
> > This patch instead rewrites it to use xfs_iext_lookup_extent and
> > xfs_iext_get_extent to iteratively process the extent map.  This not
> > only avoids the need to allocate a map for the returned xfs_bmbt_irec
> > structures but also greatly simplified the code.
> > 
> > There are two intentional behavior changes compared to the old code:
> > 
> >  - the current code reports unwritten extents that don't directly border
> >    a written one as unwritten even when not passing the BMV_IF_PREALLOC
> >    option, contrary to the documentation.  The new code requires the
> >    BMV_IF_PREALLOC flag to report the unwrittent extent bit.
> >  - The new code does never merges consecutive extents, unlike the old
> >    code that sometimes does it based on the boundaries of the
> >    xfs_bmapi_read calls.  Note that the extent merging behavior was
> >    entirely undocumented.
> > 
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > ---
> >  fs/xfs/xfs_bmap_util.c | 525 ++++++++++++++++++++-----------------------------
> >  1 file changed, 208 insertions(+), 317 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> > index cd9a5400ba4f..a87d05978c92 100644
> > --- a/fs/xfs/xfs_bmap_util.c
> > +++ b/fs/xfs/xfs_bmap_util.c
> > @@ -403,125 +403,103 @@ xfs_bmap_count_blocks(
> >  	return 0;
> >  }
> >  
> > -/*
> > - * returns 1 for success, 0 if we failed to map the extent.
> > - */
> > -STATIC int
> > -xfs_getbmapx_fix_eof_hole(
> > -	xfs_inode_t		*ip,		/* xfs incore inode pointer */
> > -	int			whichfork,
> > -	struct getbmapx		*out,		/* output structure */
> > -	int			prealloced,	/* this is a file with
> > -						 * preallocated data space */
> > -	int64_t			end,		/* last block requested */
> > -	xfs_fsblock_t		startblock,
> > -	bool			moretocome)
> > +static int
> > +xfs_getbmap_report_one(
> > +	struct xfs_inode	*ip,
> > +	struct getbmapx		*bmv,
> > +	struct getbmapx		*out,
> > +	int64_t			bmv_end,
> > +	struct xfs_bmbt_irec	*got)
> >  {
> > -	int64_t			fixlen;
> > -	xfs_mount_t		*mp;		/* file system mount point */
> > -	xfs_ifork_t		*ifp;		/* inode fork pointer */
> > -	xfs_extnum_t		lastx;		/* last extent pointer */
> > -	xfs_fileoff_t		fileblock;
> > -
> > -	if (startblock == HOLESTARTBLOCK) {
> > -		mp = ip->i_mount;
> > -		out->bmv_block = -1;
> > -		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
> > -		fixlen -= out->bmv_offset;
> > -		if (prealloced && out->bmv_offset + out->bmv_length == end) {
> > -			/* Came to hole at EOF. Trim it. */
> > -			if (fixlen <= 0)
> > -				return 0;
> > -			out->bmv_length = fixlen;
> > -		}
> > +	struct getbmapx		*p = out + bmv->bmv_entries;
> > +	bool			shared = false, trimmed = false;
> > +	int			error;
> > +
> > +	error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
> > +	if (error)
> > +		return error;
> > +
> > +	if (isnullstartblock(got->br_startblock) ||
> > +	    got->br_startblock == DELAYSTARTBLOCK) {
> > +		/*
> > +		 * Delalloc extents that start beyond EOF can occur due to
> > +		 * speculative EOF allocation when the delalloc extent is larger
> > +		 * than the largest freespace extent at conversion time.  These
> > +		 * extents cannot be converted by data writeback, so can exist
> > +		 * here even if we are not supposed to be finding delalloc
> > +		 * extents.
> > +		 */
> > +		if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
> > +			ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
> > +
> > +		p->bmv_oflags |= BMV_OF_DELALLOC;
> > +		p->bmv_block = -2;
> 
> Could you please turn the special bmv_block values (-2 for delayed
> allocation, -1 for hole) into defined constants in xfs_fs.h?
> 
> I'm particularly cranky about bmv_block == -1 since there isn't even a
> BMV_OF_ flag for holes.

I can prepare a patch for it, but I don't want to throw random cleanups
into this series which I need as a preparation for the extent list
rework.

> > +	if (got->br_state == XFS_EXT_UNWRITTEN &&
> > +	    (bmv->bmv_iflags & BMV_IF_PREALLOC))
> > +		p->bmv_oflags |= BMV_OF_PREALLOC;
> 
> Am I the only one who thought (from the xfs_bmap manpage) that you're
> supposed to BMV_IF_PREALLOC if you want the output to contain prealloc
> extents, and omit the flag if you don't want them?
> 
> Versus what the kernel actually does, which seems to be to merge extents
> together if you don't pass the flag:
> 
> $ xfs_io -c 'bmap -vvvv' moo
> moo:
>  EXT: FILE-OFFSET      BLOCK-RANGE          AG AG-OFFSET        TOTAL
>    0: [0..39]:         335288488..335288527  7 (736424..736463)    40
> 
> $ xfs_io -c 'bmap -vvvv -p' moo
> moo:
>  EXT: FILE-OFFSET      BLOCK-RANGE          AG AG-OFFSET        TOTAL FLAGS
>    0: [0..7]:          335288488..335288495  7 (736424..736431)     8 000000
>    1: [8..39]:         335288496..335288527  7 (736432..736463)    32 010000
> 
> Eh.  I guess the old code would report prealloc extents, it just doesn't
> flag them, so this is ok.

The old code even flags them if there is no normal extent to merge them
with, but I consider that a bug I didn't want to follow in the new
code.  E.g. try creating a sparse file and just preallocate an extent
in it, and it will be marked as preallocated.

I never understood the point of the BMV_IF_PREALLOC flag - why would
we ever want to not report preallocated extents?  We also set
the new BMV_OF_SHARED unconditionally for example.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers
  2017-09-20 23:08     ` Darrick J. Wong
@ 2017-09-21 13:36       ` Christoph Hellwig
  2017-09-21 15:35         ` Darrick J. Wong
  0 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2017-09-21 13:36 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs

On Wed, Sep 20, 2017 at 04:08:24PM -0700, Darrick J. Wong wrote:
> > I'm still wondering why we allocate a potentially large getbmapx buffer,
> > fill it out, and only then format the results to userspace?  I think
> > getbmap (the ioctl) is now the only user of these functions, so can't
> > we just call the formatter directly from _getbmap_report_one and
> > _getbmap_report_hole, like what getfsmap does?
> > 
> > (I also feel like I've asked this before, so apologies if I'm merely
> > forgetting the answer.)
> 
> Oh right, it's because we have the inode locked, and copying things to
> userspace could incur a page fault, which we can't risk with the inode
> locked because some malicious person could create a fragmented file with
> a bmap request header at the start of the file, mmap the file, and call
> bmap on the fragmented file with the pointer being the mmap region.

Yes.

Can I get a Reviewed-by: tag now? :)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers
  2017-09-21 13:36       ` Christoph Hellwig
@ 2017-09-21 15:35         ` Darrick J. Wong
  0 siblings, 0 replies; 16+ messages in thread
From: Darrick J. Wong @ 2017-09-21 15:35 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Sep 21, 2017 at 03:36:10PM +0200, Christoph Hellwig wrote:
> On Wed, Sep 20, 2017 at 04:08:24PM -0700, Darrick J. Wong wrote:
> > > I'm still wondering why we allocate a potentially large getbmapx buffer,
> > > fill it out, and only then format the results to userspace?  I think
> > > getbmap (the ioctl) is now the only user of these functions, so can't
> > > we just call the formatter directly from _getbmap_report_one and
> > > _getbmap_report_hole, like what getfsmap does?
> > > 
> > > (I also feel like I've asked this before, so apologies if I'm merely
> > > forgetting the answer.)
> > 
> > Oh right, it's because we have the inode locked, and copying things to
> > userspace could incur a page fault, which we can't risk with the inode
> > locked because some malicious person could create a fragmented file with
> > a bmap request header at the start of the file, mmap the file, and call
> > bmap on the fragmented file with the pointer being the mmap region.
> 
> Yes.
> 
> Can I get a Reviewed-by: tag now? :)

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers
  2017-09-21 13:35     ` Christoph Hellwig
@ 2017-09-21 15:40       ` Darrick J. Wong
  0 siblings, 0 replies; 16+ messages in thread
From: Darrick J. Wong @ 2017-09-21 15:40 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Sep 21, 2017 at 03:35:06PM +0200, Christoph Hellwig wrote:
> On Wed, Sep 20, 2017 at 04:00:36PM -0700, Darrick J. Wong wrote:
> > On Mon, Sep 18, 2017 at 08:26:29AM -0700, Christoph Hellwig wrote:
> > > Currently getbmap uses xfs_bmapi_read to query the extent map, and then
> > > fixes up various bits that are eventually reported to userspace.
> > > 
> > > This patch instead rewrites it to use xfs_iext_lookup_extent and
> > > xfs_iext_get_extent to iteratively process the extent map.  This not
> > > only avoids the need to allocate a map for the returned xfs_bmbt_irec
> > > structures but also greatly simplified the code.
> > > 
> > > There are two intentional behavior changes compared to the old code:
> > > 
> > >  - the current code reports unwritten extents that don't directly border
> > >    a written one as unwritten even when not passing the BMV_IF_PREALLOC
> > >    option, contrary to the documentation.  The new code requires the
> > >    BMV_IF_PREALLOC flag to report the unwrittent extent bit.
> > >  - The new code does never merges consecutive extents, unlike the old
> > >    code that sometimes does it based on the boundaries of the
> > >    xfs_bmapi_read calls.  Note that the extent merging behavior was
> > >    entirely undocumented.
> > > 
> > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > > ---
> > >  fs/xfs/xfs_bmap_util.c | 525 ++++++++++++++++++++-----------------------------
> > >  1 file changed, 208 insertions(+), 317 deletions(-)
> > > 
> > > diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> > > index cd9a5400ba4f..a87d05978c92 100644
> > > --- a/fs/xfs/xfs_bmap_util.c
> > > +++ b/fs/xfs/xfs_bmap_util.c
> > > @@ -403,125 +403,103 @@ xfs_bmap_count_blocks(
> > >  	return 0;
> > >  }
> > >  
> > > -/*
> > > - * returns 1 for success, 0 if we failed to map the extent.
> > > - */
> > > -STATIC int
> > > -xfs_getbmapx_fix_eof_hole(
> > > -	xfs_inode_t		*ip,		/* xfs incore inode pointer */
> > > -	int			whichfork,
> > > -	struct getbmapx		*out,		/* output structure */
> > > -	int			prealloced,	/* this is a file with
> > > -						 * preallocated data space */
> > > -	int64_t			end,		/* last block requested */
> > > -	xfs_fsblock_t		startblock,
> > > -	bool			moretocome)
> > > +static int
> > > +xfs_getbmap_report_one(
> > > +	struct xfs_inode	*ip,
> > > +	struct getbmapx		*bmv,
> > > +	struct getbmapx		*out,
> > > +	int64_t			bmv_end,
> > > +	struct xfs_bmbt_irec	*got)
> > >  {
> > > -	int64_t			fixlen;
> > > -	xfs_mount_t		*mp;		/* file system mount point */
> > > -	xfs_ifork_t		*ifp;		/* inode fork pointer */
> > > -	xfs_extnum_t		lastx;		/* last extent pointer */
> > > -	xfs_fileoff_t		fileblock;
> > > -
> > > -	if (startblock == HOLESTARTBLOCK) {
> > > -		mp = ip->i_mount;
> > > -		out->bmv_block = -1;
> > > -		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
> > > -		fixlen -= out->bmv_offset;
> > > -		if (prealloced && out->bmv_offset + out->bmv_length == end) {
> > > -			/* Came to hole at EOF. Trim it. */
> > > -			if (fixlen <= 0)
> > > -				return 0;
> > > -			out->bmv_length = fixlen;
> > > -		}
> > > +	struct getbmapx		*p = out + bmv->bmv_entries;
> > > +	bool			shared = false, trimmed = false;
> > > +	int			error;
> > > +
> > > +	error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
> > > +	if (error)
> > > +		return error;
> > > +
> > > +	if (isnullstartblock(got->br_startblock) ||
> > > +	    got->br_startblock == DELAYSTARTBLOCK) {
> > > +		/*
> > > +		 * Delalloc extents that start beyond EOF can occur due to
> > > +		 * speculative EOF allocation when the delalloc extent is larger
> > > +		 * than the largest freespace extent at conversion time.  These
> > > +		 * extents cannot be converted by data writeback, so can exist
> > > +		 * here even if we are not supposed to be finding delalloc
> > > +		 * extents.
> > > +		 */
> > > +		if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
> > > +			ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
> > > +
> > > +		p->bmv_oflags |= BMV_OF_DELALLOC;
> > > +		p->bmv_block = -2;
> > 
> > Could you please turn the special bmv_block values (-2 for delayed
> > allocation, -1 for hole) into defined constants in xfs_fs.h?
> > 
> > I'm particularly cranky about bmv_block == -1 since there isn't even a
> > BMV_OF_ flag for holes.
> 
> I can prepare a patch for it, but I don't want to throw random cleanups
> into this series which I need as a preparation for the extent list
> rework.

Yes, please! :)

> > > +	if (got->br_state == XFS_EXT_UNWRITTEN &&
> > > +	    (bmv->bmv_iflags & BMV_IF_PREALLOC))
> > > +		p->bmv_oflags |= BMV_OF_PREALLOC;
> > 
> > Am I the only one who thought (from the xfs_bmap manpage) that you're
> > supposed to BMV_IF_PREALLOC if you want the output to contain prealloc
> > extents, and omit the flag if you don't want them?
> > 
> > Versus what the kernel actually does, which seems to be to merge extents
> > together if you don't pass the flag:
> > 
> > $ xfs_io -c 'bmap -vvvv' moo
> > moo:
> >  EXT: FILE-OFFSET      BLOCK-RANGE          AG AG-OFFSET        TOTAL
> >    0: [0..39]:         335288488..335288527  7 (736424..736463)    40
> > 
> > $ xfs_io -c 'bmap -vvvv -p' moo
> > moo:
> >  EXT: FILE-OFFSET      BLOCK-RANGE          AG AG-OFFSET        TOTAL FLAGS
> >    0: [0..7]:          335288488..335288495  7 (736424..736431)     8 000000
> >    1: [8..39]:         335288496..335288527  7 (736432..736463)    32 010000
> > 
> > Eh.  I guess the old code would report prealloc extents, it just doesn't
> > flag them, so this is ok.
> 
> The old code even flags them if there is no normal extent to merge them
> with, but I consider that a bug I didn't want to follow in the new
> code.  E.g. try creating a sparse file and just preallocate an extent
> in it, and it will be marked as preallocated.
> 
> I never understood the point of the BMV_IF_PREALLOC flag - why would
> we ever want to not report preallocated extents?  We also set
> the new BMV_OF_SHARED unconditionally for example.

I don't really understand the bmap behavior either, but I do get a whiff
of 'historical reasons' :)

--D

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers
  2017-09-11 15:49 ` Brian Foster
@ 2017-09-17 21:44   ` Christoph Hellwig
  0 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2017-09-17 21:44 UTC (permalink / raw)
  To: Brian Foster; +Cc: Christoph Hellwig, linux-xfs

On Mon, Sep 11, 2017 at 11:49:25AM -0400, Brian Foster wrote:
> > +		if (!xfs_iext_get_extent(ifp, ++idx, &got)) {
> > +			xfs_fileoff_t	end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
> > +
> > +			out[bmv->bmv_entries].bmv_oflags |= BMV_OF_LAST;
> > +
> 
> I'm a little confused about the above bit. Isn't ->bmv_entries already
> incremented past the last reported extent?

Yes, it needs to be bmv->bmv_entries - 1.

> Further, if there is a hole
> to be reported, we potentially do that just below (which means that
> ->bmv_entries may or may not refer to the last reported segment here)..?

We can't reach this code when the previous extent was a hole.  For the
whole file hole case we jump straight to out_unlock_ilock after
filling the hole, for the inbetween extents hole case we break out
of the loop when running out of space before filling the second extent,
and the end of file hole case is just below setting BMV_OF_LAST.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers
  2017-09-03 15:51 [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers Christoph Hellwig
@ 2017-09-11 15:49 ` Brian Foster
  2017-09-17 21:44   ` Christoph Hellwig
  0 siblings, 1 reply; 16+ messages in thread
From: Brian Foster @ 2017-09-11 15:49 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Sun, Sep 03, 2017 at 05:51:39PM +0200, Christoph Hellwig wrote:
> Currently getbmap uses xfs_bmapi_read to query the extent map, and then
> fixes up various bits that are eventually reported to userspace.
> 
> This patch instead rewrites it to use xfs_iext_lookup_extent and
> xfs_iext_get_extent to iteratively process the extent map.  This not
> only avoids the need to allocate a map for the returned xfs_bmbt_irec
> structures but also greatly simplified the code.
> 
> There are two intentional behavior changes compared to the old code:
> 
>  - the current code reports unwritten extents that don't directly border
>    a written one as unwritten even when not passing the BMV_IF_PREALLOC
>    option, contrary to the documentation.  The new code requires the
>    BMV_IF_PREALLOC flag to report the unwrittent extent bit.
>  - The new code does never merges consecutive extents, unlike the old
>    code that sometimes does it based on the boundaries of the
>    xfs_bmapi_read calls.  Note that the extent merging behavior was
>    entirely undocumented.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_bmap_util.c | 525 ++++++++++++++++++++-----------------------------
>  1 file changed, 208 insertions(+), 317 deletions(-)
> 
> diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> index cd9a5400ba4f..a11f4c300643 100644
> --- a/fs/xfs/xfs_bmap_util.c
> +++ b/fs/xfs/xfs_bmap_util.c
...
> @@ -668,147 +599,107 @@ xfs_getbmap(
...
> +		/*
> +		 * In order to report shared extents accurately, we report each
> +		 * distinct shared / unshared part of a single bmbt record with
> +		 * an individual getbmapx record.
> +		 */
> +		bno = got.br_startoff + got.br_blockcount;
> +		rec = got;
> +		do {
> +			error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
> +					&rec);
> +			if (error || xfs_getbmap_full(bmv))
> +				goto out_unlock_ilock;
> +		} while (xfs_getbmap_next_rec(&rec, bno));
> +
> +		if (!xfs_iext_get_extent(ifp, ++idx, &got)) {
> +			xfs_fileoff_t	end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
> +
> +			out[bmv->bmv_entries].bmv_oflags |= BMV_OF_LAST;
> +

I'm a little confused about the above bit. Isn't ->bmv_entries already
incremented past the last reported extent? Further, if there is a hole
to be reported, we potentially do that just below (which means that
->bmv_entries may or may not refer to the last reported segment here)..?

Otherwise the rest of the patch looks good to me.

Brian

> +			if (whichfork != XFS_ATTR_FORK && bno < end &&
> +			    !xfs_getbmap_full(bmv)) {
> +				xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
> +						bno, end);
>  			}
> -			bmv->bmv_entries++;
> -			cur_ext++;
> +			break;
>  		}
> -	} while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
>  
> - out_free_map:
> -	kmem_free(map);
> - out_unlock_ilock:
> +		if (bno >= first_bno + len)
> +			break;
> +	}
> +
> +out_unlock_ilock:
>  	xfs_iunlock(ip, lock);
> - out_unlock_iolock:
> +out_unlock_iolock:
>  	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
>  
> -	for (i = 0; i < cur_ext; i++) {
> +	for (i = 0; i < bmv->bmv_entries; i++) {
>  		/* format results & advance arg */
>  		error = formatter(&arg, &out[i]);
>  		if (error)
> -- 
> 2.11.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers
@ 2017-09-03 15:51 Christoph Hellwig
  2017-09-11 15:49 ` Brian Foster
  0 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2017-09-03 15:51 UTC (permalink / raw)
  To: linux-xfs

Currently getbmap uses xfs_bmapi_read to query the extent map, and then
fixes up various bits that are eventually reported to userspace.

This patch instead rewrites it to use xfs_iext_lookup_extent and
xfs_iext_get_extent to iteratively process the extent map.  This not
only avoids the need to allocate a map for the returned xfs_bmbt_irec
structures but also greatly simplified the code.

There are two intentional behavior changes compared to the old code:

 - the current code reports unwritten extents that don't directly border
   a written one as unwritten even when not passing the BMV_IF_PREALLOC
   option, contrary to the documentation.  The new code requires the
   BMV_IF_PREALLOC flag to report the unwrittent extent bit.
 - The new code does never merges consecutive extents, unlike the old
   code that sometimes does it based on the boundaries of the
   xfs_bmapi_read calls.  Note that the extent merging behavior was
   entirely undocumented.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_bmap_util.c | 525 ++++++++++++++++++++-----------------------------
 1 file changed, 208 insertions(+), 317 deletions(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index cd9a5400ba4f..a11f4c300643 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -403,125 +403,103 @@ xfs_bmap_count_blocks(
 	return 0;
 }
 
-/*
- * returns 1 for success, 0 if we failed to map the extent.
- */
-STATIC int
-xfs_getbmapx_fix_eof_hole(
-	xfs_inode_t		*ip,		/* xfs incore inode pointer */
-	int			whichfork,
-	struct getbmapx		*out,		/* output structure */
-	int			prealloced,	/* this is a file with
-						 * preallocated data space */
-	int64_t			end,		/* last block requested */
-	xfs_fsblock_t		startblock,
-	bool			moretocome)
+static int
+xfs_getbmap_report_one(
+	struct xfs_inode	*ip,
+	struct getbmapx		*bmv,
+	struct getbmapx		*out,
+	int64_t			bmv_end,
+	struct xfs_bmbt_irec	*got)
 {
-	int64_t			fixlen;
-	xfs_mount_t		*mp;		/* file system mount point */
-	xfs_ifork_t		*ifp;		/* inode fork pointer */
-	xfs_extnum_t		lastx;		/* last extent pointer */
-	xfs_fileoff_t		fileblock;
-
-	if (startblock == HOLESTARTBLOCK) {
-		mp = ip->i_mount;
-		out->bmv_block = -1;
-		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
-		fixlen -= out->bmv_offset;
-		if (prealloced && out->bmv_offset + out->bmv_length == end) {
-			/* Came to hole at EOF. Trim it. */
-			if (fixlen <= 0)
-				return 0;
-			out->bmv_length = fixlen;
-		}
+	struct getbmapx		*p = out + bmv->bmv_entries;
+	bool			shared = false, trimmed = false;
+	int			error;
+
+	error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
+	if (error)
+		return error;
+
+	if (isnullstartblock(got->br_startblock) ||
+	    got->br_startblock == DELAYSTARTBLOCK) {
+		/*
+		 * Delalloc extents that start beyond EOF can occur due to
+		 * speculative EOF allocation when the delalloc extent is larger
+		 * than the largest freespace extent at conversion time.  These
+		 * extents cannot be converted by data writeback, so can exist
+		 * here even if we are not supposed to be finding delalloc
+		 * extents.
+		 */
+		if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
+			ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
+
+		p->bmv_oflags |= BMV_OF_DELALLOC;
+		p->bmv_block = -2;
 	} else {
-		if (startblock == DELAYSTARTBLOCK)
-			out->bmv_block = -2;
-		else
-			out->bmv_block = xfs_fsb_to_db(ip, startblock);
-		fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
-		ifp = XFS_IFORK_PTR(ip, whichfork);
-		if (!moretocome &&
-		    xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
-		   (lastx == xfs_iext_count(ifp) - 1))
-			out->bmv_oflags |= BMV_OF_LAST;
+		p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
 	}
 
-	return 1;
+	if (got->br_state == XFS_EXT_UNWRITTEN &&
+	    (bmv->bmv_iflags & BMV_IF_PREALLOC))
+		p->bmv_oflags |= BMV_OF_PREALLOC;
+
+	if (shared)
+		p->bmv_oflags |= BMV_OF_SHARED;
+
+	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
+	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);
+
+	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
+	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
+	bmv->bmv_entries++;
+	return 0;
 }
 
-/* Adjust the reported bmap around shared/unshared extent transitions. */
-STATIC int
-xfs_getbmap_adjust_shared(
-	struct xfs_inode		*ip,
-	int				whichfork,
-	struct xfs_bmbt_irec		*map,
-	struct getbmapx			*out,
-	struct xfs_bmbt_irec		*next_map)
+static void
+xfs_getbmap_report_hole(
+	struct xfs_inode	*ip,
+	struct getbmapx		*bmv,
+	struct getbmapx		*out,
+	int64_t			bmv_end,
+	xfs_fileoff_t		bno,
+	xfs_fileoff_t		end)
 {
-	struct xfs_mount		*mp = ip->i_mount;
-	xfs_agnumber_t			agno;
-	xfs_agblock_t			agbno;
-	xfs_agblock_t			ebno;
-	xfs_extlen_t			elen;
-	xfs_extlen_t			nlen;
-	int				error;
+	struct getbmapx		*p = out + bmv->bmv_entries;
 
-	next_map->br_startblock = NULLFSBLOCK;
-	next_map->br_startoff = NULLFILEOFF;
-	next_map->br_blockcount = 0;
+	if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
+		return;
 
-	/* Only written data blocks can be shared. */
-	if (!xfs_is_reflink_inode(ip) ||
-	    whichfork != XFS_DATA_FORK ||
-	    !xfs_bmap_is_real_extent(map))
-		return 0;
+	p->bmv_block = -1;
+	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
+	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);
 
-	agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
-	agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
-	error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
-			map->br_blockcount, &ebno, &elen, true);
-	if (error)
-		return error;
+	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
+	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
+	bmv->bmv_entries++;
+}
 
-	if (ebno == NULLAGBLOCK) {
-		/* No shared blocks at all. */
-		return 0;
-	} else if (agbno == ebno) {
-		/*
-		 * Shared extent at (agbno, elen).  Shrink the reported
-		 * extent length and prepare to move the start of map[i]
-		 * to agbno+elen, with the aim of (re)formatting the new
-		 * map[i] the next time through the inner loop.
-		 */
-		out->bmv_length = XFS_FSB_TO_BB(mp, elen);
-		out->bmv_oflags |= BMV_OF_SHARED;
-		if (elen != map->br_blockcount) {
-			*next_map = *map;
-			next_map->br_startblock += elen;
-			next_map->br_startoff += elen;
-			next_map->br_blockcount -= elen;
-		}
-		map->br_blockcount -= elen;
-	} else {
-		/*
-		 * There's an unshared extent (agbno, ebno - agbno)
-		 * followed by shared extent at (ebno, elen).  Shrink
-		 * the reported extent length to cover only the unshared
-		 * extent and prepare to move up the start of map[i] to
-		 * ebno, with the aim of (re)formatting the new map[i]
-		 * the next time through the inner loop.
-		 */
-		*next_map = *map;
-		nlen = ebno - agbno;
-		out->bmv_length = XFS_FSB_TO_BB(mp, nlen);
-		next_map->br_startblock += nlen;
-		next_map->br_startoff += nlen;
-		next_map->br_blockcount -= nlen;
-		map->br_blockcount -= nlen;
-	}
+static inline bool
+xfs_getbmap_full(
+	struct getbmapx		*bmv)
+{
+	return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
+}
 
-	return 0;
+static bool
+xfs_getbmap_next_rec(
+	struct xfs_bmbt_irec	*rec,
+	xfs_fileoff_t		total_end)
+{
+	xfs_fileoff_t		end = rec->br_startoff + rec->br_blockcount;
+
+	if (end == total_end)
+		return false;
+
+	rec->br_startoff += rec->br_blockcount;
+	if (!isnullstartblock(rec->br_startblock) &&
+	    rec->br_startblock != DELAYSTARTBLOCK)
+		rec->br_startblock += rec->br_blockcount;
+	rec->br_blockcount = total_end - end;
+	return true;
 }
 
 /*
@@ -538,119 +516,72 @@ xfs_getbmap(
 	xfs_bmap_format_t	formatter,	/* format to user */
 	void			*arg)		/* formatter arg */
 {
-	int64_t			bmvend;		/* last block requested */
-	int			error = 0;	/* return value */
-	int64_t			fixlen;		/* length for -1 case */
-	int			i;		/* extent number */
-	int			lock;		/* lock state */
-	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
-	xfs_mount_t		*mp;		/* file system mount point */
-	int			nex;		/* # of user extents can do */
-	int			subnex;		/* # of bmapi's can do */
-	int			nmap;		/* number of map entries */
-	struct getbmapx		*out;		/* output structure */
-	int			whichfork;	/* data or attr fork */
-	int			prealloced;	/* this is a file with
-						 * preallocated data space */
-	int			iflags;		/* interface flags */
-	int			bmapi_flags;	/* flags for xfs_bmapi */
-	int			cur_ext = 0;
-	struct xfs_bmbt_irec	inject_map;
-
-	mp = ip->i_mount;
-	iflags = bmv->bmv_iflags;
+	struct xfs_mount	*mp = ip->i_mount;
+	int			iflags = bmv->bmv_iflags;
+	int			whichfork, lock, i, error = 0;
+	int64_t			bmv_end, max_len;
+	xfs_fileoff_t		bno, first_bno;
+	struct xfs_ifork	*ifp;
+	struct getbmapx		*out;
+	struct xfs_bmbt_irec	got, rec;
+	xfs_filblks_t		len;
+	xfs_extnum_t		idx;
 
 #ifndef DEBUG
 	/* Only allow CoW fork queries if we're debugging. */
 	if (iflags & BMV_IF_COWFORK)
 		return -EINVAL;
 #endif
+
 	if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
 		return -EINVAL;
 
+	if (bmv->bmv_count <= 1)
+		return -EINVAL;
+	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
+		return -ENOMEM;
+
+	if (bmv->bmv_length < -1)
+		return -EINVAL;
+
+	bmv->bmv_entries = 0;
+	if (bmv->bmv_length == 0)
+		return 0;
+
+	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
+	if (!out)
+		return -ENOMEM;
+
 	if (iflags & BMV_IF_ATTRFORK)
 		whichfork = XFS_ATTR_FORK;
 	else if (iflags & BMV_IF_COWFORK)
 		whichfork = XFS_COW_FORK;
 	else
 		whichfork = XFS_DATA_FORK;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
 
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	switch (whichfork) {
 	case XFS_ATTR_FORK:
-		if (XFS_IFORK_Q(ip)) {
-			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
-			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
-			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
-				return -EINVAL;
-		} else if (unlikely(
-			   ip->i_d.di_aformat != 0 &&
-			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
-			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
-					 ip->i_mount);
-			return -EFSCORRUPTED;
-		}
+		if (!XFS_IFORK_Q(ip))
+			goto out_unlock_iolock;
 
-		prealloced = 0;
-		fixlen = 1LL << 32;
+		max_len = 1LL << 32;
+		lock = xfs_ilock_attr_map_shared(ip);
 		break;
 	case XFS_COW_FORK:
-		if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS)
-			return -EINVAL;
+		/* No CoW fork? Just return */
+		if (!ifp)
+			goto out_unlock_iolock;
 
-		if (xfs_get_cowextsz_hint(ip)) {
-			prealloced = 1;
-			fixlen = mp->m_super->s_maxbytes;
-		} else {
-			prealloced = 0;
-			fixlen = XFS_ISIZE(ip);
-		}
-		break;
-	default:
-		/* Local format data forks report no extents. */
-		if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-			bmv->bmv_entries = 0;
-			return 0;
-		}
-		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
-		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
-			return -EINVAL;
+		if (xfs_get_cowextsz_hint(ip))
+			max_len = mp->m_super->s_maxbytes;
+		else
+			max_len = XFS_ISIZE(ip);
 
-		if (xfs_get_extsz_hint(ip) ||
-		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
-			prealloced = 1;
-			fixlen = mp->m_super->s_maxbytes;
-		} else {
-			prealloced = 0;
-			fixlen = XFS_ISIZE(ip);
-		}
+		lock = XFS_ILOCK_SHARED;
+		xfs_ilock(ip, lock);
 		break;
-	}
-
-	if (bmv->bmv_length == -1) {
-		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
-		bmv->bmv_length =
-			max_t(int64_t, fixlen - bmv->bmv_offset, 0);
-	} else if (bmv->bmv_length == 0) {
-		bmv->bmv_entries = 0;
-		return 0;
-	} else if (bmv->bmv_length < 0) {
-		return -EINVAL;
-	}
-
-	nex = bmv->bmv_count - 1;
-	if (nex <= 0)
-		return -EINVAL;
-	bmvend = bmv->bmv_offset + bmv->bmv_length;
-
-
-	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
-		return -ENOMEM;
-	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
-	if (!out)
-		return -ENOMEM;
-
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-	switch (whichfork) {
 	case XFS_DATA_FORK:
 		if (!(iflags & BMV_IF_DELALLOC) &&
 		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
@@ -668,147 +599,107 @@ xfs_getbmap(
 			 */
 		}
 
+		if (xfs_get_extsz_hint(ip) ||
+		    (ip->i_d.di_flags &
+		     (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
+			max_len = mp->m_super->s_maxbytes;
+		else
+			max_len = XFS_ISIZE(ip);
+
 		lock = xfs_ilock_data_map_shared(ip);
 		break;
-	case XFS_COW_FORK:
-		lock = XFS_ILOCK_SHARED;
-		xfs_ilock(ip, lock);
-		break;
-	case XFS_ATTR_FORK:
-		lock = xfs_ilock_attr_map_shared(ip);
-		break;
 	}
 
-	/*
-	 * Don't let nex be bigger than the number of extents
-	 * we can have assuming alternating holes and real extents.
-	 */
-	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
-		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
-
-	bmapi_flags = xfs_bmapi_aflag(whichfork);
-	if (!(iflags & BMV_IF_PREALLOC))
-		bmapi_flags |= XFS_BMAPI_IGSTATE;
-
-	/*
-	 * Allocate enough space to handle "subnex" maps at a time.
-	 */
-	error = -ENOMEM;
-	subnex = 16;
-	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
-	if (!map)
+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+	case XFS_DINODE_FMT_EXTENTS:
+	case XFS_DINODE_FMT_BTREE:
+		break;
+	case XFS_DINODE_FMT_LOCAL:
+		/* Local format inode forks report no extents. */
 		goto out_unlock_ilock;
+	default:
+		error = -EINVAL;
+		goto out_unlock_ilock;
+	}
 
-	bmv->bmv_entries = 0;
-
-	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
-	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
-		error = 0;
-		goto out_free_map;
+	if (bmv->bmv_length == -1) {
+		max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
+		bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
 	}
 
-	do {
-		nmap = (nex> subnex) ? subnex : nex;
-		error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
-				       XFS_BB_TO_FSB(mp, bmv->bmv_length),
-				       map, &nmap, bmapi_flags);
-		if (error)
-			goto out_free_map;
-		ASSERT(nmap <= subnex);
-
-		for (i = 0; i < nmap && bmv->bmv_length &&
-				cur_ext < bmv->bmv_count - 1; i++) {
-			out[cur_ext].bmv_oflags = 0;
-			if (map[i].br_state == XFS_EXT_UNWRITTEN)
-				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
-			else if (map[i].br_startblock == DELAYSTARTBLOCK)
-				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
-			out[cur_ext].bmv_offset =
-				XFS_FSB_TO_BB(mp, map[i].br_startoff);
-			out[cur_ext].bmv_length =
-				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
-			out[cur_ext].bmv_unused1 = 0;
-			out[cur_ext].bmv_unused2 = 0;
+	bmv_end = bmv->bmv_offset + bmv->bmv_length;
 
-			/*
-			 * delayed allocation extents that start beyond EOF can
-			 * occur due to speculative EOF allocation when the
-			 * delalloc extent is larger than the largest freespace
-			 * extent at conversion time. These extents cannot be
-			 * converted by data writeback, so can exist here even
-			 * if we are not supposed to be finding delalloc
-			 * extents.
-			 */
-			if (map[i].br_startblock == DELAYSTARTBLOCK &&
-			    map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
-				ASSERT((iflags & BMV_IF_DELALLOC) != 0);
-
-                        if (map[i].br_startblock == HOLESTARTBLOCK &&
-			    whichfork == XFS_ATTR_FORK) {
-				/* came to the end of attribute fork */
-				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
-				goto out_free_map;
-			}
+	first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
+	len = XFS_BB_TO_FSB(mp, bmv->bmv_length);
 
-			/* Is this a shared block? */
-			error = xfs_getbmap_adjust_shared(ip, whichfork,
-					&map[i], &out[cur_ext], &inject_map);
-			if (error)
-				goto out_free_map;
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(NULL, ip, whichfork);
+		if (error)
+			goto out_unlock_ilock;
+	}
 
-			if (!xfs_getbmapx_fix_eof_hole(ip, whichfork,
-					&out[cur_ext], prealloced, bmvend,
-					map[i].br_startblock,
-					inject_map.br_startblock != NULLFSBLOCK))
-				goto out_free_map;
+	if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) {
+		/*
+		 * Report a whole-file hole if the delalloc flag is set to
+		 * stay compatible with the old implementation.
+		 */
+		if (iflags & BMV_IF_DELALLOC)
+			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
+					XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
+		goto out_unlock_ilock;
+	}
 
-			bmv->bmv_offset =
-				out[cur_ext].bmv_offset +
-				out[cur_ext].bmv_length;
-			bmv->bmv_length =
-				max_t(int64_t, 0, bmvend - bmv->bmv_offset);
+	while (!xfs_getbmap_full(bmv)) {
+		xfs_trim_extent(&got, first_bno, len);
 
-			/*
-			 * In case we don't want to return the hole,
-			 * don't increase cur_ext so that we can reuse
-			 * it in the next loop.
-			 */
-			if ((iflags & BMV_IF_NO_HOLES) &&
-			    map[i].br_startblock == HOLESTARTBLOCK) {
-				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
-				continue;
-			}
+		/*
+		 * Report an entry for a hole if this extent doesn't directly
+		 * follow the previous one.
+		 */
+		if (got.br_startoff > bno) {
+			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
+					got.br_startoff);
+			if (xfs_getbmap_full(bmv))
+				break;
+		}
 
-			/*
-			 * In order to report shared extents accurately,
-			 * we report each distinct shared/unshared part
-			 * of a single bmbt record using multiple bmap
-			 * extents.  To make that happen, we iterate the
-			 * same map array item multiple times, each
-			 * time trimming out the subextent that we just
-			 * reported.
-			 *
-			 * Because of this, we must check the out array
-			 * index (cur_ext) directly against bmv_count-1
-			 * to avoid overflows.
-			 */
-			if (inject_map.br_startblock != NULLFSBLOCK) {
-				map[i] = inject_map;
-				i--;
+		/*
+		 * In order to report shared extents accurately, we report each
+		 * distinct shared / unshared part of a single bmbt record with
+		 * an individual getbmapx record.
+		 */
+		bno = got.br_startoff + got.br_blockcount;
+		rec = got;
+		do {
+			error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
+					&rec);
+			if (error || xfs_getbmap_full(bmv))
+				goto out_unlock_ilock;
+		} while (xfs_getbmap_next_rec(&rec, bno));
+
+		if (!xfs_iext_get_extent(ifp, ++idx, &got)) {
+			xfs_fileoff_t	end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
+
+			out[bmv->bmv_entries].bmv_oflags |= BMV_OF_LAST;
+
+			if (whichfork != XFS_ATTR_FORK && bno < end &&
+			    !xfs_getbmap_full(bmv)) {
+				xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
+						bno, end);
 			}
-			bmv->bmv_entries++;
-			cur_ext++;
+			break;
 		}
-	} while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
 
- out_free_map:
-	kmem_free(map);
- out_unlock_ilock:
+		if (bno >= first_bno + len)
+			break;
+	}
+
+out_unlock_ilock:
 	xfs_iunlock(ip, lock);
- out_unlock_iolock:
+out_unlock_iolock:
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
-	for (i = 0; i < cur_ext; i++) {
+	for (i = 0; i < bmv->bmv_entries; i++) {
 		/* format results & advance arg */
 		error = formatter(&arg, &out[i]);
 		if (error)
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2017-09-21 15:40 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-18 15:26 getbmap refactor V2 Christoph Hellwig
2017-09-18 15:26 ` [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers Christoph Hellwig
2017-09-20 13:23   ` Brian Foster
2017-09-20 14:41     ` Christoph Hellwig
2017-09-20 17:03       ` Darrick J. Wong
2017-09-20 23:00   ` Darrick J. Wong
2017-09-20 23:08     ` Darrick J. Wong
2017-09-21 13:36       ` Christoph Hellwig
2017-09-21 15:35         ` Darrick J. Wong
2017-09-21 13:35     ` Christoph Hellwig
2017-09-21 15:40       ` Darrick J. Wong
2017-09-18 15:26 ` [PATCH 2/2] xfs: simplify the xfs_getbmap interface Christoph Hellwig
2017-09-20 23:08   ` Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2017-09-03 15:51 [PATCH 1/2] xfs: rewrite getbmap using the xfs_iext_* helpers Christoph Hellwig
2017-09-11 15:49 ` Brian Foster
2017-09-17 21:44   ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.