All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/16] xfs: first part of rmapbt functionality
@ 2016-03-08  4:16 Dave Chinner
  2016-03-08  4:16 ` [PATCH 01/16] xfs: introduce rmap btree definitions Dave Chinner
                   ` (16 more replies)
  0 siblings, 17 replies; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

Hi folks,

This is the first set of patches for reverse mapping support that
I've grabbed from Darrick's tree. They are largely unchanged
from his tree, with minor cleanups to the commit messages and
formatting (e.g. de-shouting inline functions), some small bug fixes
and some code/patch rearrangement. Changes are noted in the commit
messages.

This isn't all of the rmap functionality. It's patches up to the
point where I've come across the first piece that needs to be
reworked (the rmap intent execution code), so there's no point
holding these back until I've sorted that out. This builds on top of
for-next and the patch set I posted yesterday.

Darrick, I've changed the authorship of the patches to reflect
the original series this has come from - can you check to see if
there's anything I got wrong when I did that?

Cheers,

Dave.


_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 01/16] xfs: introduce rmap btree definitions
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-08  4:16 ` [PATCH 02/16] xfs: add rmap btree stats infrastructure Dave Chinner
                   ` (15 subsequent siblings)
  16 siblings, 0 replies; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

Add new per-ag rmap btree definitions to the per-ag structures. The
rmap btree will sit in the empty slots on disk after the free space
btrees, and hence form a part of the array of space management
btrees. This requires the definition of the btree to be contiguous
with the free space btrees.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/libxfs/xfs_alloc.c  |  6 ++++++
 fs/xfs/libxfs/xfs_btree.c  |  4 ++--
 fs/xfs/libxfs/xfs_btree.h  |  3 +++
 fs/xfs/libxfs/xfs_format.h | 22 +++++++++++++++++-----
 fs/xfs/libxfs/xfs_types.h  |  4 ++--
 5 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index a708e38..d0b78b7 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2282,6 +2282,10 @@ xfs_agf_verify(
 	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
 		return false;
 
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)
+		return false;
+
 	/*
 	 * during growfs operations, the perag is not fully initialised,
 	 * so we can't use it for any useful checking. growfs ensures we can't
@@ -2413,6 +2417,8 @@ xfs_alloc_read_agf(
 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
 		pag->pagf_levels[XFS_BTNUM_CNTi] =
 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
+		pag->pagf_levels[XFS_BTNUM_RMAPi] =
+			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
 		spin_lock_init(&pag->pagb_lock);
 		pag->pagb_count = 0;
 		pag->pagb_tree = RB_ROOT;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 1f88e1c..5953764 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -43,9 +43,9 @@ kmem_zone_t	*xfs_btree_cur_zone;
  * Btree magic numbers.
  */
 static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
-	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
+	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
 	  XFS_FIBT_MAGIC },
-	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
+	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
 	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
 };
 #define xfs_btree_magic(cur) \
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 2e874be..ac9c355 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -63,6 +63,7 @@ union xfs_btree_rec {
 #define	XFS_BTNUM_BMAP	((xfs_btnum_t)XFS_BTNUM_BMAPi)
 #define	XFS_BTNUM_INO	((xfs_btnum_t)XFS_BTNUM_INOi)
 #define	XFS_BTNUM_FINO	((xfs_btnum_t)XFS_BTNUM_FINOi)
+#define	XFS_BTNUM_RMAP	((xfs_btnum_t)XFS_BTNUM_RMAPi)
 
 /*
  * For logging record fields.
@@ -95,6 +96,7 @@ do {    \
 	case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \
 	case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \
 	case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \
+	case XFS_BTNUM_RMAP: break;	\
 	case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break;	\
 	}       \
 } while (0)
@@ -115,6 +117,7 @@ do {    \
 		__XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \
 	case XFS_BTNUM_FINO:	\
 		__XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \
+	case XFS_BTNUM_RMAP: break; \
 	case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
 	}       \
 } while (0)
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index dc97eb21..4a48977 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -455,6 +455,7 @@ xfs_sb_has_compat_feature(
 }
 
 #define XFS_SB_FEAT_RO_COMPAT_FINOBT   (1 << 0)		/* free inode btree */
+#define XFS_SB_FEAT_RO_COMPAT_RMAPBT   (1 << 1)		/* reverse map btree */
 #define XFS_SB_FEAT_RO_COMPAT_ALL \
 		(XFS_SB_FEAT_RO_COMPAT_FINOBT)
 #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN	~XFS_SB_FEAT_RO_COMPAT_ALL
@@ -538,6 +539,12 @@ static inline bool xfs_sb_version_hasmetauuid(struct xfs_sb *sbp)
 		(sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID);
 }
 
+static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp)
+{
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
+		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT);
+}
+
 /*
  * end of superblock version macros
  */
@@ -598,10 +605,10 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
 #define	XFS_AGI_GOOD_VERSION(v)	((v) == XFS_AGI_VERSION)
 
 /*
- * Btree number 0 is bno, 1 is cnt.  This value gives the size of the
+ * Btree number 0 is bno, 1 is cnt, 2 is rmap. This value gives the size of the
  * arrays below.
  */
-#define	XFS_BTNUM_AGF	((int)XFS_BTNUM_CNTi + 1)
+#define	XFS_BTNUM_AGF	((int)XFS_BTNUM_RMAPi + 1)
 
 /*
  * The second word of agf_levels in the first a.g. overlaps the EFS
@@ -618,12 +625,10 @@ typedef struct xfs_agf {
 	__be32		agf_seqno;	/* sequence # starting from 0 */
 	__be32		agf_length;	/* size in blocks of a.g. */
 	/*
-	 * Freespace information
+	 * Freespace and rmap information
 	 */
 	__be32		agf_roots[XFS_BTNUM_AGF];	/* root blocks */
-	__be32		agf_spare0;	/* spare field */
 	__be32		agf_levels[XFS_BTNUM_AGF];	/* btree levels */
-	__be32		agf_spare1;	/* spare field */
 
 	__be32		agf_flfirst;	/* first freelist block's index */
 	__be32		agf_fllast;	/* last freelist block's index */
@@ -1308,6 +1313,13 @@ typedef __be32 xfs_inobt_ptr_t;
 #define	XFS_FIBT_BLOCK(mp)		((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
 
 /*
+ * Reverse mapping btree format definitions
+ *
+ * There is a btree for the reverse map per allocation group
+ */
+#define	XFS_RMAP_CRC_MAGIC	0x524d4233	/* 'RMB3' */
+
+/*
  * The first data block of an AG depends on whether the filesystem was formatted
  * with the finobt feature. If so, account for the finobt reserved root btree
  * block.
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index b79dc66..3d50364 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -108,8 +108,8 @@ typedef enum {
 } xfs_lookup_t;
 
 typedef enum {
-	XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi,
-	XFS_BTNUM_FINOi, XFS_BTNUM_MAX
+	XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi,
+	XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_MAX
 } xfs_btnum_t;
 
 struct xfs_name {
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 02/16] xfs: add rmap btree stats infrastructure
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
  2016-03-08  4:16 ` [PATCH 01/16] xfs: introduce rmap btree definitions Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-08  4:16 ` [PATCH 03/16] xfs: rmap btree add more reserved blocks Dave Chinner
                   ` (14 subsequent siblings)
  16 siblings, 0 replies; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

The rmap btree will require the same stats as all the other generic
btrees, so add all the code for that now.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/libxfs/xfs_btree.h |  7 ++++---
 fs/xfs/xfs_stats.c        |  1 +
 fs/xfs/xfs_stats.h        | 18 +++++++++++++++++-
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index ac9c355..1fcf272 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -96,8 +96,8 @@ do {    \
 	case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \
 	case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \
 	case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \
-	case XFS_BTNUM_RMAP: break;	\
-	case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break;	\
+	case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(__mp, rmap, stat); break; \
+	case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
 	}       \
 } while (0)
 
@@ -117,7 +117,8 @@ do {    \
 		__XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \
 	case XFS_BTNUM_FINO:	\
 		__XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \
-	case XFS_BTNUM_RMAP: break; \
+	case XFS_BTNUM_RMAP:	\
+		__XFS_BTREE_STATS_ADD(__mp, rmap, stat, val); break; \
 	case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
 	}       \
 } while (0)
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 8686df6..f04f547 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -61,6 +61,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
 		{ "bmbt2",		XFSSTAT_END_BMBT_V2		},
 		{ "ibt2",		XFSSTAT_END_IBT_V2		},
 		{ "fibt2",		XFSSTAT_END_FIBT_V2		},
+		{ "rmapbt",		XFSSTAT_END_RMAP_V2		},
 		/* we print both series of quota information together */
 		{ "qm",			XFSSTAT_END_QM			},
 	};
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 483b0ef..657865f 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -197,7 +197,23 @@ struct xfsstats {
 	__uint32_t		xs_fibt_2_alloc;
 	__uint32_t		xs_fibt_2_free;
 	__uint32_t		xs_fibt_2_moves;
-#define XFSSTAT_END_XQMSTAT		(XFSSTAT_END_FIBT_V2+6)
+#define XFSSTAT_END_RMAP_V2		(XFSSTAT_END_FIBT_V2+15)
+	__uint32_t		xs_rmap_2_lookup;
+	__uint32_t		xs_rmap_2_compare;
+	__uint32_t		xs_rmap_2_insrec;
+	__uint32_t		xs_rmap_2_delrec;
+	__uint32_t		xs_rmap_2_newroot;
+	__uint32_t		xs_rmap_2_killroot;
+	__uint32_t		xs_rmap_2_increment;
+	__uint32_t		xs_rmap_2_decrement;
+	__uint32_t		xs_rmap_2_lshift;
+	__uint32_t		xs_rmap_2_rshift;
+	__uint32_t		xs_rmap_2_split;
+	__uint32_t		xs_rmap_2_join;
+	__uint32_t		xs_rmap_2_alloc;
+	__uint32_t		xs_rmap_2_free;
+	__uint32_t		xs_rmap_2_moves;
+#define XFSSTAT_END_XQMSTAT		(XFSSTAT_END_RMAP_V2+6)
 	__uint32_t		xs_qm_dqreclaims;
 	__uint32_t		xs_qm_dqreclaim_misses;
 	__uint32_t		xs_qm_dquot_dups;
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 03/16] xfs: rmap btree add more reserved blocks
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
  2016-03-08  4:16 ` [PATCH 01/16] xfs: introduce rmap btree definitions Dave Chinner
  2016-03-08  4:16 ` [PATCH 02/16] xfs: add rmap btree stats infrastructure Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-10 14:16   ` Christoph Hellwig
  2016-03-10 14:22   ` Christoph Hellwig
  2016-03-08  4:16 ` [PATCH 04/16] libxfs: rearrange xfs_bmap_add_free parameters Dave Chinner
                   ` (13 subsequent siblings)
  16 siblings, 2 replies; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

XFS reserves a small amount of space in each AG for the minimum
number of free blocks needed for operation. Adding the rmap btree
increases the number of reserved blocks, but it also increases the
complexity of the calculation as the free inode btree is optional
(like the rmapbt).

Rather than calculate the prealloc blocks every time we need to
check it, add a function to calculate it at mount time and store it
in the struct xfs_mount, and convert the XFS_PREALLOC_BLOCKS macro
just to use the xfs_mount variable directly.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/libxfs/xfs_alloc.c  | 11 +++++++++++
 fs/xfs/libxfs/xfs_alloc.h  |  2 ++
 fs/xfs/libxfs/xfs_format.h |  9 +--------
 fs/xfs/xfs_fsops.c         |  6 +++---
 fs/xfs/xfs_mount.c         |  2 ++
 fs/xfs/xfs_mount.h         |  1 +
 6 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index d0b78b7..19b7521 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -49,6 +49,17 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
 		xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
 
+xfs_extlen_t
+xfs_prealloc_blocks(
+	struct xfs_mount	*mp)
+{
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return XFS_RMAP_BLOCK(mp) + 1;
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		return XFS_FIBT_BLOCK(mp) + 1;
+	return XFS_IBT_BLOCK(mp) + 1;
+}
+
 /*
  * Lookup the record equal to [bno, len] in the btree given by cur.
  */
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 135eb3d..d260916 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -237,4 +237,6 @@ int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
 			xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
 int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
 
+xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp);
+
 #endif	/* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 4a48977..ebe0eec 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1319,18 +1319,11 @@ typedef __be32 xfs_inobt_ptr_t;
  */
 #define	XFS_RMAP_CRC_MAGIC	0x524d4233	/* 'RMB3' */
 
-/*
- * The first data block of an AG depends on whether the filesystem was formatted
- * with the finobt feature. If so, account for the finobt reserved root btree
- * block.
- */
-#define XFS_PREALLOC_BLOCKS(mp) \
+#define	XFS_RMAP_BLOCK(mp) \
 	(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
 	 XFS_FIBT_BLOCK(mp) + 1 : \
 	 XFS_IBT_BLOCK(mp) + 1)
 
-
-
 /*
  * BMAP Btree format definitions
  *
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index ee3aaa0a..32e24ec 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -246,7 +246,7 @@ xfs_growfs_data_private(
 		agf->agf_flfirst = 0;
 		agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
 		agf->agf_flcount = 0;
-		tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
+		tmpsize = agsize - mp->m_ag_prealloc_blocks;
 		agf->agf_freeblks = cpu_to_be32(tmpsize);
 		agf->agf_longest = cpu_to_be32(tmpsize);
 		if (xfs_sb_version_hascrc(&mp->m_sb))
@@ -343,7 +343,7 @@ xfs_growfs_data_private(
 						agno, 0);
 
 		arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
-		arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
+		arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
 		arec->ar_blockcount = cpu_to_be32(
 			agsize - be32_to_cpu(arec->ar_startblock));
 
@@ -372,7 +372,7 @@ xfs_growfs_data_private(
 						agno, 0);
 
 		arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
-		arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
+		arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
 		arec->ar_blockcount = cpu_to_be32(
 			agsize - be32_to_cpu(arec->ar_startblock));
 		nfree += be32_to_cpu(arec->ar_blockcount);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 536a0ee..ef5de545 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -231,6 +231,8 @@ xfs_initialize_perag(
 
 	if (maxagi)
 		*maxagi = index;
+
+	mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
 	return 0;
 
 out_unwind:
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1c8611f..9788686 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -93,6 +93,7 @@ typedef struct xfs_mount {
 	uint			m_ag_maxlevels;	/* XFS_AG_MAXLEVELS */
 	uint			m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
 	uint			m_in_maxlevels;	/* max inobt btree levels. */
+	xfs_extlen_t		m_ag_prealloc_blocks; /* reserved ag blocks */
 	struct radix_tree_root	m_perag_tree;	/* per-ag accounting info */
 	spinlock_t		m_perag_lock;	/* lock for m_perag_tree */
 	struct mutex		m_growlock;	/* growfs mutex */
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 04/16] libxfs: rearrange xfs_bmap_add_free parameters
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
                   ` (2 preceding siblings ...)
  2016-03-08  4:16 ` [PATCH 03/16] xfs: rmap btree add more reserved blocks Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-08 17:18   ` Christoph Hellwig
  2016-03-08  4:16 ` [PATCH 05/16] xfs: add owner field to extent allocation and freeing Dave Chinner
                   ` (12 subsequent siblings)
  16 siblings, 1 reply; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: "Darrick J. Wong" <darrick.wong@oracle.com>

The order is different to convention, making it a bit strange.
Reorder it according to convention before we start adding new
parameters.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/libxfs/xfs_bmap.c       | 12 ++++++------
 fs/xfs/libxfs/xfs_bmap.h       |  4 ++--
 fs/xfs/libxfs/xfs_bmap_btree.c |  2 +-
 fs/xfs/libxfs/xfs_ialloc.c     |  9 ++++-----
 4 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index cb58d72..7aef8d8 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -573,10 +573,10 @@ xfs_bmap_validate_ret(
  */
 void
 xfs_bmap_add_free(
+	struct xfs_mount	*mp,		/* mount point structure */
+	struct xfs_bmap_free	*flist,		/* list of extents */
 	xfs_fsblock_t		bno,		/* fs block number of extent */
-	xfs_filblks_t		len,		/* length of extent */
-	xfs_bmap_free_t		*flist,		/* list of extents */
-	xfs_mount_t		*mp)		/* mount point structure */
+	xfs_filblks_t		len)		/* length of extent */
 {
 	xfs_bmap_free_item_t	*cur;		/* current (next) element */
 	xfs_bmap_free_item_t	*new;		/* new element */
@@ -702,7 +702,7 @@ xfs_bmap_btree_to_extents(
 	cblock = XFS_BUF_TO_BLOCK(cbp);
 	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 		return error;
-	xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
+	xfs_bmap_add_free(mp, cur->bc_private.b.flist, cbno, 1);
 	ip->i_d.di_nblocks--;
 	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 	xfs_trans_binval(tp, cbp);
@@ -5016,8 +5016,8 @@ xfs_bmap_del_extent(
 	 * If we need to, add to list of extents to delete.
 	 */
 	if (do_fx)
-		xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
-			mp);
+		xfs_bmap_add_free(mp, flist, del->br_startblock,
+			del->br_blockcount);
 	/*
 	 * Adjust inode # blocks in the file.
 	 */
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 423a34e..e081c76 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -191,8 +191,8 @@ void	xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
 
 int	xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
 void	xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
-void	xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len,
-		struct xfs_bmap_free *flist, struct xfs_mount *mp);
+void	xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_bmap_free *flist,
+			  xfs_fsblock_t bno, xfs_filblks_t len);
 void	xfs_bmap_cancel(struct xfs_bmap_free *flist);
 int	xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
 			struct xfs_inode *ip);
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 6282f6e..db0c71e 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -526,7 +526,7 @@ xfs_bmbt_free_block(
 	struct xfs_trans	*tp = cur->bc_tp;
 	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
 
-	xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp);
+	xfs_bmap_add_free(mp, cur->bc_private.b.flist, fsbno, 1);
 	ip->i_d.di_nblocks--;
 
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 22297f9..e3c0af7 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1828,9 +1828,8 @@ xfs_difree_inode_chunk(
 
 	if (!xfs_inobt_issparse(rec->ir_holemask)) {
 		/* not sparse, calculate extent info directly */
-		xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
-				  XFS_AGINO_TO_AGBNO(mp, rec->ir_startino)),
-				  mp->m_ialloc_blks, flist, mp);
+		xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno, sagbno),
+				  mp->m_ialloc_blks);
 		return;
 	}
 
@@ -1873,8 +1872,8 @@ xfs_difree_inode_chunk(
 
 		ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
 		ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
-		xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
-				  flist, mp);
+		xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno, agbno),
+				  contigblk);
 
 		/* reset range to current bit and carry on... */
 		startidx = endidx = nextbit;
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 05/16] xfs: add owner field to extent allocation and freeing
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
                   ` (3 preceding siblings ...)
  2016-03-08  4:16 ` [PATCH 04/16] libxfs: rearrange xfs_bmap_add_free parameters Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-10 14:19   ` Christoph Hellwig
  2016-03-08  4:16 ` [PATCH 06/16] xfs: introduce rmap extent operation stubs Dave Chinner
                   ` (11 subsequent siblings)
  16 siblings, 1 reply; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: "Darrick J. Wong" <darrick.wong@oracle.com>

For the rmap btree to work, we have to feed the extent owner
information to the allocation and freeing functions. This
information is what will end up in the rmap btree that tracks
allocated extents. While we technically don't need the owner
information when freeing extents, passing it allows us to validate
that the extent we are removing from the rmap btree actually
belonged to the owner we expected it to belong to.

We also define a special set of owner values for internal metadata
that would otherwise have no owner. This allows us to tell the
difference between metadata owned by different per-ag btrees, as
well as static fs metadata (e.g. AG headers) and internal journal
blocks.

There are also a couple of special cases we need to take care of -
during EFI recovery, we don't actually know who the original owner
was, so we need to pass a wildcard to indicate that we aren't
checking the owner for validity. We also need special handling in
growfs, as we "free" the space in the last AG when extending it, but
because it's new space it has no actual owner...

Extend the owner field to include both the owner type and some sort
of index within the owner.  The index field will be used to support
reverse mappings when reflink is enabled.

This is based upon a patch originally from Dave Chinner. It has been
extended to add more owner information with the intent of helping
recovery operations when things go wrong (e.g. offset of user data
block in a file).

[dchinner: de-shout the xfs_rmap_*_owner helpers]

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/libxfs/xfs_alloc.c        | 12 ++++++--
 fs/xfs/libxfs/xfs_alloc.h        |  4 ++-
 fs/xfs/libxfs/xfs_bmap.c         | 17 +++++++++--
 fs/xfs/libxfs/xfs_bmap.h         |  4 ++-
 fs/xfs/libxfs/xfs_bmap_btree.c   |  6 +++-
 fs/xfs/libxfs/xfs_format.h       | 65 ++++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_ialloc.c       |  7 +++--
 fs/xfs/libxfs/xfs_ialloc_btree.c |  7 ++++-
 fs/xfs/xfs_bmap_util.c           |  3 +-
 fs/xfs/xfs_fsops.c               | 16 +++++++---
 fs/xfs/xfs_log_recover.c         |  5 +++-
 fs/xfs/xfs_trans.h               |  2 +-
 fs/xfs/xfs_trans_extfree.c       |  5 ++--
 13 files changed, 132 insertions(+), 21 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 19b7521..0222e03 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1595,6 +1595,7 @@ xfs_free_ag_extent(
 	xfs_agnumber_t	agno,	/* allocation group number */
 	xfs_agblock_t	bno,	/* starting block number */
 	xfs_extlen_t	len,	/* length of extent */
+	struct xfs_owner_info	*oinfo,	/* extent owner */
 	int		isfl)	/* set if is freelist blocks - no sb acctg */
 {
 	xfs_btree_cur_t	*bno_cur;	/* cursor for by-block btree */
@@ -2015,13 +2016,15 @@ xfs_alloc_fix_freelist(
 	 * back on the free list? Maybe we should only do this when space is
 	 * getting low or the AGFL is more than half full?
 	 */
+	xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
 	while (pag->pagf_flcount > need) {
 		struct xfs_buf	*bp;
 
 		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
 		if (error)
 			goto out_agbp_relse;
-		error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1);
+		error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
+					   &targs.oinfo, 1);
 		if (error)
 			goto out_agbp_relse;
 		bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
@@ -2031,6 +2034,7 @@ xfs_alloc_fix_freelist(
 	memset(&targs, 0, sizeof(targs));
 	targs.tp = tp;
 	targs.mp = mp;
+	xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
 	targs.agbp = agbp;
 	targs.agno = args->agno;
 	targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
@@ -2684,7 +2688,8 @@ int				/* error */
 xfs_free_extent(
 	xfs_trans_t	*tp,	/* transaction pointer */
 	xfs_fsblock_t	bno,	/* starting block number of extent */
-	xfs_extlen_t	len)	/* length of extent */
+	xfs_extlen_t	len,	/* length of extent */
+	struct xfs_owner_info	*oinfo)	/* extent owner */
 {
 	xfs_alloc_arg_t	args;
 	int		error;
@@ -2720,7 +2725,8 @@ xfs_free_extent(
 		goto error0;
 	}
 
-	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
+	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno,
+				   len, oinfo, 0);
 	if (!error)
 		xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
 error0:
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index d260916..6d0f328 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -123,6 +123,7 @@ typedef struct xfs_alloc_arg {
 	char		isfl;		/* set if is freelist blocks - !acctg */
 	char		userdata;	/* mask defining userdata treatment */
 	xfs_fsblock_t	firstblock;	/* io first block allocated */
+	struct xfs_owner_info	oinfo;		/* owner of blocks being allocated */
 } xfs_alloc_arg_t;
 
 /*
@@ -210,7 +211,8 @@ int				/* error */
 xfs_free_extent(
 	struct xfs_trans *tp,	/* transaction pointer */
 	xfs_fsblock_t	bno,	/* starting block number of extent */
-	xfs_extlen_t	len);	/* length of extent */
+	xfs_extlen_t	len,	/* length of extent */
+	struct xfs_owner_info	*oinfo);	/* extent owner */
 
 int					/* error */
 xfs_alloc_lookup_le(
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7aef8d8..59d4fd1 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -576,7 +576,8 @@ xfs_bmap_add_free(
 	struct xfs_mount	*mp,		/* mount point structure */
 	struct xfs_bmap_free	*flist,		/* list of extents */
 	xfs_fsblock_t		bno,		/* fs block number of extent */
-	xfs_filblks_t		len)		/* length of extent */
+	xfs_filblks_t		len,		/* length of extent */
+	struct xfs_owner_info	*oinfo)		/* extent owner */
 {
 	xfs_bmap_free_item_t	*cur;		/* current (next) element */
 	xfs_bmap_free_item_t	*new;		/* new element */
@@ -597,9 +598,14 @@ xfs_bmap_add_free(
 	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
 #endif
 	ASSERT(xfs_bmap_free_item_zone != NULL);
+
 	new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
 	new->xbfi_startblock = bno;
 	new->xbfi_blockcount = (xfs_extlen_t)len;
+	if (oinfo)
+		memcpy(&new->xbfi_oinfo, oinfo, sizeof(struct xfs_owner_info));
+	else
+		memset(&new->xbfi_oinfo, 0, sizeof(struct xfs_owner_info));
 	for (prev = NULL, cur = flist->xbf_first;
 	     cur != NULL;
 	     prev = cur, cur = cur->xbfi_next) {
@@ -679,6 +685,7 @@ xfs_bmap_btree_to_extents(
 	xfs_mount_t		*mp;	/* mount point structure */
 	__be64			*pp;	/* ptr to block address */
 	struct xfs_btree_block	*rblock;/* root btree block */
+	struct xfs_owner_info	oinfo;
 
 	mp = ip->i_mount;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -702,7 +709,8 @@ xfs_bmap_btree_to_extents(
 	cblock = XFS_BUF_TO_BLOCK(cbp);
 	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 		return error;
-	xfs_bmap_add_free(mp, cur->bc_private.b.flist, cbno, 1);
+	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
+	xfs_bmap_add_free(mp, cur->bc_private.b.flist, cbno, 1, &oinfo);
 	ip->i_d.di_nblocks--;
 	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 	xfs_trans_binval(tp, cbp);
@@ -783,6 +791,7 @@ xfs_bmap_extents_to_btree(
 	memset(&args, 0, sizeof(args));
 	args.tp = tp;
 	args.mp = mp;
+	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
 	args.firstblock = *firstblock;
 	if (*firstblock == NULLFSBLOCK) {
 		args.type = XFS_ALLOCTYPE_START_BNO;
@@ -929,6 +938,7 @@ xfs_bmap_local_to_extents(
 	memset(&args, 0, sizeof(args));
 	args.tp = tp;
 	args.mp = ip->i_mount;
+	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
 	args.firstblock = *firstblock;
 	/*
 	 * Allocate a block.  We know we need only one, since the
@@ -4831,6 +4841,7 @@ xfs_bmap_del_extent(
 		nblks = 0;
 		do_fx = 0;
 	}
+
 	/*
 	 * Set flag value to use in switch statement.
 	 * Left-contig is 2, right-contig is 1.
@@ -5017,7 +5028,7 @@ xfs_bmap_del_extent(
 	 */
 	if (do_fx)
 		xfs_bmap_add_free(mp, flist, del->br_startblock,
-			del->br_blockcount);
+				  del->br_blockcount, NULL);
 	/*
 	 * Adjust inode # blocks in the file.
 	 */
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index e081c76..06dbe08 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -66,6 +66,7 @@ typedef struct xfs_bmap_free_item
 {
 	xfs_fsblock_t		xbfi_startblock;/* starting fs block number */
 	xfs_extlen_t		xbfi_blockcount;/* number of blocks in extent */
+	struct xfs_owner_info	xbfi_oinfo;	/* extent owner */
 	struct xfs_bmap_free_item *xbfi_next;	/* link to next entry */
 } xfs_bmap_free_item_t;
 
@@ -192,7 +193,8 @@ void	xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
 int	xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
 void	xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
 void	xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_bmap_free *flist,
-			  xfs_fsblock_t bno, xfs_filblks_t len);
+			  xfs_fsblock_t bno, xfs_filblks_t len,
+			  struct xfs_owner_info *oinfo);
 void	xfs_bmap_cancel(struct xfs_bmap_free *flist);
 int	xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
 			struct xfs_inode *ip);
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index db0c71e..e247b02 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -446,6 +446,8 @@ xfs_bmbt_alloc_block(
 	args.mp = cur->bc_mp;
 	args.fsbno = cur->bc_private.b.firstblock;
 	args.firstblock = args.fsbno;
+	xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_private.b.ip->i_ino,
+			cur->bc_private.b.whichfork);
 
 	if (args.fsbno == NULLFSBLOCK) {
 		args.fsbno = be64_to_cpu(start->l);
@@ -525,8 +527,10 @@ xfs_bmbt_free_block(
 	struct xfs_inode	*ip = cur->bc_private.b.ip;
 	struct xfs_trans	*tp = cur->bc_tp;
 	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
+	struct xfs_owner_info	oinfo;
 
-	xfs_bmap_add_free(mp, cur->bc_private.b.flist, fsbno, 1);
+	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_private.b.whichfork);
+	xfs_bmap_add_free(mp, cur->bc_private.b.flist, fsbno, 1, &oinfo);
 	ip->i_d.di_nblocks--;
 
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index ebe0eec..232c145 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1319,6 +1319,71 @@ typedef __be32 xfs_inobt_ptr_t;
  */
 #define	XFS_RMAP_CRC_MAGIC	0x524d4233	/* 'RMB3' */
 
+/*
+ * Ownership info for an extent.  This is used to create reverse-mapping
+ * entries.
+ */
+#define XFS_RMAP_INO_ATTR_FORK	(1)
+#define XFS_RMAP_BMBT_BLOCK	(2)
+struct xfs_owner_info {
+	uint64_t		oi_owner;
+	xfs_fileoff_t		oi_offset;
+	unsigned int		oi_flags;
+};
+
+static inline void
+xfs_rmap_ag_owner(
+	struct xfs_owner_info	*oi,
+	uint64_t		owner)
+{
+	oi->oi_owner = owner;
+	oi->oi_offset = 0;
+	oi->oi_flags = 0;
+}
+
+static inline void
+xfs_rmap_ino_bmbt_owner(
+	struct xfs_owner_info	*oi,
+	xfs_ino_t		ino,
+	int			whichfork)
+{
+	oi->oi_owner = ino;
+	oi->oi_offset = 0;
+	oi->oi_flags = XFS_RMAP_BMBT_BLOCK;
+	if (whichfork == XFS_ATTR_FORK)
+		oi->oi_flags |= XFS_RMAP_INO_ATTR_FORK;
+}
+
+static inline void
+xfs_rmap_ino_owner(
+	struct xfs_owner_info	*oi,
+	xfs_ino_t		ino,
+	int			whichfork,
+	xfs_fileoff_t		offset)
+{
+	oi->oi_owner = ino;
+	oi->oi_offset = offset;
+	oi->oi_flags = 0;
+	if (whichfork == XFS_ATTR_FORK)
+		oi->oi_flags |= XFS_RMAP_INO_ATTR_FORK;
+}
+
+/*
+ * Special owner types.
+ *
+ * Seeing as we only support up to 8EB, we have the upper bit of the owner field
+ * to tell us we have a special owner value. We use these for static metadata
+ * allocated at mkfs/growfs time, as well as for freespace management metadata.
+ */
+#define XFS_RMAP_OWN_NULL	(-1ULL)	/* No owner, for growfs */
+#define XFS_RMAP_OWN_UNKNOWN	(-2ULL)	/* Unknown owner, for EFI recovery */
+#define XFS_RMAP_OWN_FS		(-3ULL)	/* static fs metadata */
+#define XFS_RMAP_OWN_LOG	(-4ULL)	/* static fs metadata */
+#define XFS_RMAP_OWN_AG		(-5ULL)	/* AG freespace btree blocks */
+#define XFS_RMAP_OWN_INOBT	(-6ULL)	/* Inode btree blocks */
+#define XFS_RMAP_OWN_INODES	(-7ULL)	/* Inode chunk */
+#define XFS_RMAP_OWN_MIN	(-8ULL) /* guard */
+
 #define	XFS_RMAP_BLOCK(mp) \
 	(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
 	 XFS_FIBT_BLOCK(mp) + 1 : \
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index e3c0af7..aa3d10e 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -614,6 +614,7 @@ xfs_ialloc_ag_alloc(
 	args.tp = tp;
 	args.mp = tp->t_mountp;
 	args.fsbno = NULLFSBLOCK;
+	xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_INODES);
 
 #ifdef DEBUG
 	/* randomly do sparse inode allocations */
@@ -1824,12 +1825,14 @@ xfs_difree_inode_chunk(
 	int		nextbit;
 	xfs_agblock_t	agbno;
 	int		contigblk;
+	struct xfs_owner_info	oinfo;
 	DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
 
 	if (!xfs_inobt_issparse(rec->ir_holemask)) {
 		/* not sparse, calculate extent info directly */
 		xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno, sagbno),
-				  mp->m_ialloc_blks);
+				  mp->m_ialloc_blks, &oinfo);
 		return;
 	}
 
@@ -1873,7 +1876,7 @@ xfs_difree_inode_chunk(
 		ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
 		ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
 		xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno, agbno),
-				  contigblk);
+				  contigblk, &oinfo);
 
 		/* reset range to current bit and carry on... */
 		startidx = endidx = nextbit;
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 89c21d7..1a7b016 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -96,6 +96,7 @@ xfs_inobt_alloc_block(
 	memset(&args, 0, sizeof(args));
 	args.tp = cur->bc_tp;
 	args.mp = cur->bc_mp;
+	xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_INOBT);
 	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
 	args.minlen = 1;
 	args.maxlen = 1;
@@ -125,8 +126,12 @@ xfs_inobt_free_block(
 	struct xfs_btree_cur	*cur,
 	struct xfs_buf		*bp)
 {
+	struct xfs_owner_info	oinfo;
+
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
 	return xfs_free_extent(cur->bc_tp,
-			XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1);
+			XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
+			&oinfo);
 }
 
 STATIC int
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index a32c1dc..834639d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -149,7 +149,8 @@ xfs_bmap_finish(
 		next = free->xbfi_next;
 
 		error = xfs_trans_free_extent(*tp, efd, free->xbfi_startblock,
-					      free->xbfi_blockcount);
+					      free->xbfi_blockcount,
+					      &free->xbfi_oinfo);
 		if (error)
 			return error;
 
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 32e24ec..b4ab22c 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -439,6 +439,8 @@ xfs_growfs_data_private(
 	 * There are new blocks in the old last a.g.
 	 */
 	if (new) {
+		struct xfs_owner_info	oinfo;
+
 		/*
 		 * Change the agi length.
 		 */
@@ -466,14 +468,20 @@ xfs_growfs_data_private(
 		       be32_to_cpu(agi->agi_length));
 
 		xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
+
 		/*
 		 * Free the new space.
+		 *
+		 * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that
+		 * this extent doesn't actually exist in the rmap btree.
 		 */
-		error = xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, agno,
-			be32_to_cpu(agf->agf_length) - new), new);
-		if (error) {
+		xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
+		error = xfs_free_extent(tp,
+				XFS_AGB_TO_FSB(mp, agno,
+					be32_to_cpu(agf->agf_length) - new),
+				new, &oinfo);
+		if (error)
 			goto error0;
-		}
 	}
 
 	/*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 396565f..7c9bc7c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4179,6 +4179,7 @@ xlog_recover_process_efi(
 	int			error = 0;
 	xfs_extent_t		*extp;
 	xfs_fsblock_t		startblock_fsb;
+	struct xfs_owner_info	oinfo;
 
 	ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
 
@@ -4211,10 +4212,12 @@ xlog_recover_process_efi(
 		goto abort_error;
 	efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
 
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_UNKNOWN);
 	for (i = 0; i < efip->efi_format.efi_nextents; i++) {
 		extp = &(efip->efi_format.efi_extents[i]);
 		error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
-					      extp->ext_len);
+					      extp->ext_len,
+					      &oinfo);
 		if (error)
 			goto abort_error;
 
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index e7c49cf..d49dfef 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -221,7 +221,7 @@ struct xfs_efd_log_item	*xfs_trans_get_efd(xfs_trans_t *,
 				  uint);
 int		xfs_trans_free_extent(struct xfs_trans *,
 				      struct xfs_efd_log_item *, xfs_fsblock_t,
-				      xfs_extlen_t);
+				      xfs_extlen_t, struct xfs_owner_info *);
 int		xfs_trans_commit(struct xfs_trans *);
 int		__xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *);
 int		xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index a96ae54..d1b8833 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -118,13 +118,14 @@ xfs_trans_free_extent(
 	struct xfs_trans	*tp,
 	struct xfs_efd_log_item	*efdp,
 	xfs_fsblock_t		start_block,
-	xfs_extlen_t		ext_len)
+	xfs_extlen_t		ext_len,
+	struct xfs_owner_info	*oinfo)
 {
 	uint			next_extent;
 	struct xfs_extent	*extp;
 	int			error;
 
-	error = xfs_free_extent(tp, start_block, ext_len);
+	error = xfs_free_extent(tp, start_block, ext_len, oinfo);
 
 	/*
 	 * Mark the transaction dirty, even on error. This ensures the
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 06/16] xfs: introduce rmap extent operation stubs
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
                   ` (4 preceding siblings ...)
  2016-03-08  4:16 ` [PATCH 05/16] xfs: add owner field to extent allocation and freeing Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-08  4:16 ` [PATCH 07/16] xfs: define the on-disk rmap btree format Dave Chinner
                   ` (10 subsequent siblings)
  16 siblings, 0 replies; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

Add the stubs into the extent allocation and freeing paths that the
rmap btree implementation will hook into. While doing this, add the
trace points that will be used to track rmap btree extent
manipulations.

[darrick.wong@oracle.com: Extend the stubs to take full owner info.]

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/Makefile                |  1 +
 fs/xfs/libxfs/xfs_alloc.c      | 18 ++++++++-
 fs/xfs/libxfs/xfs_rmap.c       | 89 ++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_rmap_btree.h | 30 ++++++++++++++
 fs/xfs/xfs_trace.h             | 45 +++++++++++++++++++++
 5 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 fs/xfs/libxfs/xfs_rmap.c
 create mode 100644 fs/xfs/libxfs/xfs_rmap_btree.h

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index f646391..c202ce3 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -51,6 +51,7 @@ xfs-y				+= $(addprefix libxfs/, \
 				   xfs_inode_fork.o \
 				   xfs_inode_buf.o \
 				   xfs_log_rlimit.o \
+				   xfs_rmap.o \
 				   xfs_sb.o \
 				   xfs_symlink_remote.o \
 				   xfs_trans_resv.o \
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 0222e03..9ecbe0f 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -26,6 +26,7 @@
 #include "xfs_mount.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
+#include "xfs_rmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_extent_busy.h"
@@ -647,6 +648,14 @@ xfs_alloc_ag_vextent(
 	ASSERT(!args->wasfromfl || !args->isfl);
 	ASSERT(args->agbno % args->alignment == 0);
 
+	/* if an owner is set, insert new extent into the reverse map btree */
+	if (args->oinfo.oi_owner) {
+		error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
+				       args->agbno, args->len, &args->oinfo);
+		if (error)
+			return error;
+	}
+
 	if (!args->wasfromfl) {
 		error = xfs_alloc_update_counters(args->tp, args->pag,
 						  args->agbp,
@@ -1613,12 +1622,19 @@ xfs_free_ag_extent(
 	xfs_extlen_t	nlen;		/* new length of freespace */
 	xfs_perag_t	*pag;		/* per allocation group data */
 
+	bno_cur = cnt_cur = NULL;
 	mp = tp->t_mountp;
+
+	if (oinfo->oi_owner) {
+		error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
+		if (error)
+			goto error0;
+	}
+
 	/*
 	 * Allocate and initialize a cursor for the by-block btree.
 	 */
 	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
-	cnt_cur = NULL;
 	/*
 	 * Look for a neighboring block on the left (lower block numbers)
 	 * that is contiguous with this space.
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
new file mode 100644
index 0000000..3e17294
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -0,0 +1,89 @@
+
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_btree.h"
+#include "xfs_trans.h"
+#include "xfs_alloc.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_trace.h"
+#include "xfs_error.h"
+#include "xfs_extent_busy.h"
+
+int
+xfs_rmap_free(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*agbp,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	struct xfs_owner_info	*oinfo)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	int			error = 0;
+
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return 0;
+
+	trace_xfs_rmap_free_extent(mp, agno, bno, len, oinfo);
+	if (1)
+		goto out_error;
+	trace_xfs_rmap_free_extent_done(mp, agno, bno, len, oinfo);
+	return 0;
+
+out_error:
+	trace_xfs_rmap_free_extent_error(mp, agno, bno, len, oinfo);
+	return error;
+}
+
+int
+xfs_rmap_alloc(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*agbp,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	struct xfs_owner_info	*oinfo)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	int			error = 0;
+
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return 0;
+
+	trace_xfs_rmap_alloc_extent(mp, agno, bno, len, oinfo);
+	if (1)
+		goto out_error;
+	trace_xfs_rmap_alloc_extent_done(mp, agno, bno, len, oinfo);
+	return 0;
+
+out_error:
+	trace_xfs_rmap_alloc_extent_error(mp, agno, bno, len, oinfo);
+	return error;
+}
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
new file mode 100644
index 0000000..a3b8f90
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_RMAP_BTREE_H__
+#define	__XFS_RMAP_BTREE_H__
+
+struct xfs_buf;
+
+int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp,
+		   xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
+		   struct xfs_owner_info *oinfo);
+int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
+		  xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
+		  struct xfs_owner_info *oinfo);
+
+#endif	/* __XFS_RMAP_BTREE_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index c8d5842..7968e92 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1701,6 +1701,51 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
 DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
 DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
 
+DECLARE_EVENT_CLASS(xfs_rmap_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_agblock_t agbno, xfs_extlen_t len,
+		 struct xfs_owner_info *oinfo),
+	TP_ARGS(mp, agno, agbno, len, oinfo),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, agbno)
+		__field(xfs_extlen_t, len)
+		__field(uint64_t, owner)
+		__field(uint64_t, offset)
+		__field(unsigned long, flags)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->agbno = agbno;
+		__entry->len = len;
+		__entry->owner = oinfo->oi_owner;
+		__entry->offset = oinfo->oi_offset;
+		__entry->flags = oinfo->oi_flags;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u len %u, owner 0x%llx, offset %llu, flags 0x%lx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agbno,
+		  __entry->len,
+		  __entry->owner,
+		  __entry->offset,
+		  __entry->flags)
+);
+#define DEFINE_RMAP_EVENT(name) \
+DEFINE_EVENT(xfs_rmap_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+		 xfs_agblock_t agbno, xfs_extlen_t len, \
+		 struct xfs_owner_info *oinfo), \
+	TP_ARGS(mp, agno, agbno, len, oinfo))
+DEFINE_RMAP_EVENT(xfs_rmap_free_extent);
+DEFINE_RMAP_EVENT(xfs_rmap_free_extent_done);
+DEFINE_RMAP_EVENT(xfs_rmap_free_extent_error);
+DEFINE_RMAP_EVENT(xfs_rmap_alloc_extent);
+DEFINE_RMAP_EVENT(xfs_rmap_alloc_extent_done);
+DEFINE_RMAP_EVENT(xfs_rmap_alloc_extent_error);
+
 DECLARE_EVENT_CLASS(xfs_da_class,
 	TP_PROTO(struct xfs_da_args *args),
 	TP_ARGS(args),
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 07/16] xfs: define the on-disk rmap btree format
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
                   ` (5 preceding siblings ...)
  2016-03-08  4:16 ` [PATCH 06/16] xfs: introduce rmap extent operation stubs Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-08  4:16 ` [PATCH 08/16] xfs: add rmap btree growfs support Dave Chinner
                   ` (9 subsequent siblings)
  16 siblings, 0 replies; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

Now that we have all the surrounding call infrastructure in place, we can
start filling out the rmap btree implementation. Start with the
on-disk btree format; add everything needed to read, write and
manipulate rmap btree blocks. This prepares the way for adding the
btree operations implementation.

[darrick.wong@oracle.com: record owner and offset info in rmap btree]
[darrick.wong@oracle.com: fork, bmbt and unwritten state in rmap btree]

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/Makefile                |   1 +
 fs/xfs/libxfs/xfs_btree.c      |   3 +
 fs/xfs/libxfs/xfs_btree.h      |  18 +++--
 fs/xfs/libxfs/xfs_format.h     |  95 +++++++++++++++++++++++
 fs/xfs/libxfs/xfs_rmap_btree.c | 170 +++++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_rmap_btree.h |  31 ++++++++
 fs/xfs/libxfs/xfs_sb.c         |   6 ++
 fs/xfs/libxfs/xfs_shared.h     |   2 +
 fs/xfs/xfs_mount.h             |   2 +
 fs/xfs/xfs_ondisk.h            |   3 +
 10 files changed, 323 insertions(+), 8 deletions(-)
 create mode 100644 fs/xfs/libxfs/xfs_rmap_btree.c

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index c202ce3..9391080 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -52,6 +52,7 @@ xfs-y				+= $(addprefix libxfs/, \
 				   xfs_inode_buf.o \
 				   xfs_log_rlimit.o \
 				   xfs_rmap.o \
+				   xfs_rmap_btree.o \
 				   xfs_sb.o \
 				   xfs_symlink_remote.o \
 				   xfs_trans_resv.o \
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 5953764..1240117 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -1144,6 +1144,9 @@ xfs_btree_set_refs(
 	case XFS_BTNUM_BMAP:
 		xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
 		break;
+	case XFS_BTNUM_RMAP:
+		xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
+		break;
 	default:
 		ASSERT(0);
 	}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 1fcf272..07fa27e 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -38,17 +38,19 @@ union xfs_btree_ptr {
 };
 
 union xfs_btree_key {
-	xfs_bmbt_key_t		bmbt;
-	xfs_bmdr_key_t		bmbr;	/* bmbt root block */
-	xfs_alloc_key_t		alloc;
-	xfs_inobt_key_t		inobt;
+	struct xfs_bmbt_key		bmbt;
+	xfs_bmdr_key_t			bmbr;	/* bmbt root block */
+	xfs_alloc_key_t			alloc;
+	struct xfs_inobt_key		inobt;
+	struct xfs_rmap_key		rmap;
 };
 
 union xfs_btree_rec {
-	xfs_bmbt_rec_t		bmbt;
-	xfs_bmdr_rec_t		bmbr;	/* bmbt root block */
-	xfs_alloc_rec_t		alloc;
-	xfs_inobt_rec_t		inobt;
+	struct xfs_bmbt_rec		bmbt;
+	xfs_bmdr_rec_t			bmbr;	/* bmbt root block */
+	struct xfs_alloc_rec		alloc;
+	struct xfs_inobt_rec		inobt;
+	struct xfs_rmap_rec		rmap;
 };
 
 /*
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 232c145..508b3ea 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1384,11 +1384,106 @@ xfs_rmap_ino_owner(
 #define XFS_RMAP_OWN_INODES	(-7ULL)	/* Inode chunk */
 #define XFS_RMAP_OWN_MIN	(-8ULL) /* guard */
 
+#define XFS_RMAP_NON_INODE_OWNER(owner)	(!!((owner) & (1ULL << 63)))
+
+/*
+ * Data record structure
+ */
+struct xfs_rmap_rec {
+	__be32		rm_startblock;	/* extent start block */
+	__be32		rm_blockcount;	/* extent length */
+	__be64		rm_owner;	/* extent owner */
+	__be64		rm_offset;	/* offset within the owner */
+};
+
+/*
+ * rmap btree record
+ *  rm_blockcount:31 is the unwritten extent flag (same as l0:63 in bmbt)
+ *  rm_blockcount:0-30 are the extent length
+ *  rm_offset:63 is the attribute fork flag
+ *  rm_offset:62 is the bmbt block flag
+ *  rm_offset:0-61 is the block offset within the inode
+ */
+#define XFS_RMAP_OFF_ATTR	((__uint64_t)1ULL << 63)
+#define XFS_RMAP_OFF_BMBT	((__uint64_t)1ULL << 62)
+#define XFS_RMAP_LEN_UNWRITTEN	((xfs_extlen_t)1U << 31)
+
+#define XFS_RMAP_OFF_MASK	~(XFS_RMAP_OFF_ATTR | XFS_RMAP_OFF_BMBT)
+#define XFS_RMAP_LEN_MASK	~XFS_RMAP_LEN_UNWRITTEN
+
+#define XFS_RMAP_OFF(off)		((off) & XFS_RMAP_OFF_MASK)
+#define XFS_RMAP_LEN(len)		((len) & XFS_RMAP_LEN_MASK)
+
+#define XFS_RMAP_IS_BMBT(off)		(!!((off) & XFS_RMAP_OFF_BMBT))
+#define XFS_RMAP_IS_ATTR_FORK(off)	(!!((off) & XFS_RMAP_OFF_ATTR))
+#define XFS_RMAP_IS_UNWRITTEN(len)	(!!((len) & XFS_RMAP_LEN_UNWRITTEN))
+
+#define RMAPBT_STARTBLOCK_BITLEN	32
+#define RMAPBT_EXNTFLAG_BITLEN		1
+#define RMAPBT_BLOCKCOUNT_BITLEN	31
+#define RMAPBT_OWNER_BITLEN		64
+#define RMAPBT_ATTRFLAG_BITLEN		1
+#define RMAPBT_BMBTFLAG_BITLEN		1
+#define RMAPBT_OFFSET_BITLEN		62
+
+struct xfs_rmap_irec {
+	xfs_agblock_t	rm_startblock;	/* extent start block */
+	xfs_extlen_t	rm_blockcount;	/* extent length */
+	__uint64_t	rm_owner;	/* extent owner */
+	__uint64_t	rm_offset;	/* offset within the owner */
+};
+
+/*
+ * Key structure
+ *
+ * We don't use the length for lookups
+ */
+struct xfs_rmap_key {
+	__be32		rm_startblock;	/* extent start block */
+	__be64		rm_owner;	/* extent owner */
+	__be64		rm_offset;	/* offset within the owner */
+} __attribute__((packed));
+
+/* btree pointer type */
+typedef __be32 xfs_rmap_ptr_t;
+
 #define	XFS_RMAP_BLOCK(mp) \
 	(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
 	 XFS_FIBT_BLOCK(mp) + 1 : \
 	 XFS_IBT_BLOCK(mp) + 1)
 
+static inline void
+xfs_owner_info_unpack(
+	struct xfs_owner_info	*oinfo,
+	uint64_t		*owner,
+	uint64_t		*offset)
+{
+	__uint64_t		r;
+
+	*owner = oinfo->oi_owner;
+	r = oinfo->oi_offset;
+	if (oinfo->oi_flags & XFS_RMAP_INO_ATTR_FORK)
+		r |= XFS_RMAP_OFF_ATTR;
+	if (oinfo->oi_flags & XFS_RMAP_BMBT_BLOCK)
+		r |= XFS_RMAP_OFF_BMBT;
+	*offset = r;
+}
+
+static inline void
+xfs_owner_info_pack(
+	struct xfs_owner_info	*oinfo,
+	uint64_t		owner,
+	uint64_t		offset)
+{
+	oinfo->oi_owner = owner;
+	oinfo->oi_offset = XFS_RMAP_OFF(offset);
+	oinfo->oi_flags = 0;
+	if (XFS_RMAP_IS_ATTR_FORK(offset))
+		oinfo->oi_flags |= XFS_RMAP_INO_ATTR_FORK;
+	if (XFS_RMAP_IS_BMBT(offset))
+		oinfo->oi_flags |= XFS_RMAP_BMBT_BLOCK;
+}
+
 /*
  * BMAP Btree format definitions
  *
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
new file mode 100644
index 0000000..882e8e2
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_error.h"
+#include "xfs_extent_busy.h"
+
+static struct xfs_btree_cur *
+xfs_rmapbt_dup_cursor(
+	struct xfs_btree_cur	*cur)
+{
+	return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
+			cur->bc_private.a.agbp, cur->bc_private.a.agno);
+}
+
+static bool
+xfs_rmapbt_verify(
+	struct xfs_buf		*bp)
+{
+	struct xfs_mount	*mp = bp->b_target->bt_mount;
+	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
+	struct xfs_perag	*pag = bp->b_pag;
+	unsigned int		level;
+
+	/*
+	 * magic number and level verification
+	 *
+	 * During growfs operations, we can't verify the exact level or owner as
+	 * the perag is not fully initialised and hence not attached to the
+	 * buffer.  In this case, check against the maximum tree depth.
+	 *
+	 * Similarly, during log recovery we will have a perag structure
+	 * attached, but the agf information will not yet have been initialised
+	 * from the on disk AGF. Again, we can only check against maximum limits
+	 * in this case.
+	 */
+	if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC))
+		return false;
+
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return false;
+	if (!xfs_btree_sblock_v5hdr_verify(bp))
+		return false;
+
+	level = be16_to_cpu(block->bb_level);
+	if (pag && pag->pagf_init) {
+		if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
+			return false;
+	} else if (level >= mp->m_ag_maxlevels)
+		return false;
+
+	return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
+}
+
+static void
+xfs_rmapbt_read_verify(
+	struct xfs_buf	*bp)
+{
+	if (!xfs_btree_sblock_verify_crc(bp))
+		xfs_buf_ioerror(bp, -EFSBADCRC);
+	else if (!xfs_rmapbt_verify(bp))
+		xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+	if (bp->b_error) {
+		trace_xfs_btree_corrupt(bp, _RET_IP_);
+		xfs_verifier_error(bp);
+	}
+}
+
+static void
+xfs_rmapbt_write_verify(
+	struct xfs_buf	*bp)
+{
+	if (!xfs_rmapbt_verify(bp)) {
+		trace_xfs_btree_corrupt(bp, _RET_IP_);
+		xfs_buf_ioerror(bp, -EFSCORRUPTED);
+		xfs_verifier_error(bp);
+		return;
+	}
+	xfs_btree_sblock_calc_crc(bp);
+
+}
+
+const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
+	.name			= "xfs_rmapbt",
+	.verify_read		= xfs_rmapbt_read_verify,
+	.verify_write		= xfs_rmapbt_write_verify,
+};
+
+static const struct xfs_btree_ops xfs_rmapbt_ops = {
+	.rec_len		= sizeof(struct xfs_rmap_rec),
+	.key_len		= sizeof(struct xfs_rmap_key),
+
+	.dup_cursor		= xfs_rmapbt_dup_cursor,
+	.buf_ops		= &xfs_rmapbt_buf_ops,
+};
+
+/*
+ * Allocate a new reverse mapping btree cursor.
+ */
+struct xfs_btree_cur *
+xfs_rmapbt_init_cursor(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	struct xfs_buf		*agbp,
+	xfs_agnumber_t		agno)
+{
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
+	struct xfs_btree_cur	*cur;
+
+	cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+	cur->bc_tp = tp;
+	cur->bc_mp = mp;
+	cur->bc_btnum = XFS_BTNUM_RMAP;
+	cur->bc_flags = XFS_BTREE_CRC_BLOCKS;
+	cur->bc_blocklog = mp->m_sb.sb_blocklog;
+	cur->bc_ops = &xfs_rmapbt_ops;
+	cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
+
+	cur->bc_private.a.agbp = agbp;
+	cur->bc_private.a.agno = agno;
+
+	return cur;
+}
+
+/*
+ * Calculate number of records in an rmap btree block.
+ */
+int
+xfs_rmapbt_maxrecs(
+	struct xfs_mount	*mp,
+	int			blocklen,
+	int			leaf)
+{
+	blocklen -= XFS_RMAP_BLOCK_LEN;
+
+	if (leaf)
+		return blocklen / sizeof(struct xfs_rmap_rec);
+	return blocklen /
+		(sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
+}
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
index a3b8f90..2e02362 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -19,6 +19,37 @@
 #define	__XFS_RMAP_BTREE_H__
 
 struct xfs_buf;
+struct xfs_btree_cur;
+struct xfs_mount;
+
+/* rmaps only exist on crc enabled filesystems */
+#define XFS_RMAP_BLOCK_LEN	XFS_BTREE_SBLOCK_CRC_LEN
+
+/*
+ * Record, key, and pointer address macros for btree blocks.
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ */
+#define XFS_RMAP_REC_ADDR(block, index) \
+	((struct xfs_rmap_rec *) \
+		((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+		 (((index) - 1) * sizeof(struct xfs_rmap_rec))))
+
+#define XFS_RMAP_KEY_ADDR(block, index) \
+	((struct xfs_rmap_key *) \
+		((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+		 ((index) - 1) * sizeof(struct xfs_rmap_key)))
+
+#define XFS_RMAP_PTR_ADDR(block, index, maxrecs) \
+	((xfs_rmap_ptr_t *) \
+		((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+		 (maxrecs) * sizeof(struct xfs_rmap_key) + \
+		 ((index) - 1) * sizeof(xfs_rmap_ptr_t)))
+
+struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
+				struct xfs_trans *tp, struct xfs_buf *bp,
+				xfs_agnumber_t agno);
+int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf);
 
 int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp,
 		   xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 8a53eaa..fadf750 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -36,6 +36,7 @@
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_log.h"
+#include "xfs_rmap_btree.h"
 
 /*
  * Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -729,6 +730,11 @@ xfs_sb_mount_common(
 	mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
 	mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
 
+	mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 1);
+	mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 0);
+	mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2;
+	mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2;
+
 	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
 	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
 					sbp->sb_inopblock);
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 81ac870..4912072 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -38,6 +38,7 @@ extern const struct xfs_buf_ops xfs_agi_buf_ops;
 extern const struct xfs_buf_ops xfs_agf_buf_ops;
 extern const struct xfs_buf_ops xfs_agfl_buf_ops;
 extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
+extern const struct xfs_buf_ops xfs_rmapbt_buf_ops;
 extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
 extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
 extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
@@ -212,6 +213,7 @@ int	xfs_log_calc_minimum_size(struct xfs_mount *);
 #define	XFS_INO_BTREE_REF	3
 #define	XFS_ALLOC_BTREE_REF	2
 #define	XFS_BMAP_BTREE_REF	2
+#define	XFS_RMAP_BTREE_REF	2
 #define	XFS_DIR_BTREE_REF	2
 #define	XFS_INO_REF		2
 #define	XFS_ATTR_BTREE_REF	1
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9788686..b409a2a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -90,6 +90,8 @@ typedef struct xfs_mount {
 	uint			m_bmap_dmnr[2];	/* min bmap btree records */
 	uint			m_inobt_mxr[2];	/* max inobt btree records */
 	uint			m_inobt_mnr[2];	/* min inobt btree records */
+	uint			m_rmap_mxr[2];	/* max rmap btree records */
+	uint			m_rmap_mnr[2];	/* min rmap btree records */
 	uint			m_ag_maxlevels;	/* XFS_AG_MAXLEVELS */
 	uint			m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
 	uint			m_in_maxlevels;	/* max inobt btree levels. */
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 9a78408..07fb5d8 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -42,11 +42,14 @@ xfs_check_ondisk_structs(void)
 	XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr,		56);
 	XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key,		4);
 	XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec,		16);
+	XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key,		20);
+	XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec,		24);
 	XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp,		8);
 	XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t,			8);
 	XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t,			4);
 	XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t,			8);
 	XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t,			4);
+	XFS_CHECK_STRUCT_SIZE(xfs_rmap_ptr_t,			4);
 
 	/* dir/attr trees */
 	XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr,	80);
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 08/16] xfs: add rmap btree growfs support
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
                   ` (6 preceding siblings ...)
  2016-03-08  4:16 ` [PATCH 07/16] xfs: define the on-disk rmap btree format Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-08  4:16 ` [PATCH 09/16] xfs: rmap btree transaction reservations Dave Chinner
                   ` (8 subsequent siblings)
  16 siblings, 0 replies; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

Now we can read and write rmap btree blocks, we can add support to
the growfs code to initialise new rmap btree blocks.

[darrick.wong@oracle.com: fill out the rmap offset fields]

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_fsops.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index b4ab22c..042f215 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -32,6 +32,7 @@
 #include "xfs_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_alloc.h"
+#include "xfs_rmap_btree.h"
 #include "xfs_ialloc.h"
 #include "xfs_fsops.h"
 #include "xfs_itable.h"
@@ -243,6 +244,12 @@ xfs_growfs_data_private(
 		agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
 		agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
 		agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
+		if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+			agf->agf_roots[XFS_BTNUM_RMAPi] =
+						cpu_to_be32(XFS_RMAP_BLOCK(mp));
+			agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
+		}
+
 		agf->agf_flfirst = 0;
 		agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
 		agf->agf_flcount = 0;
@@ -382,6 +389,72 @@ xfs_growfs_data_private(
 		if (error)
 			goto error0;
 
+		/* RMAP btree root block */
+		if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+			struct xfs_rmap_rec	*rrec;
+			struct xfs_btree_block	*block;
+
+			bp = xfs_growfs_get_hdr_buf(mp,
+				XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)),
+				BTOBB(mp->m_sb.sb_blocksize), 0,
+				&xfs_rmapbt_buf_ops);
+			if (!bp) {
+				error = -ENOMEM;
+				goto error0;
+			}
+
+			xfs_btree_init_block(mp, bp, XFS_RMAP_CRC_MAGIC, 0, 0,
+						agno, XFS_BTREE_CRC_BLOCKS);
+			block = XFS_BUF_TO_BLOCK(bp);
+
+
+			/*
+			 * Mark the AG header regions as static metadata. The BNO
+			 * btree block is the first block after the headers, so
+			 * its location defines the size of the region that the
+			 * static metadata consumes.
+			 *
+			 * Note: unlike mkfs, we never have to account for log
+			 * space when growing the data regions
+			 */
+			rrec = XFS_RMAP_REC_ADDR(block, 1);
+			rrec->rm_startblock = 0;
+			rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
+			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
+			rrec->rm_offset = 0;
+			be16_add_cpu(&block->bb_numrecs, 1);
+
+			/* account freespace btree root blocks */
+			rrec = XFS_RMAP_REC_ADDR(block, 2);
+			rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
+			rrec->rm_blockcount = cpu_to_be32(2);
+			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
+			rrec->rm_offset = 0;
+			be16_add_cpu(&block->bb_numrecs, 1);
+
+			/* account inode btree root blocks */
+			rrec = XFS_RMAP_REC_ADDR(block, 3);
+			rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
+			rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
+							XFS_IBT_BLOCK(mp));
+			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
+			rrec->rm_offset = 0;
+			be16_add_cpu(&block->bb_numrecs, 1);
+
+			/* account for rmap btree root */
+			rrec = XFS_RMAP_REC_ADDR(block, 4);
+			rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
+			rrec->rm_blockcount = cpu_to_be32(1);
+			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
+			rrec->rm_offset = 0;
+			be16_add_cpu(&block->bb_numrecs, 1);
+
+			error = xfs_bwrite(bp);
+			xfs_buf_relse(bp);
+			if (error)
+				goto error0;
+		}
+
 		/*
 		 * INO btree root block
 		 */
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 09/16] xfs: rmap btree transaction reservations
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
                   ` (7 preceding siblings ...)
  2016-03-08  4:16 ` [PATCH 08/16] xfs: add rmap btree growfs support Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-08  4:16 ` [PATCH 10/16] xfs: rmap btree requires more reserved free space Dave Chinner
                   ` (7 subsequent siblings)
  16 siblings, 0 replies; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

The rmap btrees will use the AGFL as the block allocation source, so
we need to ensure that the transaction reservations reflect the fact
this tree is modified by allocation and freeing. Hence we need to
extend all the extent allocation/free reservations used in
transactions to handle this.

Note that this also gets rid of the unused XFS_ALLOCFREE_LOG_RES
macro, as we now do buffer reservations based on the number of
buffers logged via xfs_calc_buf_res(). Hence we only need the buffer
count calculation now.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/libxfs/xfs_trans_resv.c | 56 +++++++++++++++++++++++++++++-------------
 fs/xfs/libxfs/xfs_trans_resv.h | 10 --------
 2 files changed, 39 insertions(+), 27 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 68cb1e7..d495f82 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -64,6 +64,28 @@ xfs_calc_buf_res(
 }
 
 /*
+ * Count the btree blocks that can be logged by num_ops extent allocations or
+ * frees. Every operation modifies the two free space btrees, and the rmap
+ * btree as well when the rmapbt feature is enabled. The reservation is:
+ *
+ *	num_ops * num trees * ((2 blocks/level * max depth) - 1)
+ *
+ * where max depth is mp->m_ag_maxlevels.
+ */
+static uint
+xfs_allocfree_log_count(
+	struct xfs_mount *mp,
+	uint		num_ops)
+{
+	uint		blocks_per_tree = 2 * mp->m_ag_maxlevels - 1;
+	uint		num_trees;
+
+	/* bnobt + cntbt, plus the rmapbt when the feature is enabled */
+	num_trees = xfs_sb_version_hasrmapbt(&mp->m_sb) ? 3 : 2;
+	return num_ops * num_trees * blocks_per_tree;
+}
+
+/*
  * Logging inodes is really tricksy. They are logged in memory format,
  * which means that what we write into the log doesn't directly translate into
  * the amount of space they use on disk.
@@ -126,7 +148,7 @@ xfs_calc_inode_res(
  */
 STATIC uint
 xfs_calc_finobt_res(
-	struct xfs_mount 	*mp,
+	struct xfs_mount	*mp,
 	int			alloc,
 	int			modify)
 {
@@ -137,7 +159,7 @@ xfs_calc_finobt_res(
 
 	res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
 	if (alloc)
-		res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 
+		res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
 					XFS_FSB_TO_B(mp, 1));
 	if (modify)
 		res += (uint)XFS_FSB_TO_B(mp, 1);
@@ -188,10 +210,10 @@ xfs_calc_write_reservation(
 		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
 				      XFS_FSB_TO_B(mp, 1)) +
 		     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
 				      XFS_FSB_TO_B(mp, 1))),
 		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
 				      XFS_FSB_TO_B(mp, 1))));
 }
 
@@ -217,10 +239,10 @@ xfs_calc_itruncate_reservation(
 		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
 				      XFS_FSB_TO_B(mp, 1))),
 		    (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
+		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
 				      XFS_FSB_TO_B(mp, 1)) +
 		    xfs_calc_buf_res(5, 0) +
-		    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+		    xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
 				     XFS_FSB_TO_B(mp, 1)) +
 		    xfs_calc_buf_res(2 + mp->m_ialloc_blks +
 				     mp->m_in_maxlevels, 0)));
@@ -247,7 +269,7 @@ xfs_calc_rename_reservation(
 		     xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
 				      XFS_FSB_TO_B(mp, 1))),
 		    (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
+		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 3),
 				      XFS_FSB_TO_B(mp, 1))));
 }
 
@@ -286,7 +308,7 @@ xfs_calc_link_reservation(
 		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
 				      XFS_FSB_TO_B(mp, 1))),
 		    (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
 				      XFS_FSB_TO_B(mp, 1))));
 }
 
@@ -324,7 +346,7 @@ xfs_calc_remove_reservation(
 		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
 				      XFS_FSB_TO_B(mp, 1))),
 		    (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
 				      XFS_FSB_TO_B(mp, 1))));
 }
 
@@ -371,7 +393,7 @@ xfs_calc_create_resv_alloc(
 		mp->m_sb.sb_sectsize +
 		xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
 		xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+		xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
 				 XFS_FSB_TO_B(mp, 1));
 }
 
@@ -399,7 +421,7 @@ xfs_calc_icreate_resv_alloc(
 	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
 		mp->m_sb.sb_sectsize +
 		xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+		xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
 				 XFS_FSB_TO_B(mp, 1)) +
 		xfs_calc_finobt_res(mp, 0, 0);
 }
@@ -483,7 +505,7 @@ xfs_calc_ifree_reservation(
 		xfs_calc_buf_res(1, 0) +
 		xfs_calc_buf_res(2 + mp->m_ialloc_blks +
 				 mp->m_in_maxlevels, 0) +
-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+		xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
 				 XFS_FSB_TO_B(mp, 1)) +
 		xfs_calc_finobt_res(mp, 0, 1);
 }
@@ -513,7 +535,7 @@ xfs_calc_growdata_reservation(
 	struct xfs_mount	*mp)
 {
 	return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+		xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
 				 XFS_FSB_TO_B(mp, 1));
 }
 
@@ -535,7 +557,7 @@ xfs_calc_growrtalloc_reservation(
 		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
 				 XFS_FSB_TO_B(mp, 1)) +
 		xfs_calc_inode_res(mp, 1) +
-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+		xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
 				 XFS_FSB_TO_B(mp, 1));
 }
 
@@ -611,7 +633,7 @@ xfs_calc_addafork_reservation(
 		xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
 		xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
 				 XFS_FSB_TO_B(mp, 1)) +
-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+		xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
 				 XFS_FSB_TO_B(mp, 1));
 }
 
@@ -634,7 +656,7 @@ xfs_calc_attrinval_reservation(
 		    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
 				     XFS_FSB_TO_B(mp, 1))),
 		   (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
-		    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
+		    xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
 				     XFS_FSB_TO_B(mp, 1))));
 }
 
@@ -701,7 +723,7 @@ xfs_calc_attrrm_reservation(
 					XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
 		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
 		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
 				      XFS_FSB_TO_B(mp, 1))));
 }
 
diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h
index 7978150..0eb46ed 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.h
+++ b/fs/xfs/libxfs/xfs_trans_resv.h
@@ -68,16 +68,6 @@ struct xfs_trans_resv {
 #define M_RES(mp)	(&(mp)->m_resv)
 
 /*
- * Per-extent log reservation for the allocation btree changes
- * involved in freeing or allocating an extent.
- * 2 trees * (2 blocks/level * max depth - 1) * block size
- */
-#define	XFS_ALLOCFREE_LOG_RES(mp,nx) \
-	((nx) * (2 * XFS_FSB_TO_B((mp), 2 * (mp)->m_ag_maxlevels - 1)))
-#define	XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
-	((nx) * (2 * (2 * (mp)->m_ag_maxlevels - 1)))
-
-/*
  * Per-directory log reservation for any directory change.
  * dir blocks: (1 btree block per level + data block + free block) * dblock size
  * bmap btree: (levels + 2) * max depth * block size
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 10/16] xfs: rmap btree requires more reserved free space
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
                   ` (8 preceding siblings ...)
  2016-03-08  4:16 ` [PATCH 09/16] xfs: rmap btree transaction reservations Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-08  4:16 ` [PATCH 11/16] xfs: add rmap btree operations Dave Chinner
                   ` (6 subsequent siblings)
  16 siblings, 0 replies; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

The rmap btree is allocated from the AGFL, which means we have to
ensure ENOSPC is reported to userspace before we run out of free
space in each AG. The last allocation in an AG can cause a full
height rmap btree split, and that means we have to reserve at least
this many blocks *in each AG* to be placed on the AGFL at ENOSPC.
Update the various space calculation functions to handle this.

Also, because the macros are now executing conditional code and are
called quite frequently, convert them to functions that initialise
variables in the struct xfs_mount, use the new variables everywhere
and document the calculations better.

[darrick.wong@oracle.com: don't reserve blocks if !rmap]
[dchinner@redhat.com: update m_ag_max_usable after growfs]

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/libxfs/xfs_alloc.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_alloc.h | 41 ++++-----------------------
 fs/xfs/libxfs/xfs_bmap.c  |  2 +-
 fs/xfs/libxfs/xfs_sb.c    |  2 ++
 fs/xfs/xfs_discard.c      |  2 +-
 fs/xfs/xfs_fsops.c        |  5 ++--
 fs/xfs/xfs_log_recover.c  |  1 +
 fs/xfs/xfs_mount.c        |  2 +-
 fs/xfs/xfs_mount.h        |  2 ++
 fs/xfs/xfs_super.c        |  2 +-
 10 files changed, 88 insertions(+), 42 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 9ecbe0f..585ebfa 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -62,6 +62,72 @@ xfs_prealloc_blocks(
 }
 
 /*
+ * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
+ * AGF buffer (PV 947395), we place constraints on the relationship among actual
+ * allocations for data blocks, freelist blocks, and potential file data bmap
+ * btree blocks. However, these restrictions may result in no actual space
+ * allocated for a delayed extent, for example, a data block in a certain AG is
+ * allocated but there is no additional block for the additional bmap btree
+ * block due to a split of the bmap btree of the file. The result of this may
+ * lead to an infinite loop when the file gets flushed to disk and all delayed
+ * extents need to be actually allocated. To get around this, we explicitly set
+ * aside a few blocks which will not be reserved in delayed allocation.
+ *
+ * When we are not using rmap btrees, the minimum number of needed freelist
+ * blocks is 4 fsbs _per AG_ and a potential split of a file's bmap btree
+ * requires 1 fsb, so we set the number of set-aside blocks to 4 + 4*agcount.
+ *
+ * When rmap btrees are active, we have to consider that using the last block in
+ * the AG can cause a full height rmap btree split and we need enough blocks on
+ * the AGFL to be able to handle this. That means we have, in addition to the
+ * above consideration, another (2 * mp->m_ag_maxlevels) - 1 blocks _per AG_
+ * required to be available to the free list, i.e. the set-aside grows by
+ * agcount * ((2 * mp->m_ag_maxlevels) - 1).
+ */
+unsigned int
+xfs_alloc_set_aside(
+	struct xfs_mount *mp)
+{
+	unsigned int	blocks;
+
+	blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return blocks;
+	return blocks + (mp->m_sb.sb_agcount * (2 * mp->m_ag_maxlevels - 1));
+}
+
+/*
+ * Work out the largest extent that may be allocated from a single AG. An
+ * allocation can never exceed the size of the AG, and part of every AG is
+ * permanently consumed by metadata that must be subtracted first:
+ *	- the AG superblock, AGF, AGI and AGFL headers
+ *	- the AGF (bno and cnt) and AGI btree root blocks, and optionally
+ *	  the AGI free inode and rmap btree root blocks
+ *	- blocks reserved for the AGFL (XFS_ALLOC_AGFL_RESERVE, plus a full
+ *	  height rmap btree split when rmap btrees are enabled)
+ *
+ * The AG headers are sector sized, so the space they take up depends on
+ * filesystem geometry. The others are counted in whole blocks.
+ */
+unsigned int
+xfs_alloc_ag_max_usable(struct xfs_mount *mp)
+{
+	unsigned int	reserved;
+
+	/* ag headers, AGFL reserve, AGF + AGI btree root blocks */
+	reserved = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) +
+		   XFS_ALLOC_AGFL_RESERVE + 3;
+	/* finobt root block */
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		reserved++;
+	/* rmap root block + full tree split on full AG */
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		reserved += 1 + ((2 * mp->m_ag_maxlevels) - 1);
+
+	return mp->m_sb.sb_agblocks - reserved;
+}
+
+/*
  * Lookup the record equal to [bno, len] in the btree given by cur.
  */
 STATIC int				/* error */
@@ -1914,6 +1980,11 @@ xfs_alloc_min_freelist(
 	/* space needed by-size freespace btree */
 	min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
 				       mp->m_ag_maxlevels);
+	/* space needed by the reverse mapping (used space) btree */
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		min_free += min_t(unsigned int,
+				  pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
+				  mp->m_ag_maxlevels);
 
 	return min_free;
 }
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 6d0f328..ea2868d 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -56,42 +56,6 @@ typedef unsigned int xfs_alloctype_t;
 #define	XFS_ALLOC_FLAG_FREEING	0x00000002  /* indicate caller is freeing extents*/
 
 /*
- * In order to avoid ENOSPC-related deadlock caused by
- * out-of-order locking of AGF buffer (PV 947395), we place
- * constraints on the relationship among actual allocations for
- * data blocks, freelist blocks, and potential file data bmap
- * btree blocks. However, these restrictions may result in no
- * actual space allocated for a delayed extent, for example, a data
- * block in a certain AG is allocated but there is no additional
- * block for the additional bmap btree block due to a split of the
- * bmap btree of the file. The result of this may lead to an
- * infinite loop in xfssyncd when the file gets flushed to disk and
- * all delayed extents need to be actually allocated. To get around
- * this, we explicitly set aside a few blocks which will not be
- * reserved in delayed allocation. Considering the minimum number of
- * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap
- * btree requires 1 fsb, so we set the number of set-aside blocks
- * to 4 + 4*agcount.
- */
-#define XFS_ALLOC_SET_ASIDE(mp)  (4 + ((mp)->m_sb.sb_agcount * 4))
-
-/*
- * When deciding how much space to allocate out of an AG, we limit the
- * allocation maximum size to the size the AG. However, we cannot use all the
- * blocks in the AG - some are permanently used by metadata. These
- * blocks are generally:
- *	- the AG superblock, AGF, AGI and AGFL
- *	- the AGF (bno and cnt) and AGI btree root blocks
- *	- 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits
- *
- * The AG headers are sector sized, so the amount of space they take up is
- * dependent on filesystem geometry. The others are all single blocks.
- */
-#define XFS_ALLOC_AG_MAX_USABLE(mp)	\
-	((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
-
-
-/*
  * Argument structure for xfs_alloc routines.
  * This is turned into a structure to avoid having 20 arguments passed
  * down several levels of the stack.
@@ -133,6 +97,11 @@ typedef struct xfs_alloc_arg {
 #define XFS_ALLOC_INITIAL_USER_DATA	(1 << 1)/* special case start of file */
 #define XFS_ALLOC_USERDATA_ZERO		(1 << 2)/* zero extent on allocation */
 
+/* freespace limit calculations */
+#define XFS_ALLOC_AGFL_RESERVE	4
+unsigned int xfs_alloc_set_aside(struct xfs_mount *mp);
+unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp);
+
 xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
 		struct xfs_perag *pag, xfs_extlen_t need);
 unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 59d4fd1..f8d33c5 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3724,7 +3724,7 @@ xfs_bmap_btalloc(
 	args.fsbno = ap->blkno;
 
 	/* Trim the allocation back to the maximum an AG can fit. */
-	args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp));
+	args.maxlen = MIN(ap->length, mp->m_ag_max_usable);
 	args.firstblock = *ap->firstblock;
 	blen = 0;
 	if (nullfb) {
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index fadf750..62217e1 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -744,6 +744,8 @@ xfs_sb_mount_common(
 		mp->m_ialloc_min_blks = sbp->sb_spino_align;
 	else
 		mp->m_ialloc_min_blks = mp->m_ialloc_blks;
+	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
+	mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp);
 }
 
 /*
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 272c3f8..4ff499a 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -179,7 +179,7 @@ xfs_ioc_trim(
 	 * matter as trimming blocks is an advisory interface.
 	 */
 	if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
-	    range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) ||
+	    range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) ||
 	    range.len < mp->m_sb.sb_blocksize)
 		return -EINVAL;
 
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 042f215..19c43bf 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -586,6 +586,7 @@ xfs_growfs_data_private(
 	} else
 		mp->m_maxicount = 0;
 	xfs_set_low_space_thresholds(mp);
+	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
 
 	/* update secondary superblocks. */
 	for (agno = 1; agno < nagcount; agno++) {
@@ -723,7 +724,7 @@ xfs_fs_counts(
 	cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
 	cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
 	cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
-							XFS_ALLOC_SET_ASIDE(mp);
+						mp->m_alloc_set_aside;
 
 	spin_lock(&mp->m_sb_lock);
 	cnt->freertx = mp->m_sb.sb_frextents;
@@ -796,7 +797,7 @@ retry:
 		__int64_t	free;
 
 		free = percpu_counter_sum(&mp->m_fdblocks) -
-							XFS_ALLOC_SET_ASIDE(mp);
+						mp->m_alloc_set_aside;
 		if (!free)
 			goto out; /* ENOSPC and fdblks_delta = 0 */
 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 7c9bc7c..4232f2d 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -5028,6 +5028,7 @@ xlog_do_recover(
 		xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
 		return error;
 	}
+	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
 
 	xlog_recover_check_summary(log);
 
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index ef5de545..accd597 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1197,7 +1197,7 @@ xfs_mod_fdblocks(
 		batch = XFS_FDBLOCKS_BATCH;
 
 	__percpu_counter_add(&mp->m_fdblocks, delta, batch);
-	if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp),
+	if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
 				     XFS_FDBLOCKS_BATCH) >= 0) {
 		/* we had space! */
 		return 0;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index b409a2a..945cd2e 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -96,6 +96,8 @@ typedef struct xfs_mount {
 	uint			m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
 	uint			m_in_maxlevels;	/* max inobt btree levels. */
 	xfs_extlen_t		m_ag_prealloc_blocks; /* reserved ag blocks */
+	uint			m_alloc_set_aside; /* space we can't use */
+	uint			m_ag_max_usable; /* max space per AG */
 	struct radix_tree_root	m_perag_tree;	/* per-ag accounting info */
 	spinlock_t		m_perag_lock;	/* lock for m_perag_tree */
 	struct mutex		m_growlock;	/* growfs mutex */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d760934..da4398d 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1088,7 +1088,7 @@ xfs_fs_statfs(
 	statp->f_blocks = sbp->sb_dblocks - lsize;
 	spin_unlock(&mp->m_sb_lock);
 
-	statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+	statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
 	statp->f_bavail = statp->f_bfree;
 
 	fakeinos = statp->f_bfree << sbp->sb_inopblog;
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 11/16] xfs: add rmap btree operations
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
                   ` (9 preceding siblings ...)
  2016-03-08  4:16 ` [PATCH 10/16] xfs: rmap btree requires more reserved free space Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-08  4:16 ` [PATCH 12/16] xfs: add tracepoints for the rmap-mirrors-bmbt functions Dave Chinner
                   ` (5 subsequent siblings)
  16 siblings, 0 replies; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

Implement the generic btree operations needed to manipulate rmap
btree blocks. This is very similar to the per-ag freespace btree
implementation, and uses the AGFL for allocation and freeing of
blocks.

Adapt the rmap btree to store owner offsets within each rmap record,
and to handle the primary key being redefined as the tuple
[agblk, owner, offset].  The expansion of the primary key is crucial
to allowing multiple owners per extent.  Unfortunately, doing so adds
the requirement that all rmap records for file extents (metadata
always has one owner) correspond to some bmbt entry somewhere.

[darrick.wong@oracle.com: adapt the btree ops to deal with offsets]

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/libxfs/xfs_btree.h      |   1 +
 fs/xfs/libxfs/xfs_rmap.c       |  82 ++++++++++++++
 fs/xfs/libxfs/xfs_rmap_btree.c | 237 +++++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_rmap_btree.h |   7 ++
 4 files changed, 327 insertions(+)

diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 07fa27e..6443c74 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -212,6 +212,7 @@ typedef struct xfs_btree_cur
 		xfs_alloc_rec_incore_t	a;
 		xfs_bmbt_irec_t		b;
 		xfs_inobt_rec_incore_t	i;
+		struct xfs_rmap_irec	r;
 	}		bc_rec;		/* current insert/search record value */
 	struct xfs_buf	*bc_bufs[XFS_BTREE_MAXLEVELS];	/* buf ptr per level */
 	int		bc_ptrs[XFS_BTREE_MAXLEVELS];	/* key/record # */
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 3e17294..f6fe742 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -36,6 +36,88 @@
 #include "xfs_error.h"
 #include "xfs_extent_busy.h"
 
+/*
+ * Run a less-than-or-equal (XFS_LOOKUP_LE) lookup for the search key
+ * [bno, len, owner, offset] in the btree given by cur.
+ *
+ * The search values are staged in cur->bc_rec.r and the work is delegated to
+ * xfs_btree_lookup(), whose return value is passed back unchanged.  *stat is
+ * filled in by xfs_btree_lookup(); the callers in this file treat 1 as
+ * "record found".  Note that len is stored in the search record, but
+ * xfs_rmapbt_key_diff() does not compare rm_blockcount.
+ */
+int
+xfs_rmap_lookup_le(
+	struct xfs_btree_cur	*cur,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	uint64_t		owner,
+	uint64_t		offset,
+	int			*stat)
+{
+	struct xfs_rmap_irec	*search = &cur->bc_rec.r;
+
+	search->rm_startblock = bno;
+	search->rm_blockcount = len;
+	search->rm_owner = owner;
+	search->rm_offset = offset;
+	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Run an exact-match (XFS_LOOKUP_EQ) lookup for the search key
+ * [bno, len, owner, offset] in the btree given by cur.
+ *
+ * The search values are staged in cur->bc_rec.r and the work is delegated to
+ * xfs_btree_lookup(), whose return value is passed back unchanged.  *stat is
+ * filled in by xfs_btree_lookup().
+ */
+int
+xfs_rmap_lookup_eq(
+	struct xfs_btree_cur	*cur,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	uint64_t		owner,
+	uint64_t		offset,
+	int			*stat)
+{
+	struct xfs_rmap_irec	*search = &cur->bc_rec.r;
+
+	search->rm_startblock = bno;
+	search->rm_blockcount = len;
+	search->rm_owner = owner;
+	search->rm_offset = offset;
+	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Overwrite the record referred to by cur with the contents of irec.
+ *
+ * The in-core fields are converted to their big-endian on-disk form and
+ * handed to xfs_btree_update(); its return value is passed back unchanged.
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int
+xfs_rmap_update(
+	struct xfs_btree_cur	*cur,
+	struct xfs_rmap_irec	*irec)
+{
+	union xfs_btree_rec	disk;
+
+	disk.rmap.rm_offset = cpu_to_be64(irec->rm_offset);
+	disk.rmap.rm_owner = cpu_to_be64(irec->rm_owner);
+	disk.rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
+	disk.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
+
+	return xfs_btree_update(cur, &disk);
+}
+
+/*
+ * Read the record at the cursor's current position into irec.
+ *
+ * The on-disk fields are converted from big-endian to CPU byte order.  If
+ * xfs_btree_get_rec() fails, its error is returned; if it sets *stat to 0,
+ * this returns 0 and leaves irec untouched.
+ */
+int
+xfs_rmap_get_rec(
+	struct xfs_btree_cur	*cur,
+	struct xfs_rmap_irec	*irec,
+	int			*stat)
+{
+	union xfs_btree_rec	*disk;
+	int			error;
+
+	error = xfs_btree_get_rec(cur, &disk, stat);
+	if (error)
+		return error;
+	if (*stat == 0)
+		return 0;
+
+	irec->rm_offset = be64_to_cpu(disk->rmap.rm_offset);
+	irec->rm_owner = be64_to_cpu(disk->rmap.rm_owner);
+	irec->rm_blockcount = be32_to_cpu(disk->rmap.rm_blockcount);
+	irec->rm_startblock = be32_to_cpu(disk->rmap.rm_startblock);
+	return 0;
+}
+
 int
 xfs_rmap_free(
 	struct xfs_trans	*tp,
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 882e8e2..54dd118 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -34,6 +34,31 @@
 #include "xfs_error.h"
 #include "xfs_extent_busy.h"
 
+/*
+ * Reverse map btree.
+ *
+ * This is a per-ag tree used to track the owner(s) of a given extent. With
+ * reflink it is possible for there to be multiple owners, which is a departure
+ * from classic XFS. Owner records for data extents are inserted when the
+ * extent is mapped and removed when an extent is unmapped.  Owner records for
+ * all other block types (i.e. metadata) are inserted when an extent is
+ * allocated and removed when an extent is freed. There can only be one owner
+ * of a metadata extent, usually an inode or some other metadata structure like
+ * an AG btree.
+ *
+ * The rmap btree is part of the free space management, so blocks for the tree
+ * are sourced from the agfl. Hence we need transaction reservation support for
+ * this tree so that the freelist is always large enough. This also impacts on
+ * the minimum space we need to leave free in the AG.
+ *
+ * The tree is ordered by [ag block, owner, offset]. This is a large key size,
+ * but it is the only way to enforce unique keys when a block can be owned by
+ * multiple files at any offset. There's no need to order/search by extent
+ * size for online updating/management of the tree. It is intended that most
+ * reverse lookups will be to find the owner(s) of a particular block, or to
+ * try to recover tree and file data from corrupt primary metadata.
+ */
+
 static struct xfs_btree_cur *
 xfs_rmapbt_dup_cursor(
 	struct xfs_btree_cur	*cur)
@@ -42,6 +67,166 @@ xfs_rmapbt_dup_cursor(
 			cur->bc_private.a.agbp, cur->bc_private.a.agno);
 }
 
+/*
+ * Point the AGF at a new root block for this btree.
+ *
+ * ptr->s is stored into the AGF root slot as-is (no byte swapping), and inc
+ * is added to both the on-disk AGF level count and the in-core perag level
+ * count.  The AGF roots and levels fields are then logged in cur's
+ * transaction.
+ */
+STATIC void
+xfs_rmapbt_set_root(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	int			inc)
+{
+	struct xfs_buf		*agf_bp = cur->bc_private.a.agbp;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agf_bp);
+	int			which = cur->bc_btnum;
+	struct xfs_perag	*pag;
+
+	pag = xfs_perag_get(cur->bc_mp, be32_to_cpu(agf->agf_seqno));
+
+	ASSERT(ptr->s != 0);
+
+	/* on-disk AGF */
+	agf->agf_roots[which] = ptr->s;
+	be32_add_cpu(&agf->agf_levels[which], inc);
+
+	/* in-core mirror of the level count */
+	pag->pagf_levels[which] += inc;
+	xfs_perag_put(pag);
+
+	xfs_alloc_log_agf(cur->bc_tp, agf_bp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+}
+
+/*
+ * Allocate one block for the btree from the AG free list.
+ *
+ * On success *stat is set to 1 and new->s holds the big-endian AG block
+ * number.  If the free list hands back NULLAGBLOCK, *stat is set to 0 and 0
+ * is returned.  An error from xfs_alloc_get_freelist() is returned as-is
+ * without touching *stat.  The start argument is not used.
+ */
+STATIC int
+xfs_rmapbt_alloc_block(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*start,
+	union xfs_btree_ptr	*new,
+	int			*stat)
+{
+	xfs_agblock_t		agbno;
+	int			error;
+
+	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+	/* Allocate the new block from the freelist. If we can't, give up.  */
+	error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+				       &agbno, 1);
+	if (error) {
+		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+		return error;
+	}
+
+	if (agbno != NULLAGBLOCK) {
+		/* the block may still be marked busy from an earlier free */
+		xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno,
+				      agbno, 1, false);
+
+		xfs_trans_agbtree_delta(cur->bc_tp, 1);
+		new->s = cpu_to_be32(agbno);
+	}
+
+	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+	*stat = (agbno != NULLAGBLOCK) ? 1 : 0;
+	return 0;
+}
+
+/*
+ * Give a btree block back to the AG free list.
+ *
+ * The block backing bp is put on the free list, marked busy with
+ * XFS_EXTENT_BUSY_SKIP_DISCARD, accounted as one fewer AG btree block in the
+ * transaction, and its buffer is invalidated.  If the free list insertion
+ * fails, the error is returned before any of the later steps run.
+ */
+STATIC int
+xfs_rmapbt_free_block(
+	struct xfs_btree_cur	*cur,
+	struct xfs_buf		*bp)
+{
+	struct xfs_trans	*tp = cur->bc_tp;
+	struct xfs_buf		*agf_bp = cur->bc_private.a.agbp;
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+	int			error;
+
+	agbno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
+	error = xfs_alloc_put_freelist(tp, agf_bp, NULL, agbno, 1);
+	if (error)
+		return error;
+
+	agno = be32_to_cpu(XFS_BUF_TO_AGF(agf_bp)->agf_seqno);
+	xfs_extent_busy_insert(tp, agno, agbno, 1,
+			      XFS_EXTENT_BUSY_SKIP_DISCARD);
+	xfs_trans_agbtree_delta(tp, -1);
+
+	xfs_trans_binval(tp, bp);
+	return 0;
+}
+
+/*
+ * Return the minimum record count for a block at the given level: slot 0 of
+ * m_rmap_mnr for level 0, slot 1 for every other level.
+ */
+STATIC int
+xfs_rmapbt_get_minrecs(
+	struct xfs_btree_cur	*cur,
+	int			level)
+{
+	struct xfs_mount	*mp = cur->bc_mp;
+
+	if (level == 0)
+		return mp->m_rmap_mnr[0];
+	return mp->m_rmap_mnr[1];
+}
+
+/*
+ * Return the maximum record count for a block at the given level: slot 0 of
+ * m_rmap_mxr for level 0, slot 1 for every other level.
+ */
+STATIC int
+xfs_rmapbt_get_maxrecs(
+	struct xfs_btree_cur	*cur,
+	int			level)
+{
+	struct xfs_mount	*mp = cur->bc_mp;
+
+	if (level == 0)
+		return mp->m_rmap_mxr[0];
+	return mp->m_rmap_mxr[1];
+}
+
+/*
+ * Build a key from a record by copying the three key fields (startblock,
+ * owner, offset).  Both sides are in on-disk byte order, so the values are
+ * copied without conversion; the record's block count is not part of the key.
+ */
+STATIC void
+xfs_rmapbt_init_key_from_rec(
+	union xfs_btree_key	*key,
+	union xfs_btree_rec	*rec)
+{
+	struct xfs_rmap_key	*kp = &key->rmap;
+
+	kp->rm_offset = rec->rmap.rm_offset;
+	kp->rm_owner = rec->rmap.rm_owner;
+	kp->rm_startblock = rec->rmap.rm_startblock;
+}
+
+/*
+ * Fill in a record's key fields (startblock, owner, offset) from a key.
+ * Values are copied without byte order conversion.  The record's
+ * rm_blockcount is not written here.
+ */
+STATIC void
+xfs_rmapbt_init_rec_from_key(
+	union xfs_btree_key	*key,
+	union xfs_btree_rec	*rec)
+{
+	struct xfs_rmap_key	*kp = &key->rmap;
+
+	rec->rmap.rm_offset = kp->rm_offset;
+	rec->rmap.rm_owner = kp->rm_owner;
+	rec->rmap.rm_startblock = kp->rm_startblock;
+}
+
+/*
+ * Build an on-disk record from the in-core record held in the cursor
+ * (cur->bc_rec.r), converting every field to big-endian.
+ */
+STATIC void
+xfs_rmapbt_init_rec_from_cur(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_rec	*rec)
+{
+	struct xfs_rmap_irec	*irec = &cur->bc_rec.r;
+
+	rec->rmap.rm_offset = cpu_to_be64(irec->rm_offset);
+	rec->rmap.rm_owner = cpu_to_be64(irec->rm_owner);
+	rec->rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
+	rec->rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
+}
+
+/*
+ * Load ptr with this btree's root block pointer, read from the AGF buffer
+ * attached to the cursor.  The value is copied as stored in the AGF, without
+ * byte order conversion.
+ */
+STATIC void
+xfs_rmapbt_init_ptr_from_cur(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr)
+{
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+	int			which = cur->bc_btnum;
+
+	/* the cursor and the AGF must describe the same AG */
+	ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
+	ASSERT(agf->agf_roots[which] != 0);
+
+	ptr->s = agf->agf_roots[which];
+}
+
+/*
+ * Compare an on-disk key against the search record held in the cursor.
+ *
+ * Returns a negative value if the key sorts before cur->bc_rec.r, zero if
+ * they are equal and a positive value if the key sorts after it.  Fields are
+ * compared in key order: startblock, then owner, then offset.
+ *
+ * The owner and offset fields are full 64-bit unsigned values, so their
+ * difference does not fit in a signed 64-bit result: subtracting them yields
+ * the wrong sign whenever the two values are 2^63 or more apart.  Compare
+ * them explicitly and return +/-1 instead.  The 32-bit startblock difference
+ * always fits, so it is still returned directly.
+ */
+STATIC __int64_t
+xfs_rmapbt_key_diff(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_key	*key)
+{
+	struct xfs_rmap_irec	*rec = &cur->bc_rec.r;
+	struct xfs_rmap_key	*kp = &key->rmap;
+	uint64_t		x;
+	uint64_t		y;
+	__int64_t		d;
+
+	d = (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
+	if (d)
+		return d;
+
+	x = be64_to_cpu(kp->rm_owner);
+	y = rec->rm_owner;
+	if (x > y)
+		return 1;
+	if (x < y)
+		return -1;
+
+	x = be64_to_cpu(kp->rm_offset);
+	y = rec->rm_offset;
+	if (x > y)
+		return 1;
+	if (x < y)
+		return -1;
+	return 0;
+}
+
 static bool
 xfs_rmapbt_verify(
 	struct xfs_buf		*bp)
@@ -116,12 +301,64 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
 	.verify_write		= xfs_rmapbt_write_verify,
 };
 
+#if defined(DEBUG) || defined(XFS_WARN)
+/*
+ * Debug check: return 1 if key k1 sorts at or before key k2, 0 otherwise.
+ *
+ * Keys are ordered lexicographically by [startblock, owner, offset].  A later
+ * field may only decide the result when every earlier field compares equal;
+ * otherwise a key with a larger startblock but a smaller owner would be
+ * reported as being in order.
+ */
+STATIC int
+xfs_rmapbt_keys_inorder(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_key	*k1,
+	union xfs_btree_key	*k2)
+{
+	xfs_agblock_t		b1 = be32_to_cpu(k1->rmap.rm_startblock);
+	xfs_agblock_t		b2 = be32_to_cpu(k2->rmap.rm_startblock);
+	uint64_t		v1;
+	uint64_t		v2;
+
+	if (b1 != b2)
+		return b1 < b2;
+
+	v1 = be64_to_cpu(k1->rmap.rm_owner);
+	v2 = be64_to_cpu(k2->rmap.rm_owner);
+	if (v1 != v2)
+		return v1 < v2;
+
+	/* fully equal keys count as in order, as before */
+	v1 = be64_to_cpu(k1->rmap.rm_offset);
+	v2 = be64_to_cpu(k2->rmap.rm_offset);
+	return v1 <= v2;
+}
+
+/*
+ * Debug check: return 1 if record r1 sorts at or before record r2, 0
+ * otherwise.
+ *
+ * Records follow the same ordering as keys: lexicographic by
+ * [startblock, owner, offset].  The owner is therefore compared before the
+ * offset, and a later field only decides the result when every earlier field
+ * compares equal.
+ */
+STATIC int
+xfs_rmapbt_recs_inorder(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_rec	*r1,
+	union xfs_btree_rec	*r2)
+{
+	xfs_agblock_t		b1 = be32_to_cpu(r1->rmap.rm_startblock);
+	xfs_agblock_t		b2 = be32_to_cpu(r2->rmap.rm_startblock);
+	uint64_t		v1;
+	uint64_t		v2;
+
+	if (b1 != b2)
+		return b1 < b2;
+
+	v1 = be64_to_cpu(r1->rmap.rm_owner);
+	v2 = be64_to_cpu(r2->rmap.rm_owner);
+	if (v1 != v2)
+		return v1 < v2;
+
+	/* fully equal sort fields count as in order, as before */
+	v1 = be64_to_cpu(r1->rmap.rm_offset);
+	v2 = be64_to_cpu(r2->rmap.rm_offset);
+	return v1 <= v2;
+}
+#endif	/* DEBUG */
+
 static const struct xfs_btree_ops xfs_rmapbt_ops = {
 	.rec_len		= sizeof(struct xfs_rmap_rec),
 	.key_len		= sizeof(struct xfs_rmap_key),
 
 	.dup_cursor		= xfs_rmapbt_dup_cursor,
+	.set_root		= xfs_rmapbt_set_root,
+	.alloc_block		= xfs_rmapbt_alloc_block,
+	.free_block		= xfs_rmapbt_free_block,
+	.get_minrecs		= xfs_rmapbt_get_minrecs,
+	.get_maxrecs		= xfs_rmapbt_get_maxrecs,
+	.init_key_from_rec	= xfs_rmapbt_init_key_from_rec,
+	.init_rec_from_key	= xfs_rmapbt_init_rec_from_key,
+	.init_rec_from_cur	= xfs_rmapbt_init_rec_from_cur,
+	.init_ptr_from_cur	= xfs_rmapbt_init_ptr_from_cur,
+	.key_diff		= xfs_rmapbt_key_diff,
 	.buf_ops		= &xfs_rmapbt_buf_ops,
+#if defined(DEBUG) || defined(XFS_WARN)
+	.keys_inorder		= xfs_rmapbt_keys_inorder,
+	.recs_inorder		= xfs_rmapbt_recs_inorder,
+#endif
 };
 
 /*
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
index 2e02362..a5c97f8 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -51,6 +51,13 @@ struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
 				xfs_agnumber_t agno);
 int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf);
 
+int xfs_rmap_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t	bno,
+		xfs_extlen_t len, uint64_t owner, uint64_t offset, int *stat);
+int xfs_rmap_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t	bno,
+		xfs_extlen_t len, uint64_t owner, uint64_t offset, int *stat);
+int xfs_rmap_get_rec(struct xfs_btree_cur *cur, struct xfs_rmap_irec *irec,
+		int *stat);
+
 int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp,
 		   xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
 		   struct xfs_owner_info *oinfo);
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 12/16] xfs: add tracepoints for the rmap-mirrors-bmbt functions
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
                   ` (10 preceding siblings ...)
  2016-03-08  4:16 ` [PATCH 11/16] xfs: add rmap btree operations Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-08  4:16 ` [PATCH 13/16] xfs: add an extent to the rmap btree Dave Chinner
                   ` (4 subsequent siblings)
  16 siblings, 0 replies; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: "Darrick J. Wong" <darrick.wong@oracle.com>

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_trace.h | 251 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 251 insertions(+)

diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7968e92..f5fb33f 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1746,6 +1746,257 @@ DEFINE_RMAP_EVENT(xfs_rmap_alloc_extent);
 DEFINE_RMAP_EVENT(xfs_rmap_alloc_extent_done);
 DEFINE_RMAP_EVENT(xfs_rmap_alloc_extent_error);
 
+/* rmap-mirrors-bmbt traces */
+/*
+ * Event class for rmap operations described by three bmbt extents: left,
+ * prev and right.  For each extent the startoff, startblock and blockcount
+ * are captured; each is printed as (startblock:blockcount:startoff).  The
+ * fork is printed as "attr" when whichfork is XFS_ATTR_FORK and as "data"
+ * otherwise.
+ */
+DECLARE_EVENT_CLASS(xfs_rmap_bmbt3_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_ino_t ino,
+		 int whichfork,
+		 struct xfs_bmbt_irec *left,
+		 struct xfs_bmbt_irec *prev,
+		 struct xfs_bmbt_irec *right),
+	TP_ARGS(mp, agno, ino, whichfork, left, prev, right),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_ino_t, ino)
+		__field(int, whichfork)
+		__field(xfs_fileoff_t, l_loff)
+		__field(xfs_fsblock_t, l_poff)
+		__field(xfs_filblks_t, l_len)
+		__field(xfs_fileoff_t, p_loff)
+		__field(xfs_fsblock_t, p_poff)
+		__field(xfs_filblks_t, p_len)
+		__field(xfs_fileoff_t, r_loff)
+		__field(xfs_fsblock_t, r_poff)
+		__field(xfs_filblks_t, r_len)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->ino = ino;
+		__entry->whichfork = whichfork;
+		__entry->l_loff = left->br_startoff;
+		__entry->l_poff = left->br_startblock;
+		__entry->l_len = left->br_blockcount;
+		__entry->p_loff = prev->br_startoff;
+		__entry->p_poff = prev->br_startblock;
+		__entry->p_len = prev->br_blockcount;
+		__entry->r_loff = right->br_startoff;
+		__entry->r_poff = right->br_startblock;
+		__entry->r_len = right->br_blockcount;
+	),
+	TP_printk("dev %d:%d agno %u ino 0x%llx %s (%llu:%lld:%lld):(%llu:%lld:%lld):(%llu:%lld:%lld)",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->ino,
+		  __entry->whichfork == XFS_ATTR_FORK ? "attr" : "data",
+		  __entry->l_poff,
+		  __entry->l_len,
+		  __entry->l_loff,
+		  __entry->p_poff,
+		  __entry->p_len,
+		  __entry->p_loff,
+		  __entry->r_poff,
+		  __entry->r_len,
+		  __entry->r_loff)
+);
+/* Define a tracepoint called "name" that uses the three-extent class. */
+#define DEFINE_RMAP_BMBT3_EVENT(name) \
+DEFINE_EVENT(xfs_rmap_bmbt3_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+		 xfs_ino_t ino, \
+		 int whichfork, \
+		 struct xfs_bmbt_irec *left, \
+		 struct xfs_bmbt_irec *prev, \
+		 struct xfs_bmbt_irec *right), \
+	TP_ARGS(mp, agno, ino, whichfork, left, prev, right))
+
+/*
+ * Event class for rmap operations described by two bmbt extents: left and
+ * prev.  Each extent is printed as (startblock:blockcount:startoff), and the
+ * fork is printed as "attr" when whichfork is XFS_ATTR_FORK and as "data"
+ * otherwise.
+ */
+DECLARE_EVENT_CLASS(xfs_rmap_bmbt2_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_ino_t ino,
+		 int whichfork,
+		 struct xfs_bmbt_irec *left,
+		 struct xfs_bmbt_irec *prev),
+	TP_ARGS(mp, agno, ino, whichfork, left, prev),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_ino_t, ino)
+		__field(int, whichfork)
+		__field(xfs_fileoff_t, l_loff)
+		__field(xfs_fsblock_t, l_poff)
+		__field(xfs_filblks_t, l_len)
+		__field(xfs_fileoff_t, p_loff)
+		__field(xfs_fsblock_t, p_poff)
+		__field(xfs_filblks_t, p_len)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->ino = ino;
+		__entry->whichfork = whichfork;
+		__entry->l_loff = left->br_startoff;
+		__entry->l_poff = left->br_startblock;
+		__entry->l_len = left->br_blockcount;
+		__entry->p_loff = prev->br_startoff;
+		__entry->p_poff = prev->br_startblock;
+		__entry->p_len = prev->br_blockcount;
+	),
+	TP_printk("dev %d:%d agno %u ino 0x%llx %s (%llu:%lld:%lld):(%llu:%lld:%lld)",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->ino,
+		  __entry->whichfork == XFS_ATTR_FORK ? "attr" : "data",
+		  __entry->l_poff,
+		  __entry->l_len,
+		  __entry->l_loff,
+		  __entry->p_poff,
+		  __entry->p_len,
+		  __entry->p_loff)
+);
+/* Define a tracepoint called "name" that uses the two-extent class. */
+#define DEFINE_RMAP_BMBT2_EVENT(name) \
+DEFINE_EVENT(xfs_rmap_bmbt2_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+		 xfs_ino_t ino, \
+		 int whichfork, \
+		 struct xfs_bmbt_irec *left, \
+		 struct xfs_bmbt_irec *prev), \
+	TP_ARGS(mp, agno, ino, whichfork, left, prev))
+
+/*
+ * Event class for rmap operations described by a single bmbt extent (left).
+ * The extent is printed as (startblock:blockcount:startoff), and the fork is
+ * printed as "attr" when whichfork is XFS_ATTR_FORK and as "data" otherwise.
+ */
+DECLARE_EVENT_CLASS(xfs_rmap_bmbt1_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_ino_t ino,
+		 int whichfork,
+		 struct xfs_bmbt_irec *left),
+	TP_ARGS(mp, agno, ino, whichfork, left),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_ino_t, ino)
+		__field(int, whichfork)
+		__field(xfs_fileoff_t, l_loff)
+		__field(xfs_fsblock_t, l_poff)
+		__field(xfs_filblks_t, l_len)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->ino = ino;
+		__entry->whichfork = whichfork;
+		__entry->l_loff = left->br_startoff;
+		__entry->l_poff = left->br_startblock;
+		__entry->l_len = left->br_blockcount;
+	),
+	TP_printk("dev %d:%d agno %u ino 0x%llx %s (%llu:%lld:%lld)",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->ino,
+		  __entry->whichfork == XFS_ATTR_FORK ? "attr" : "data",
+		  __entry->l_poff,
+		  __entry->l_len,
+		  __entry->l_loff)
+);
+/* Define a tracepoint called "name" that uses the single-extent class. */
+#define DEFINE_RMAP_BMBT1_EVENT(name) \
+DEFINE_EVENT(xfs_rmap_bmbt1_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+		 xfs_ino_t ino, \
+		 int whichfork, \
+		 struct xfs_bmbt_irec *left), \
+	TP_ARGS(mp, agno, ino, whichfork, left))
+
+/*
+ * Event class for rmap operations described by a single bmbt extent (left)
+ * plus a signed adjustment value, adj.  The extent is printed as
+ * (startblock:blockcount:startoff), followed by "adj" and the adjustment.
+ */
+DECLARE_EVENT_CLASS(xfs_rmap_adjust_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_ino_t ino,
+		 int whichfork,
+		 struct xfs_bmbt_irec *left,
+		 long adj),
+	TP_ARGS(mp, agno, ino, whichfork, left, adj),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_ino_t, ino)
+		__field(int, whichfork)
+		__field(xfs_fileoff_t, l_loff)
+		__field(xfs_fsblock_t, l_poff)
+		__field(xfs_filblks_t, l_len)
+		__field(long, adj)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->ino = ino;
+		__entry->whichfork = whichfork;
+		__entry->l_loff = left->br_startoff;
+		__entry->l_poff = left->br_startblock;
+		__entry->l_len = left->br_blockcount;
+		__entry->adj = adj;
+	),
+	TP_printk("dev %d:%d agno %u ino 0x%llx %s (%llu:%lld:%lld) adj %ld",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->ino,
+		  __entry->whichfork == XFS_ATTR_FORK ? "attr" : "data",
+		  __entry->l_poff,
+		  __entry->l_len,
+		  __entry->l_loff,
+		  __entry->adj)
+);
+/* Define a tracepoint called "name" that uses the extent-adjustment class. */
+#define DEFINE_RMAP_ADJUST_EVENT(name) \
+DEFINE_EVENT(xfs_rmap_adjust_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+		 xfs_ino_t ino, \
+		 int whichfork, \
+		 struct xfs_bmbt_irec *left, \
+		 long adj), \
+	TP_ARGS(mp, agno, ino, whichfork, left, adj))
+
+/*
+ * Event class for operations on a single rmap btree record, identified by
+ * its AG number, AG block number, length, owner and offset.  The owner is
+ * printed in hex; all other values are printed in decimal.
+ */
+DECLARE_EVENT_CLASS(xfs_rmapbt_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_agblock_t agbno, xfs_extlen_t len,
+		 uint64_t owner, uint64_t offset),
+	TP_ARGS(mp, agno, agbno, len, owner, offset),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, agbno)
+		__field(xfs_extlen_t, len)
+		__field(uint64_t, owner)
+		__field(uint64_t, offset)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->agbno = agbno;
+		__entry->len = len;
+		__entry->owner = owner;
+		__entry->offset = offset;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u len %u, owner 0x%llx, offset %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agbno,
+		  __entry->len,
+		  __entry->owner,
+		  __entry->offset)
+);
+/* Define a tracepoint called "name" that uses the rmap btree record class. */
+#define DEFINE_RMAPBT_EVENT(name) \
+DEFINE_EVENT(xfs_rmapbt_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+		 xfs_agblock_t agbno, xfs_extlen_t len, \
+		 uint64_t owner, uint64_t offset), \
+	TP_ARGS(mp, agno, agbno, len, owner, offset))
+
+/*
+ * Tracepoint instances.  Each line defines one tracepoint using the event
+ * class selected by the DEFINE_* macro it is wrapped in.
+ */
+DEFINE_RMAP_BMBT3_EVENT(xfs_rmap_combine);
+DEFINE_RMAP_BMBT2_EVENT(xfs_rmap_lcombine);
+DEFINE_RMAP_BMBT2_EVENT(xfs_rmap_rcombine);
+DEFINE_RMAP_BMBT1_EVENT(xfs_rmap_insert);
+DEFINE_RMAP_BMBT1_EVENT(xfs_rmap_delete);
+DEFINE_RMAP_ADJUST_EVENT(xfs_rmap_move);
+DEFINE_RMAP_ADJUST_EVENT(xfs_rmap_slide);
+DEFINE_RMAP_ADJUST_EVENT(xfs_rmap_resize);
+DEFINE_RMAPBT_EVENT(xfs_rmapbt_update);
+DEFINE_RMAPBT_EVENT(xfs_rmapbt_insert);
+DEFINE_RMAPBT_EVENT(xfs_rmapbt_delete);
+
 DECLARE_EVENT_CLASS(xfs_da_class,
 	TP_PROTO(struct xfs_da_args *args),
 	TP_ARGS(args),
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 13/16] xfs: add an extent to the rmap btree
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
                   ` (11 preceding siblings ...)
  2016-03-08  4:16 ` [PATCH 12/16] xfs: add tracepoints for the rmap-mirrors-bmbt functions Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-08  4:16 ` [PATCH 14/16] xfs: remove an extent from " Dave Chinner
                   ` (3 subsequent siblings)
  16 siblings, 0 replies; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

Now all the btree, free space and transaction infrastructure is in
place, we can finally add the code to insert reverse mappings to the
rmap btree. Freeing will be done in a separate patch, so just the
addition operation can be focussed on here.

[darrick.wong@oracle.com: handle owner offsets, fork and bmbt bits
 when adding rmaps]
[dchinner: remove remaining debug printk statements]

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/libxfs/xfs_rmap.c       | 159 ++++++++++++++++++++++++++++++++++++++++-
 fs/xfs/libxfs/xfs_rmap_btree.h |   1 +
 2 files changed, 159 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index f6fe742..56627c1c 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -144,6 +144,36 @@ out_error:
 	return error;
 }
 
+/*
+ * Decide whether an existing rmap record may be merged with a new mapping
+ * described by owner and offset.
+ *
+ * The record must have a real owner (not XFS_RMAP_OWN_NULL) that matches
+ * owner, must not be unwritten, and must agree with the new mapping on both
+ * the attr fork flag and the bmbt flag carried in the offset field.
+ */
+static bool
+xfs_rmap_is_mergeable(
+	struct xfs_rmap_irec	*irec,
+	uint64_t		owner,
+	uint64_t		offset)
+{
+	/* no owner at all, or somebody else's extent */
+	if (irec->rm_owner == XFS_RMAP_OWN_NULL || irec->rm_owner != owner)
+		return false;
+
+	if (XFS_RMAP_IS_UNWRITTEN(irec->rm_blockcount))
+		return false;
+
+	/* the fork and bmbt flags must be the same on both sides */
+	return !(XFS_RMAP_IS_ATTR_FORK(offset) ^
+		 XFS_RMAP_IS_ATTR_FORK(irec->rm_offset)) &&
+	       !(XFS_RMAP_IS_BMBT(offset) ^ XFS_RMAP_IS_BMBT(irec->rm_offset));
+}
+
+/*
+ * When we allocate a new block, the first thing we do is add a reference to
+ * the extent in the rmap btree. This takes the form of a [agbno, length,
+ * owner, offset] record.  Flags are encoded in the high bits of the offset
+ * field.
+ */
 int
 xfs_rmap_alloc(
 	struct xfs_trans	*tp,
@@ -154,18 +184,145 @@ xfs_rmap_alloc(
 	struct xfs_owner_info	*oinfo)
 {
 	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_btree_cur	*cur;
+	struct xfs_rmap_irec	ltrec;
+	struct xfs_rmap_irec	gtrec;
+	int			have_gt;
 	int			error = 0;
+	int			i;
+	uint64_t		owner;
+	uint64_t		offset;
 
 	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
 		return 0;
 
 	trace_xfs_rmap_alloc_extent(mp, agno, bno, len, oinfo);
-	if (1)
+	cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+
+	xfs_owner_info_unpack(oinfo, &owner, &offset);
+
+	/*
+	 * For the initial lookup, look for an exact match or the left-adjacent
+	 * record for our insertion point. This will also give us the record for
+	 * start block contiguity tests.
+	 */
+	error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, &i);
+	if (error)
+		goto out_error;
+	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+	error = xfs_rmap_get_rec(cur, &ltrec, &i);
+	if (error)
+		goto out_error;
+	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+	if (!xfs_rmap_is_mergeable(&ltrec, owner, offset))
+		ltrec.rm_owner = XFS_RMAP_OWN_NULL;
+
+	XFS_WANT_CORRUPTED_GOTO(mp,
+		ltrec.rm_owner == XFS_RMAP_OWN_NULL ||
+		ltrec.rm_startblock + ltrec.rm_blockcount <= bno, out_error);
+
+	/*
+	 * Increment the cursor to see if we have a right-adjacent record to our
+	 * insertion point. This will give us the record for end block
+	 * contiguity tests.
+	 */
+	error = xfs_btree_increment(cur, 0, &have_gt);
+	if (error)
 		goto out_error;
+	if (have_gt) {
+		error = xfs_rmap_get_rec(cur, &gtrec, &i);
+		if (error)
+			goto out_error;
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+		XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= gtrec.rm_startblock,
+					out_error);
+	} else {
+		gtrec.rm_owner = XFS_RMAP_OWN_NULL;
+	}
+	if (!xfs_rmap_is_mergeable(&gtrec, owner, offset))
+		gtrec.rm_owner = XFS_RMAP_OWN_NULL;
+
+	/*
+	 * Note: cursor currently points one record to the right of ltrec, even
+	 * if there is no record in the tree to the right.
+	 */
+	if (ltrec.rm_owner == owner &&
+	    ltrec.rm_startblock + ltrec.rm_blockcount == bno) {
+		/*
+		 * left edge contiguous, merge into left record.
+		 *
+		 *       ltbno     ltlen
+		 * orig:   |ooooooooo|
+		 * adding:           |aaaaaaaaa|
+		 * result: |rrrrrrrrrrrrrrrrrrr|
+		 *                  bno       len
+		 */
+		ltrec.rm_blockcount += len;
+		if (gtrec.rm_owner == owner &&
+		    bno + len == gtrec.rm_startblock) {
+			//printk("add middle\n");
+			/*
+			 * right edge also contiguous, delete right record
+			 * and merge into left record.
+			 *
+			 *       ltbno     ltlen    gtbno     gtlen
+			 * orig:   |ooooooooo|         |ooooooooo|
+			 * adding:           |aaaaaaaaa|
+			 * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
+			 */
+			ltrec.rm_blockcount += gtrec.rm_blockcount;
+			error = xfs_btree_delete(cur, &i);
+			if (error)
+				goto out_error;
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+		}
+
+		/* point the cursor back to the left record and update */
+		error = xfs_btree_decrement(cur, 0, &have_gt);
+		if (error)
+			goto out_error;
+		error = xfs_rmap_update(cur, &ltrec);
+		if (error)
+			goto out_error;
+	} else if (gtrec.rm_owner == owner &&
+		   bno + len == gtrec.rm_startblock) {
+		/*
+		 * right edge contiguous, merge into right record.
+		 *
+		 *                 gtbno     gtlen
+		 * Orig:             |ooooooooo|
+		 * adding: |aaaaaaaaa|
+		 * Result: |rrrrrrrrrrrrrrrrrrr|
+		 *        bno       len
+		 */
+		gtrec.rm_startblock = bno;
+		gtrec.rm_blockcount += len;
+		error = xfs_rmap_update(cur, &gtrec);
+		if (error)
+			goto out_error;
+	} else {
+		/*
+		 * no contiguous edge with identical owner, insert
+		 * new record at current cursor position.
+		 */
+		cur->bc_rec.r.rm_startblock = bno;
+		cur->bc_rec.r.rm_blockcount = len;
+		cur->bc_rec.r.rm_owner = owner;
+		cur->bc_rec.r.rm_offset = offset;
+		error = xfs_btree_insert(cur, &i);
+		if (error)
+			goto out_error;
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+	}
+
 	trace_xfs_rmap_alloc_extent_done(mp, agno, bno, len, oinfo);
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 	return 0;
 
 out_error:
 	trace_xfs_rmap_alloc_extent_error(mp, agno, bno, len, oinfo);
+	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 	return error;
 }
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
index a5c97f8..0dfc151 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -58,6 +58,7 @@ int xfs_rmap_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t	bno,
 int xfs_rmap_get_rec(struct xfs_btree_cur *cur, struct xfs_rmap_irec *irec,
 		int *stat);
 
+/* functions for updating the rmapbt for bmbt blocks and AG btree blocks */
 int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp,
 		   xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
 		   struct xfs_owner_info *oinfo);
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 14/16] xfs: remove an extent from the rmap btree
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
                   ` (12 preceding siblings ...)
  2016-03-08  4:16 ` [PATCH 13/16] xfs: add an extent to the rmap btree Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-08  4:16 ` [PATCH 15/16] xfs: add rmap btree insert and delete helpers Dave Chinner
                   ` (2 subsequent siblings)
  16 siblings, 0 replies; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: Dave Chinner <dchinner@redhat.com>

Now that we have records in the rmap btree, we need to remove them
when extents are freed. This needs to find the relevant record in
the btree and remove/trim/split it accordingly.

[darrick.wong@oracle.com: make rmap routines handle the enlarged keyspace]
[dchinner: remove remaining unused debug printks]

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/libxfs/xfs_rmap.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 156 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 56627c1c..d2e01b2 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -118,6 +118,24 @@ xfs_rmap_get_rec(
 	return 0;
 }
 
+/*
+ * Find the extent in the rmap btree and remove it.
+ *
+ * The record we find should always be an exact match for the extent that we're
+ * looking for, since we insert them into the btree without modification.
+ *
+ * Special Case #1: when growing the filesystem, we "free" an extent when
+ * growing the last AG. This extent is new space and so it is not tracked as
+ * used space in the btree. The growfs code will pass in an owner of
+ * XFS_RMAP_OWN_NULL to indicate that it expects that there is no owner of this
+ * extent. We verify that - the extent lookup must result in a record that does not
+ * overlap.
+ *
+ * Special Case #2: EFIs do not record the owner of the extent, so when
+ * recovering EFIs from the log we pass in XFS_RMAP_OWN_UNKNOWN to tell the rmap
+ * btree to ignore the owner (i.e. wildcard match) so we don't trigger
+ * corruption checks during log recovery.
+ */
 int
 xfs_rmap_free(
 	struct xfs_trans	*tp,
@@ -128,19 +146,156 @@ xfs_rmap_free(
 	struct xfs_owner_info	*oinfo)
 {
 	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_btree_cur	*cur;
+	struct xfs_rmap_irec	ltrec;
+	uint64_t		ltoff;
 	int			error = 0;
+	int			i;
+	uint64_t		owner;
+	uint64_t		offset;
 
 	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
 		return 0;
 
 	trace_xfs_rmap_free_extent(mp, agno, bno, len, oinfo);
-	if (1)
+	cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+
+	xfs_owner_info_unpack(oinfo, &owner, &offset);
+
+	/*
+	 * We should always have a left record because there's a static record
+	 * for the AG headers at rm_startblock == 0 created by mkfs/growfs that
+	 * will not ever be removed from the tree.
+	 */
+	error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, &i);
+	if (error)
 		goto out_error;
+	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+	error = xfs_rmap_get_rec(cur, &ltrec, &i);
+	if (error)
+		goto out_error;
+	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+	ltoff = ltrec.rm_offset & ~XFS_RMAP_OFF_BMBT;
+
+	/*
+	 * For growfs, the incoming extent must be beyond the left record we
+	 * just found as it is new space and won't be used by anyone. This is
+	 * just a corruption check as we don't actually do anything with this
+	 * extent.
+	 */
+	if (owner == XFS_RMAP_OWN_NULL) {
+		XFS_WANT_CORRUPTED_GOTO(mp, bno > ltrec.rm_startblock +
+						ltrec.rm_blockcount, out_error);
+		goto out_done;
+	}
+
+	/* make sure the extent we found covers the entire freeing range. */
+	XFS_WANT_CORRUPTED_GOTO(mp, !XFS_RMAP_IS_UNWRITTEN(ltrec.rm_blockcount),
+		out_error);
+	XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
+		ltrec.rm_startblock + XFS_RMAP_LEN(ltrec.rm_blockcount) >=
+		bno + len, out_error);
+
+	/* make sure the owner matches what we expect to find in the tree */
+	XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
+				    XFS_RMAP_NON_INODE_OWNER(owner), out_error);
+
+	/* check the offset, if necessary */
+	if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
+		if (XFS_RMAP_IS_BMBT(offset)) {
+			XFS_WANT_CORRUPTED_GOTO(mp,
+					XFS_RMAP_IS_BMBT(ltrec.rm_offset),
+					out_error);
+		} else {
+			XFS_WANT_CORRUPTED_GOTO(mp,
+					ltrec.rm_offset <= offset, out_error);
+			XFS_WANT_CORRUPTED_GOTO(mp,
+					offset <= ltoff + ltrec.rm_blockcount,
+					out_error);
+		}
+	}
+
+	if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
+		/* exact match, simply remove the record from rmap tree */
+		error = xfs_btree_delete(cur, &i);
+		if (error)
+			goto out_error;
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+	} else if (ltrec.rm_startblock == bno) {
+		/*
+		 * overlap left hand side of extent: move the start, trim the
+		 * length and update the current record.
+		 *
+		 *       ltbno                ltlen
+		 * Orig:    |oooooooooooooooooooo|
+		 * Freeing: |fffffffff|
+		 * Result:            |rrrrrrrrrr|
+		 *         bno       len
+		 */
+		ltrec.rm_startblock += len;
+		ltrec.rm_blockcount -= len;
+		error = xfs_rmap_update(cur, &ltrec);
+		if (error)
+			goto out_error;
+	} else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) {
+		/*
+		 * overlap right hand side of extent: trim the length and update
+		 * the current record.
+		 *
+		 *       ltbno                ltlen
+		 * Orig:    |oooooooooooooooooooo|
+		 * Freeing:            |fffffffff|
+		 * Result:  |rrrrrrrrrr|
+		 *                    bno       len
+		 */
+		ltrec.rm_blockcount -= len;
+		error = xfs_rmap_update(cur, &ltrec);
+		if (error)
+			goto out_error;
+	} else {
+
+		/*
+		 * overlap middle of extent: trim the length of the existing
+		 * record to the length of the new left-extent size, increment
+		 * the insertion position so we can insert a new record
+		 * containing the remaining right-extent space.
+		 *
+		 *       ltbno                ltlen
+		 * Orig:    |oooooooooooooooooooo|
+		 * Freeing:       |fffffffff|
+		 * Result:  |rrrrr|         |rrrr|
+		 *               bno       len
+		 */
+		xfs_extlen_t	orig_len = ltrec.rm_blockcount;
+
+		ltrec.rm_blockcount = bno - ltrec.rm_startblock;
+		error = xfs_rmap_update(cur, &ltrec);
+		if (error)
+			goto out_error;
+
+		error = xfs_btree_increment(cur, 0, &i);
+		if (error)
+			goto out_error;
+
+		cur->bc_rec.r.rm_startblock = bno + len;
+		cur->bc_rec.r.rm_blockcount = orig_len - len -
+						     ltrec.rm_blockcount;
+		cur->bc_rec.r.rm_owner = ltrec.rm_owner;
+		cur->bc_rec.r.rm_offset = offset;
+		error = xfs_btree_insert(cur, &i);
+		if (error)
+			goto out_error;
+	}
+
+out_done:
 	trace_xfs_rmap_free_extent_done(mp, agno, bno, len, oinfo);
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 	return 0;
 
 out_error:
 	trace_xfs_rmap_free_extent_error(mp, agno, bno, len, oinfo);
+	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 	return error;
 }
 
@@ -262,7 +417,6 @@ xfs_rmap_alloc(
 		ltrec.rm_blockcount += len;
 		if (gtrec.rm_owner == owner &&
 		    bno + len == gtrec.rm_startblock) {
-			//printk("add middle\n");
 			/*
 			 * right edge also contiguous, delete right record
 			 * and merge into left record.
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 15/16] xfs: add rmap btree insert and delete helpers
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
                   ` (13 preceding siblings ...)
  2016-03-08  4:16 ` [PATCH 14/16] xfs: remove an extent from " Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-03-08  4:16 ` [PATCH 16/16] xfs: piggyback rmapbt update intents in the bmap free structure Dave Chinner
  2016-03-10 14:14 ` [PATCH 0/16] xfs: first part of rmapbt functionality Christoph Hellwig
  16 siblings, 0 replies; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: "Darrick J. Wong" <darrick.wong@oracle.com>

Add a couple of helper functions to encapsulate rmap btree insert and
delete operations.  Add tracepoints to the update function.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/libxfs/xfs_rmap.c       | 62 ++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_rmap_btree.h |  2 ++
 2 files changed, 64 insertions(+)

diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index d2e01b2..479cb73 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -88,6 +88,10 @@ xfs_rmap_update(
 {
 	union xfs_btree_rec	rec;
 
+	trace_xfs_rmapbt_update(cur->bc_mp, cur->bc_private.a.agno,
+			irec->rm_startblock, irec->rm_blockcount,
+			irec->rm_owner, irec->rm_offset);
+
 	rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
 	rec.rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
 	rec.rmap.rm_owner = cpu_to_be64(irec->rm_owner);
@@ -95,6 +99,64 @@ xfs_rmap_update(
 	return xfs_btree_update(cur, &rec);
 }
 
+int
+xfs_rmapbt_insert(
+	struct xfs_btree_cur	*rcur,
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		len,
+	uint64_t		owner,
+	uint64_t		offset)
+{
+	int			i;
+	int			error;
+
+	trace_xfs_rmapbt_insert(rcur->bc_mp, rcur->bc_private.a.agno, agbno,
+			len, owner, offset);
+
+	error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, &i);
+	if (error)
+		goto done;
+	XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 0, done);
+
+	rcur->bc_rec.r.rm_startblock = agbno;
+	rcur->bc_rec.r.rm_blockcount = len;
+	rcur->bc_rec.r.rm_owner = owner;
+	rcur->bc_rec.r.rm_offset = offset;
+	error = xfs_btree_insert(rcur, &i);
+	if (error)
+		goto done;
+	XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done);
+done:
+	return error;
+}
+
+STATIC int
+xfs_rmapbt_delete(
+	struct xfs_btree_cur	*rcur,
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		len,
+	uint64_t		owner,
+	uint64_t		offset)
+{
+	int			i;
+	int			error;
+
+	trace_xfs_rmapbt_delete(rcur->bc_mp, rcur->bc_private.a.agno, agbno,
+			len, owner, offset);
+
+	error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, &i);
+	if (error)
+		goto done;
+	XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done);
+
+	error = xfs_btree_delete(rcur, &i);
+	if (error)
+		goto done;
+	XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done);
+done:
+	return error;
+}
+
 /*
  * Get the data from the pointed-to record.
  */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
index 0dfc151..d7c9722 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -55,6 +55,8 @@ int xfs_rmap_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t	bno,
 		xfs_extlen_t len, uint64_t owner, uint64_t offset, int *stat);
 int xfs_rmap_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t	bno,
 		xfs_extlen_t len, uint64_t owner, uint64_t offset, int *stat);
+int xfs_rmapbt_insert(struct xfs_btree_cur *rcur, xfs_agblock_t	agbno,
+		xfs_extlen_t len, uint64_t owner, uint64_t offset);
 int xfs_rmap_get_rec(struct xfs_btree_cur *cur, struct xfs_rmap_irec *irec,
 		int *stat);
 
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 16/16] xfs: piggyback rmapbt update intents in the bmap free structure
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
                   ` (14 preceding siblings ...)
  2016-03-08  4:16 ` [PATCH 15/16] xfs: add rmap btree insert and delete helpers Dave Chinner
@ 2016-03-08  4:16 ` Dave Chinner
  2016-04-11 23:23   ` Darrick J. Wong
  2016-03-10 14:14 ` [PATCH 0/16] xfs: first part of rmapbt functionality Christoph Hellwig
  16 siblings, 1 reply; 29+ messages in thread
From: Dave Chinner @ 2016-03-08  4:16 UTC (permalink / raw)
  To: xfs

From: "Darrick J. Wong" <darrick.wong@oracle.com>

Extend the xfs_bmap_free structure to track a list of rmapbt update
intents. Record the changes being made in the new rmapbt intent list
and add hooks to process the changes to xfs_bmap_finish().

Subsequent patches will implement the rmapbt updates recorded in
the intents, allowing us to re-order the rmapbt changes to avoid
deadlocks (e.g. AG ordering rules) and sanely log the changes
without blowing out transaction reservation sizes.

This patch is derived from a couple of original patches from
Darrick, split and merged by me, with a minor change to use
list_head for the linked list.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_bmap.c       | 179 +++++++++++++++++++++++++++++++-
 fs/xfs/libxfs/xfs_bmap.h       |  16 ++-
 fs/xfs/libxfs/xfs_rmap.c       | 228 +++++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_rmap_btree.h |  57 +++++++++++
 fs/xfs/xfs_bmap_util.c         |   6 ++
 5 files changed, 479 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index f8d33c5..3ee3b8c 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -45,6 +45,7 @@
 #include "xfs_symlink.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_filestream.h"
+#include "xfs_rmap_btree.h"
 
 
 kmem_zone_t		*xfs_bmap_free_item_zone;
@@ -648,6 +649,8 @@ xfs_bmap_cancel(
 	xfs_bmap_free_item_t	*free;	/* free list item */
 	xfs_bmap_free_item_t	*next;
 
+	xfs_rmap_cancel(&flist->xbf_rlist);
+
 	if (flist->xbf_count == 0)
 		return;
 	ASSERT(flist->xbf_first != NULL);
@@ -1869,6 +1872,10 @@ xfs_bmap_add_extent_delay_real(
 			if (error)
 				goto done;
 		}
+		error = xfs_rmap_combine(mp, bma->rlist, bma->ip->i_ino,
+				whichfork, &LEFT, &RIGHT, &PREV);
+		if (error)
+			goto done;
 		break;
 
 	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -1901,6 +1908,10 @@ xfs_bmap_add_extent_delay_real(
 			if (error)
 				goto done;
 		}
+		error = xfs_rmap_resize(mp, bma->rlist, bma->ip->i_ino,
+				whichfork, &LEFT, PREV.br_blockcount);
+		if (error)
+			goto done;
 		break;
 
 	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1932,6 +1943,10 @@ xfs_bmap_add_extent_delay_real(
 			if (error)
 				goto done;
 		}
+		error = xfs_rmap_move(mp, bma->rlist, bma->ip->i_ino,
+				whichfork, &RIGHT, -PREV.br_blockcount);
+		if (error)
+			goto done;
 		break;
 
 	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -1961,6 +1976,10 @@ xfs_bmap_add_extent_delay_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
+		error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
+				whichfork, new);
+		if (error)
+			goto done;
 		break;
 
 	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -1996,6 +2015,10 @@ xfs_bmap_add_extent_delay_real(
 			if (error)
 				goto done;
 		}
+		error = xfs_rmap_resize(mp, bma->rlist, bma->ip->i_ino,
+				whichfork, &LEFT, new->br_blockcount);
+		if (error)
+			goto done;
 		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
 			startblockval(PREV.br_startblock));
 		xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
@@ -2031,6 +2054,10 @@ xfs_bmap_add_extent_delay_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
+		error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
+				whichfork, new);
+		if (error)
+			goto done;
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
@@ -2079,6 +2106,8 @@ xfs_bmap_add_extent_delay_real(
 			if (error)
 				goto done;
 		}
+		error = xfs_rmap_move(mp, bma->rlist, bma->ip->i_ino,
+				whichfork, &RIGHT, -new->br_blockcount);
 
 		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
 			startblockval(PREV.br_startblock));
@@ -2115,6 +2144,10 @@ xfs_bmap_add_extent_delay_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
+		error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
+				whichfork, new);
+		if (error)
+			goto done;
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
@@ -2184,6 +2217,10 @@ xfs_bmap_add_extent_delay_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
+		error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
+				whichfork, new);
+		if (error)
+			goto done;
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
@@ -2425,6 +2462,10 @@ xfs_bmap_add_extent_unwritten_real(
 				RIGHT.br_blockcount, LEFT.br_state)))
 				goto done;
 		}
+		error = xfs_rmap_combine(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, &LEFT, &RIGHT, &PREV);
+		if (error)
+			goto done;
 		break;
 
 	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -2462,6 +2503,10 @@ xfs_bmap_add_extent_unwritten_real(
 				LEFT.br_state)))
 				goto done;
 		}
+		error = xfs_rmap_lcombine(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, &LEFT, &PREV);
+		if (error)
+			goto done;
 		break;
 
 	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -2497,6 +2542,10 @@ xfs_bmap_add_extent_unwritten_real(
 				newext)))
 				goto done;
 		}
+		error = xfs_rmap_rcombine(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, &RIGHT, &PREV);
+		if (error)
+			goto done;
 		break;
 
 	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -2523,6 +2572,11 @@ xfs_bmap_add_extent_unwritten_real(
 				newext)))
 				goto done;
 		}
+
+		error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, new, 0);
+		if (error)
+			goto done;
 		break;
 
 	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -2570,6 +2624,14 @@ xfs_bmap_add_extent_unwritten_real(
 			if (error)
 				goto done;
 		}
+		error = xfs_rmap_move(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, &PREV, new->br_blockcount);
+		if (error)
+			goto done;
+		error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, &LEFT, new->br_blockcount);
+		if (error)
+			goto done;
 		break;
 
 	case BMAP_LEFT_FILLING:
@@ -2608,6 +2670,14 @@ xfs_bmap_add_extent_unwritten_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
+		error = xfs_rmap_move(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, &PREV, new->br_blockcount);
+		if (error)
+			goto done;
+		error = xfs_rmap_insert(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, new);
+		if (error)
+			goto done;
 		break;
 
 	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -2650,6 +2720,14 @@ xfs_bmap_add_extent_unwritten_real(
 				newext)))
 				goto done;
 		}
+		error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, &PREV, -new->br_blockcount);
+		if (error)
+			goto done;
+		error = xfs_rmap_move(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, &RIGHT, -new->br_blockcount);
+		if (error)
+			goto done;
 		break;
 
 	case BMAP_RIGHT_FILLING:
@@ -2690,6 +2768,14 @@ xfs_bmap_add_extent_unwritten_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
+		error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, &PREV, -new->br_blockcount);
+		if (error)
+			goto done;
+		error = xfs_rmap_insert(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, new);
+		if (error)
+			goto done;
 		break;
 
 	case 0:
@@ -2751,6 +2837,19 @@ xfs_bmap_add_extent_unwritten_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
+		error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, &PREV, new->br_startoff -
+				PREV.br_startoff - PREV.br_blockcount);
+		if (error)
+			goto done;
+		error = xfs_rmap_insert(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, new);
+		if (error)
+			goto done;
+		error = xfs_rmap_insert(mp, &flist->xbf_rlist, ip->i_ino,
+				XFS_DATA_FORK, &r[1]);
+		if (error)
+			goto done;
 		break;
 
 	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
@@ -2954,6 +3053,7 @@ xfs_bmap_add_extent_hole_real(
 	int			rval=0;	/* return value (logging flags) */
 	int			state;	/* state bits, accessed thru macros */
 	struct xfs_mount	*mp;
+	struct xfs_bmbt_irec	prev;	/* fake previous extent entry */
 
 	mp = bma->ip->i_mount;
 	ifp = XFS_IFORK_PTR(bma->ip, whichfork);
@@ -3061,6 +3161,12 @@ xfs_bmap_add_extent_hole_real(
 			if (error)
 				goto done;
 		}
+		prev = *new;
+		prev.br_startblock = nullstartblock(0);
+		error = xfs_rmap_combine(mp, bma->rlist, bma->ip->i_ino,
+				whichfork, &left, &right, &prev);
+		if (error)
+			goto done;
 		break;
 
 	case BMAP_LEFT_CONTIG:
@@ -3093,6 +3199,10 @@ xfs_bmap_add_extent_hole_real(
 			if (error)
 				goto done;
 		}
+		error = xfs_rmap_resize(mp, bma->rlist, bma->ip->i_ino,
+				whichfork, &left, new->br_blockcount);
+		if (error)
+			goto done;
 		break;
 
 	case BMAP_RIGHT_CONTIG:
@@ -3127,6 +3237,10 @@ xfs_bmap_add_extent_hole_real(
 			if (error)
 				goto done;
 		}
+		error = xfs_rmap_move(mp, bma->rlist, bma->ip->i_ino,
+				whichfork, &right, -new->br_blockcount);
+		if (error)
+			goto done;
 		break;
 
 	case 0:
@@ -3155,6 +3269,10 @@ xfs_bmap_add_extent_hole_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
+		error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
+				whichfork, new);
+		if (error)
+			goto done;
 		break;
 	}
 
@@ -4289,7 +4407,6 @@ xfs_bmapi_delay(
 	return 0;
 }
 
-
 static int
 xfs_bmapi_allocate(
 	struct xfs_bmalloca	*bma)
@@ -4603,6 +4720,7 @@ xfs_bmapi_write(
 	bma.userdata = 0;
 	bma.flist = flist;
 	bma.firstblock = firstblock;
+	bma.rlist = &flist->xbf_rlist;
 
 	while (bno < end && n < *nmap) {
 		inhole = eof || bma.got.br_startoff > bno;
@@ -4861,6 +4979,10 @@ xfs_bmap_del_extent(
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
 		flags |= XFS_ILOG_CORE;
+		error = xfs_rmap_delete(mp, &flist->xbf_rlist, ip->i_ino,
+				whichfork, &got);
+		if (error)
+			goto done;
 		if (!cur) {
 			flags |= xfs_ilog_fext(whichfork);
 			break;
@@ -4888,6 +5010,10 @@ xfs_bmap_del_extent(
 		}
 		xfs_bmbt_set_startblock(ep, del_endblock);
 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		error = xfs_rmap_move(mp, &flist->xbf_rlist, ip->i_ino,
+				whichfork, &got, del->br_blockcount);
+		if (error)
+			goto done;
 		if (!cur) {
 			flags |= xfs_ilog_fext(whichfork);
 			break;
@@ -4914,6 +5040,10 @@ xfs_bmap_del_extent(
 			break;
 		}
 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
+				whichfork, &got, -del->br_blockcount);
+		if (error)
+			goto done;
 		if (!cur) {
 			flags |= xfs_ilog_fext(whichfork);
 			break;
@@ -4939,6 +5069,15 @@ xfs_bmap_del_extent(
 		if (!delay) {
 			new.br_startblock = del_endblock;
 			flags |= XFS_ILOG_CORE;
+			error = xfs_rmap_resize(mp, &flist->xbf_rlist,
+					ip->i_ino, whichfork, &got,
+					temp - got.br_blockcount);
+			if (error)
+				goto done;
+			error = xfs_rmap_insert(mp, &flist->xbf_rlist,
+					ip->i_ino, whichfork, &new);
+			if (error)
+				goto done;
 			if (cur) {
 				if ((error = xfs_bmbt_update(cur,
 						got.br_startoff,
@@ -5175,6 +5314,7 @@ xfs_bunmapi(
 			got.br_startoff + got.br_blockcount - 1);
 		if (bno < start)
 			break;
+
 		/*
 		 * Then deal with the (possibly delayed) allocated space
 		 * we found.
@@ -5477,7 +5617,8 @@ xfs_bmse_merge(
 	struct xfs_bmbt_rec_host	*gotp,		/* extent to shift */
 	struct xfs_bmbt_rec_host	*leftp,		/* preceding extent */
 	struct xfs_btree_cur		*cur,
-	int				*logflags)	/* output */
+	int				*logflags,	/* output */
+	struct xfs_rmap_list		*rlist)		/* rmap intent list */
 {
 	struct xfs_bmbt_irec		got;
 	struct xfs_bmbt_irec		left;
@@ -5508,6 +5649,13 @@ xfs_bmse_merge(
 	XFS_IFORK_NEXT_SET(ip, whichfork,
 			   XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
 	*logflags |= XFS_ILOG_CORE;
+	error = xfs_rmap_resize(mp, rlist, ip->i_ino, whichfork, &left,
+			blockcount - left.br_blockcount);
+	if (error)
+		return error;
+	error = xfs_rmap_delete(mp, rlist, ip->i_ino, whichfork, &got);
+	if (error)
+		return error;
 	if (!cur) {
 		*logflags |= XFS_ILOG_DEXT;
 		return 0;
@@ -5550,7 +5698,8 @@ xfs_bmse_shift_one(
 	struct xfs_bmbt_rec_host	*gotp,
 	struct xfs_btree_cur		*cur,
 	int				*logflags,
-	enum shift_direction		direction)
+	enum shift_direction		direction,
+	struct xfs_rmap_list		*rlist)
 {
 	struct xfs_ifork		*ifp;
 	struct xfs_mount		*mp;
@@ -5600,7 +5749,7 @@ xfs_bmse_shift_one(
 				       offset_shift_fsb)) {
 			return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
 					      *current_ext, gotp, adj_irecp,
-					      cur, logflags);
+					      cur, logflags, rlist);
 		}
 	} else {
 		startoff = got.br_startoff + offset_shift_fsb;
@@ -5637,6 +5786,10 @@ update_current_ext:
 		(*current_ext)--;
 	xfs_bmbt_set_startoff(gotp, startoff);
 	*logflags |= XFS_ILOG_CORE;
+	error = xfs_rmap_slide(mp, rlist, ip->i_ino, whichfork,
+			&got, startoff - got.br_startoff);
+	if (error)
+		return error;
 	if (!cur) {
 		*logflags |= XFS_ILOG_DEXT;
 		return 0;
@@ -5776,9 +5929,11 @@ xfs_bmap_shift_extents(
 	}
 
 	while (nexts++ < num_exts) {
+		xfs_bmbt_get_all(gotp, &got);
+
 		error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
 					   &current_ext, gotp, cur, &logflags,
-					   direction);
+					   direction, &flist->xbf_rlist);
 		if (error)
 			goto del_cursor;
 		/*
@@ -5831,6 +5986,7 @@ xfs_bmap_split_extent_at(
 	int				whichfork = XFS_DATA_FORK;
 	struct xfs_btree_cur		*cur = NULL;
 	struct xfs_bmbt_rec_host	*gotp;
+	struct xfs_bmbt_irec		rgot;
 	struct xfs_bmbt_irec		got;
 	struct xfs_bmbt_irec		new; /* split extent */
 	struct xfs_mount		*mp = ip->i_mount;
@@ -5840,6 +5996,7 @@ xfs_bmap_split_extent_at(
 	int				error = 0;
 	int				logflags = 0;
 	int				i = 0;
+	long				adj;
 
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -5879,6 +6036,7 @@ xfs_bmap_split_extent_at(
 	if (got.br_startoff >= split_fsb)
 		return 0;
 
+	rgot = got;
 	gotblkcnt = split_fsb - got.br_startoff;
 	new.br_startoff = split_fsb;
 	new.br_startblock = got.br_startblock + gotblkcnt;
@@ -5934,6 +6092,17 @@ xfs_bmap_split_extent_at(
 		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
 	}
 
+	/* update rmapbt */
+	adj = -(long)rgot.br_blockcount + gotblkcnt;
+	error = xfs_rmap_resize(mp, &free_list->xbf_rlist, ip->i_ino,
+			whichfork, &rgot, adj);
+	if (error)
+		goto del_cursor;
+	error = xfs_rmap_insert(mp, &free_list->xbf_rlist, ip->i_ino,
+			whichfork, &new);
+	if (error)
+		goto del_cursor;
+
 	/*
 	 * Convert to a btree if necessary.
 	 */
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 06dbe08..38cd9b5 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -56,6 +56,7 @@ struct xfs_bmalloca {
 	bool			conv;	/* overwriting unwritten extents */
 	char			userdata;/* userdata mask */
 	int			flags;
+	struct xfs_rmap_list	*rlist;
 };
 
 /*
@@ -70,6 +71,11 @@ typedef struct xfs_bmap_free_item
 	struct xfs_bmap_free_item *xbfi_next;	/* link to next entry */
 } xfs_bmap_free_item_t;
 
+struct xfs_rmap_list {
+	struct list_head	rl_list;
+	int			rl_count;
+};
+
 /*
  * Header for free extent list.
  *
@@ -89,6 +95,7 @@ typedef	struct xfs_bmap_free
 	xfs_bmap_free_item_t	*xbf_first;	/* list of to-be-free extents */
 	int			xbf_count;	/* count of items on list */
 	int			xbf_low;	/* alloc in low mode */
+	struct xfs_rmap_list	xbf_rlist;	/* rmap intent list */
 } xfs_bmap_free_t;
 
 #define	XFS_BMAP_MAX_NMAP	4
@@ -142,8 +149,13 @@ static inline int xfs_bmapi_aflag(int w)
 
 static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
 {
-	((flp)->xbf_first = NULL, (flp)->xbf_count = 0, \
-		(flp)->xbf_low = 0, *(fbp) = NULLFSBLOCK);
+	flp->xbf_first = NULL;
+	flp->xbf_count = 0;
+	flp->xbf_low = 0;
+	*fbp = NULLFSBLOCK;
+
+	INIT_LIST_HEAD(&flp->xbf_rlist.rl_list);
+	flp->xbf_rlist.rl_count = 0;
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 479cb73..a7130c5 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -35,6 +35,7 @@
 #include "xfs_trace.h"
 #include "xfs_error.h"
 #include "xfs_extent_busy.h"
+#include "xfs_bmap.h"
 
 /*
  * Lookup the first record less than or equal to [bno, len, owner, offset]
@@ -542,3 +543,230 @@ out_error:
 	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 	return error;
 }
+
+
+/*
+ * Free up any items left in the list.
+ */
+void
+xfs_rmap_cancel(
+	struct xfs_rmap_list	*rlist)	/* list of rmap intents */
+{
+	if (list_empty(&rlist->rl_list))
+		return;
+	while (!list_empty(&rlist->rl_list)) {
+		struct xfs_rmap_intent *free;
+
+		free = list_first_entry(&rlist->rl_list, struct xfs_rmap_intent,
+					ri_list);
+		list_del(&free->ri_list);
+		kmem_free(free);
+	}
+	rlist->rl_count = 0;
+}
+
+/*
+ * Free up any items left in the intent list.
+ */
+int
+xfs_rmap_finish(
+	struct xfs_mount	*mp,
+	struct xfs_trans	**tpp,
+	struct xfs_inode	*ip,
+	struct xfs_rmap_list	*rlist)
+{
+	/* Not yet implemented, just cancel until implemented */
+	xfs_rmap_cancel(rlist);
+	return 0;
+}
+
+/*
+ * Record a rmap intent. The list should eventually be kept sorted first by AG
+ * and then by increasing age; ordering is not yet implemented.
+ */
+static int
+__xfs_rmap_add(
+	struct xfs_mount	*mp,
+	struct xfs_rmap_list	*rlist,
+	struct xfs_rmap_intent	*ri)
+{
+	struct xfs_rmap_intent	*new;
+
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return 0;
+
+	new = kmem_zalloc(sizeof(struct xfs_rmap_intent), KM_SLEEP | KM_NOFS);
+	*new = *ri;
+	INIT_LIST_HEAD(&new->ri_list);
+
+	/* XXX: ordering will be needed */
+	list_add(&new->ri_list, &rlist->rl_list);
+	rlist->rl_count++;
+	return 0;
+}
+
+/* Combine two adjacent rmap extents */
+int
+xfs_rmap_combine(
+	struct xfs_mount	*mp,
+	struct xfs_rmap_list	*rlist,
+	xfs_ino_t		ino,
+	int			whichfork,
+	struct xfs_bmbt_irec	*left,
+	struct xfs_bmbt_irec	*right,
+	struct xfs_bmbt_irec	*prev)
+{
+	struct xfs_rmap_intent	ri;
+
+	ri.ri_type = XFS_RMAP_COMBINE;
+	ri.ri_ino = ino;
+	ri.ri_whichfork = whichfork;
+	ri.ri_prev = *prev;
+	ri.ri_u.a.left = *left;
+	ri.ri_u.a.right = *right;
+
+	return __xfs_rmap_add(mp, rlist, &ri);
+}
+
+/* Extend a left rmap extent */
+int
+xfs_rmap_lcombine(
+	struct xfs_mount	*mp,
+	struct xfs_rmap_list	*rlist,
+	xfs_ino_t		ino,
+	int			whichfork,
+	struct xfs_bmbt_irec	*LEFT,
+	struct xfs_bmbt_irec	*PREV)
+{
+	struct xfs_rmap_intent	ri;
+
+	ri.ri_type = XFS_RMAP_LCOMBINE;
+	ri.ri_ino = ino;
+	ri.ri_whichfork = whichfork;
+	ri.ri_prev = *PREV;
+	ri.ri_u.a.left = *LEFT;
+
+	return __xfs_rmap_add(mp, rlist, &ri);
+}
+
+/* Extend a right rmap extent */
+int
+xfs_rmap_rcombine(
+	struct xfs_mount	*mp,
+	struct xfs_rmap_list	*rlist,
+	xfs_ino_t		ino,
+	int			whichfork,
+	struct xfs_bmbt_irec	*RIGHT,
+	struct xfs_bmbt_irec	*PREV)
+{
+	struct xfs_rmap_intent	ri;
+
+	ri.ri_type = XFS_RMAP_RCOMBINE;
+	ri.ri_ino = ino;
+	ri.ri_whichfork = whichfork;
+	ri.ri_prev = *PREV;
+	ri.ri_u.a.right = *RIGHT;
+
+	return __xfs_rmap_add(mp, rlist, &ri);
+}
+
+/* Insert a rmap extent */
+int
+xfs_rmap_insert(
+	struct xfs_mount	*mp,
+	struct xfs_rmap_list	*rlist,
+	xfs_ino_t		ino,
+	int			whichfork,
+	struct xfs_bmbt_irec	*new)
+{
+	struct xfs_rmap_intent	ri;
+
+	ri.ri_type = XFS_RMAP_INSERT;
+	ri.ri_ino = ino;
+	ri.ri_whichfork = whichfork;
+	ri.ri_prev = *new;
+
+	return __xfs_rmap_add(mp, rlist, &ri);
+}
+
+/* Delete a rmap extent */
+int
+xfs_rmap_delete(
+	struct xfs_mount	*mp,
+	struct xfs_rmap_list	*rlist,
+	xfs_ino_t		ino,
+	int			whichfork,
+	struct xfs_bmbt_irec	*new)
+{
+	struct xfs_rmap_intent	ri;
+
+	ri.ri_type = XFS_RMAP_DELETE;
+	ri.ri_ino = ino;
+	ri.ri_whichfork = whichfork;
+	ri.ri_prev = *new;
+
+	return __xfs_rmap_add(mp, rlist, &ri);
+}
+
+/* Change the start of an rmap */
+int
+xfs_rmap_move(
+	struct xfs_mount	*mp,
+	struct xfs_rmap_list	*rlist,
+	xfs_ino_t		ino,
+	int			whichfork,
+	struct xfs_bmbt_irec	*PREV,
+	long			start_adj)
+{
+	struct xfs_rmap_intent	ri;
+
+	ri.ri_type = XFS_RMAP_MOVE;
+	ri.ri_ino = ino;
+	ri.ri_whichfork = whichfork;
+	ri.ri_prev = *PREV;
+	ri.ri_u.b.adj = start_adj;
+
+	return __xfs_rmap_add(mp, rlist, &ri);
+}
+
+/* Change the logical offset of an rmap */
+int
+xfs_rmap_slide(
+	struct xfs_mount	*mp,
+	struct xfs_rmap_list	*rlist,
+	xfs_ino_t		ino,
+	int			whichfork,
+	struct xfs_bmbt_irec	*PREV,
+	long			start_adj)
+{
+	struct xfs_rmap_intent	ri;
+
+	ri.ri_type = XFS_RMAP_SLIDE;
+	ri.ri_ino = ino;
+	ri.ri_whichfork = whichfork;
+	ri.ri_prev = *PREV;
+	ri.ri_u.b.adj = start_adj;
+
+	return __xfs_rmap_add(mp, rlist, &ri);
+}
+
+/* Change the size of an rmap */
+int
+xfs_rmap_resize(
+	struct xfs_mount	*mp,
+	struct xfs_rmap_list	*rlist,
+	xfs_ino_t		ino,
+	int			whichfork,
+	struct xfs_bmbt_irec	*PREV,
+	long			size_adj)
+{
+	struct xfs_rmap_intent	ri;
+
+	ri.ri_type = XFS_RMAP_RESIZE;
+	ri.ri_ino = ino;
+	ri.ri_whichfork = whichfork;
+	ri.ri_prev = *PREV;
+	ri.ri_u.b.adj = size_adj;
+
+	return __xfs_rmap_add(mp, rlist, &ri);
+}
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
index d7c9722..599fa3a 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -21,6 +21,7 @@
 struct xfs_buf;
 struct xfs_btree_cur;
 struct xfs_mount;
+struct xfs_rmap_list;
 
 /* rmaps only exist on crc enabled filesystems */
 #define XFS_RMAP_BLOCK_LEN	XFS_BTREE_SBLOCK_CRC_LEN
@@ -68,4 +69,60 @@ int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
 		  xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
 		  struct xfs_owner_info *oinfo);
 
+/* functions for updating the rmapbt based on bmbt map/unmap operations */
+int xfs_rmap_combine(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *LEFT,
+		struct xfs_bmbt_irec *RIGHT, struct xfs_bmbt_irec *PREV);
+int xfs_rmap_lcombine(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *LEFT,
+		struct xfs_bmbt_irec *PREV);
+int xfs_rmap_rcombine(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *RIGHT,
+		struct xfs_bmbt_irec *PREV);
+int xfs_rmap_insert(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *rec);
+int xfs_rmap_delete(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *rec);
+int xfs_rmap_move(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *PREV,
+		long start_adj);
+int xfs_rmap_slide(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *PREV,
+		long start_adj);
+int xfs_rmap_resize(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *PREV,
+		long size_adj);
+
+enum xfs_rmap_intent_type {	/* value stored in xfs_rmap_intent.ri_type */
+	XFS_RMAP_COMBINE,	/* presumably used by xfs_rmap_combine() - confirm */
+	XFS_RMAP_LCOMBINE,	/* presumably used by xfs_rmap_lcombine() - confirm */
+	XFS_RMAP_RCOMBINE,	/* presumably used by xfs_rmap_rcombine() - confirm */
+	XFS_RMAP_INSERT,	/* presumably used by xfs_rmap_insert() - confirm */
+	XFS_RMAP_DELETE,	/* set by xfs_rmap_delete() */
+	XFS_RMAP_MOVE,	/* set by xfs_rmap_move(): change the start of an rmap */
+	XFS_RMAP_SLIDE,	/* set by xfs_rmap_slide(): change the logical offset */
+	XFS_RMAP_RESIZE,	/* set by xfs_rmap_resize(): change the size */
+};
+
+struct xfs_rmap_intent {	/* describes one rmapbt update intent */
+	struct list_head			ri_list;	/* list linkage; presumably on an xfs_rmap_list - confirm */
+	enum xfs_rmap_intent_type		ri_type;	/* which update this intent describes */
+	xfs_ino_t				ri_ino;	/* inode number supplied by the caller */
+	int					ri_whichfork;	/* fork selector supplied by the caller */
+	struct xfs_bmbt_irec			ri_prev;	/* by-value copy of the mapping being changed */
+	union {	/* per-type payload; a and b share storage */
+		struct {
+			struct xfs_bmbt_irec	left;	/* presumably the LEFT neighbour for combine ops - confirm */
+			struct xfs_bmbt_irec	right;	/* presumably the RIGHT neighbour for combine ops - confirm */
+		} a;
+		struct {
+			long			adj;	/* signed adjustment for move/slide/resize */
+		} b;
+	} ri_u;
+};
+
+void	xfs_rmap_cancel(struct xfs_rmap_list *rlist);
+int	xfs_rmap_finish(struct xfs_mount *mp, struct xfs_trans **tpp,
+			struct xfs_inode *ip, struct xfs_rmap_list *rlist);
+
 #endif	/* __XFS_RMAP_BTREE_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 834639d..a9cf94e 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -40,6 +40,7 @@
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_log.h"
+#include "xfs_rmap_btree.h"
 
 /* Kernel only BMAP related definitions and functions */
 
@@ -109,6 +110,11 @@ xfs_bmap_finish(
 	struct xfs_bmap_free_item	*next;	/* next item on free list */
 
 	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
+
+	error = xfs_rmap_finish((*tp)->t_mountp, tp, ip, &flist->xbf_rlist);
+	if (error)
+		return error;
+
 	if (flist->xbf_count == 0)
 		return 0;
 
-- 
2.7.0

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 04/16] libxfs: rearrange xfs_bmap_add_free parameters
  2016-03-08  4:16 ` [PATCH 04/16] libxfs: rearrange xfs_bmap_add_free parameters Dave Chinner
@ 2016-03-08 17:18   ` Christoph Hellwig
  0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2016-03-08 17:18 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/16] xfs: first part of rmapbt functionality
  2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
                   ` (15 preceding siblings ...)
  2016-03-08  4:16 ` [PATCH 16/16] xfs: piggyback rmapbt update intents in the bmap free structure Dave Chinner
@ 2016-03-10 14:14 ` Christoph Hellwig
  2016-03-10 16:57   ` Darrick J. Wong
  2016-03-10 21:44   ` Dave Chinner
  16 siblings, 2 replies; 29+ messages in thread
From: Christoph Hellwig @ 2016-03-10 14:14 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

On Tue, Mar 08, 2016 at 03:16:02PM +1100, Dave Chinner wrote:
> This isn't all of the rmap functionality. It's patches up to the
> point where I've come across the first piece that needs to be
> reworked (the rmap intent execution code), so there's no point
> holding these back until I've sorted that out. This builds on top of
> for-next and the patch set I posted yesterday.
> 
> Darrick, I've changed the authorship of the patches to reflect
> the original series this has come from - can you check to see if
> there's anything I got wrong when I did that?

I'll come to some minor bits on the actual patches, but I'd like to
understand a few fundamental things first:

For one, Darrick has introduced a new rmapxbt btree recently, which
allows using a rmap on reflink enabled file systems.  In his tree
we thus have two different implementations of a reverse mapping
btree.  Is there any good reason to keep it this way?  For one,
reflinks are a compelling feature that I doubt people want to
disable in the long run, so most filesystems will be using rmapxbt.
I also don't think having these two implementations is good for the
testing matrix in the long run.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 03/16] xfs: rmap btree add more reserved blocks
  2016-03-08  4:16 ` [PATCH 03/16] xfs: rmap btree add more reserved blocks Dave Chinner
@ 2016-03-10 14:16   ` Christoph Hellwig
  2016-03-10 14:22   ` Christoph Hellwig
  1 sibling, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2016-03-10 14:16 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

> +	mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);

We only use the m_ag_prealloc_blocks member in xfs_growfs_data_private.
I think a local variable would do it as well, as growfs isn't exactly
a frequent fast path operation.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 05/16] xfs: add owner field to extent allocation and freeing
  2016-03-08  4:16 ` [PATCH 05/16] xfs: add owner field to extent allocation and freeing Dave Chinner
@ 2016-03-10 14:19   ` Christoph Hellwig
  2016-03-28 22:05     ` Darrick J. Wong
  0 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2016-03-10 14:19 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

On Tue, Mar 08, 2016 at 03:16:07PM +1100, Dave Chinner wrote:
> From: "Darrick J. Wong" <darrick.wong@oracle.com>
> 
> For the rmap btree to work, we have to fed the extent owner

s/fed/feed/

> +/*
> + * Ownership info for an extent.  This is used to create reverse-mapping
> + * entries.
> + */
> +#define XFS_RMAP_INO_ATTR_FORK	(1)
> +#define XFS_RMAP_BMBT_BLOCK	(2)

These are OR-able flags - it might make sense to define them as:

#define XFS_RMAP_INO_ATTR_FORK	(1 << 0)
#define XFS_RMAP_BMBT_BLOCK	(1 << 1)

to make this more obvious.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 03/16] xfs: rmap btree add more reserved blocks
  2016-03-08  4:16 ` [PATCH 03/16] xfs: rmap btree add more reserved blocks Dave Chinner
  2016-03-10 14:16   ` Christoph Hellwig
@ 2016-03-10 14:22   ` Christoph Hellwig
  2016-03-10 22:09     ` Dave Chinner
  1 sibling, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2016-03-10 14:22 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

Sorry for the second reply to the same mail - I expect this definition to
be in patch 7, where it logically belongs..

> +#define	XFS_RMAP_BLOCK(mp) \
>  	(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
>  	 XFS_FIBT_BLOCK(mp) + 1 : \
>  	 XFS_IBT_BLOCK(mp) + 1)

Is there any good reason for the variable offset for the rmap block?
Yes, it saves one otherwise unused block per AG, but fixed offsets
for metadata make a format much easier to understand.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/16] xfs: first part of rmapbt functionality
  2016-03-10 14:14 ` [PATCH 0/16] xfs: first part of rmapbt functionality Christoph Hellwig
@ 2016-03-10 16:57   ` Darrick J. Wong
  2016-03-10 21:44   ` Dave Chinner
  1 sibling, 0 replies; 29+ messages in thread
From: Darrick J. Wong @ 2016-03-10 16:57 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Thu, Mar 10, 2016 at 06:14:34AM -0800, Christoph Hellwig wrote:
> On Tue, Mar 08, 2016 at 03:16:02PM +1100, Dave Chinner wrote:
> > This isn't all of the rmap functionality. It's patches up to the
> > point where I've come across the first piece that needs to be
> > reworked (the rmap intent execution code), so there's no point
> > holding these back until I've sorted that out. This builds on top of
> > for-next and the patch set I posted yesterday.
> > 
> > Darrick, I've changed the authorship of the patches to reflect
> > the original series this has come from - can you check to see if
> > there's anything I got wrong when I did that?
> 
> I'll come some minor bits on the actual patches, but I'd like to
> understand a few fundamental things first:
> 
> For one Darrick has introduced a new rmapxbt btree recently, which
> allows using a rmap on reflink enabled file systems.  In his tree
> we thus have two different implementation of a reverse mapping
> btree.  Is there any good reason to keep it this way?  For one
> reflinks are a compelling feature that I doubt people want to
> disable in the long run, so most filesystem will be using rmapxbt.
> I also don't think having these two implementations is good for the
> testing matrix in the long run.

The only compelling reason for the split rmapbt/rmapxbt is to increase the
fanout factor for (!reflink && rmap) by a factor of 5.  If we assume a 4K
block size, that works out to the rmapbt occupying about 0.023% more space.
(It's half a percent for a 1k block size, but I assume that's not a common
case.)

Functionality-wise, there's no reason why we can't just run the rmapxbt
even if reflink is disabled.  If our notion is to introduce both features
at the same time then Christoph is probably right that we don't need to
have both tree types.

--D

> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/16] xfs: first part of rmapbt functionality
  2016-03-10 14:14 ` [PATCH 0/16] xfs: first part of rmapbt functionality Christoph Hellwig
  2016-03-10 16:57   ` Darrick J. Wong
@ 2016-03-10 21:44   ` Dave Chinner
  2016-03-25 23:00     ` Darrick J. Wong
  1 sibling, 1 reply; 29+ messages in thread
From: Dave Chinner @ 2016-03-10 21:44 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Thu, Mar 10, 2016 at 06:14:34AM -0800, Christoph Hellwig wrote:
> On Tue, Mar 08, 2016 at 03:16:02PM +1100, Dave Chinner wrote:
> > This isn't all of the rmap functionality. It's patches up to the
> > point where I've come across the first piece that needs to be
> > reworked (the rmap intent execution code), so there's no point
> > holding these back until I've sorted that out. This builds on top of
> > for-next and the patch set I posted yesterday.
> > 
> > Darrick, I've changed the authorship of the patches to reflect
> > the original series this has come from - can you check to see if
> > there's anything I got wrong when I did that?
> 
> I'll come some minor bits on the actual patches, but I'd like to
> understand a few fundamental things first:
> 
> For one Darrick has introduced a new rmapxbt btree recently, which
> allows using a rmap on reflink enabled file systems.  In his tree
> we thus have two different implementation of a reverse mapping
> btree.  Is there any good reason to keep it this way?  For one
> reflinks are a compelling feature that I doubt people want to
> disable in the long run, so most filesystem will be using rmapxbt.
> I also don't think having these two implementations is good for the
> testing matrix in the long run.

I haven't got as far as the rmapxbt code yet - it's currently at the
end of the entire series, and I'm trying to sort out problems in
infrastructure right now (i.e. rmapbt modifications are atomic and
crash safe w.r.t. bmapbt changes and EFI processing).

I'm planning on re-ordering the rmapxbt and interval query tree
stuff to before the reflink code is included, but I haven't got
that far yet so I haven't looked at the code yet. It's slow going, and
right now I don't think I'm going to have even a complete rmapbt
series done in time for the 4.6 merge window, let alone all
the extra stuff Darrick has done.

So with only a couple of days left before the merge window opens, I
think this all needs to slip to the next merge window while we sort
out what disk format we are going to use and rework the series to
introduce only that format.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 03/16] xfs: rmap btree add more reserved blocks
  2016-03-10 14:22   ` Christoph Hellwig
@ 2016-03-10 22:09     ` Dave Chinner
  2016-03-11  7:32       ` Christoph Hellwig
  0 siblings, 1 reply; 29+ messages in thread
From: Dave Chinner @ 2016-03-10 22:09 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Thu, Mar 10, 2016 at 06:22:07AM -0800, Christoph Hellwig wrote:
> Sorry for the second reply to the same mail - I expect this defintion to
> be in patch 7, where it logically belongs..
> 
> > +#define	XFS_RMAP_BLOCK(mp) \
> >  	(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
> >  	 XFS_FIBT_BLOCK(mp) + 1 : \
> >  	 XFS_IBT_BLOCK(mp) + 1)
> 
> Is there any good reason for the variable offset for the rmap block.
> Yes, it saves one otherwise unused block per AG, but fixed offsets
> for metadata make a format much easier to understand.

The root btree blocks are not a fixed location. The moment the tree
splits to the root we get a new root block and the old root is now
at a lower level of the tree. IOWs, it doesn't matter if there is a
hole at the time of growfs/mkfs laying out the initial tree roots
because they are going to move around anyway.

Also, it depends on the sector vs block size as to where these are
initially located, too. A disk with 512 byte sectors and 4k block
size results in the root btree blocks being located from fsb 2-7.
A 4k sector/4k block size fs has the root btree blocks located
from fsb 4-9.

Hence I really don't think it matters if the initial location
changes depending on features and geometry as you have to look it
up with xfs_db anyway to find it even on a pristine new
filesystem...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 03/16] xfs: rmap btree add more reserved blocks
  2016-03-10 22:09     ` Dave Chinner
@ 2016-03-11  7:32       ` Christoph Hellwig
  0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2016-03-11  7:32 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, xfs

On Fri, Mar 11, 2016 at 09:09:45AM +1100, Dave Chinner wrote:
> The root btree blocks are not a fixed location. The moment the tree
> splits to the root we get a new root block and the old root is now
> at a lower level of the tree. IOWs, it doesn't matter if there is a
> hole at the time of growfs/mkfs laying out the initial tree roots
> because they are going to move around anyway.

Yeah, once we update the root it moves anyway.  Objection retracted..

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/16] xfs: first part of rmapbt functionality
  2016-03-10 21:44   ` Dave Chinner
@ 2016-03-25 23:00     ` Darrick J. Wong
  0 siblings, 0 replies; 29+ messages in thread
From: Darrick J. Wong @ 2016-03-25 23:00 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, xfs

On Fri, Mar 11, 2016 at 08:44:32AM +1100, Dave Chinner wrote:
> On Thu, Mar 10, 2016 at 06:14:34AM -0800, Christoph Hellwig wrote:
> > On Tue, Mar 08, 2016 at 03:16:02PM +1100, Dave Chinner wrote:
> > > This isn't all of the rmap functionality. It's patches up to the
> > > point where I've come across the first piece that needs to be
> > > reworked (the rmap intent execution code), so there's no point
> > > holding these back until I've sorted that out. This builds on top of
> > > for-next and the patch set I posted yesterday.
> > > 
> > > Darrick, I've changed the authorship of the patches to reflect
> > > the original series this has come from - can you check to see if
> > > there's anything I got wrong when I did that?

Looks ok to me.

> > I'll come some minor bits on the actual patches, but I'd like to
> > understand a few fundamental things first:
> > 
> > For one Darrick has introduced a new rmapxbt btree recently, which
> > allows using a rmap on reflink enabled file systems.  In his tree
> > we thus have two different implementation of a reverse mapping
> > btree.  Is there any good reason to keep it this way?  For one
> > reflinks are a compelling feature that I doubt people want to
> > disable in the long run, so most filesystem will be using rmapxbt.
> > I also don't think having these two implementations is good for the
> > testing matrix in the long run.
> 
> I haven't got as far as the rmapxbt code yet - it's currently at the
> end of the entire series, and I'm trying to sort out problems in
> infrastructure right now (i.e. rmapbt modifications are atomic and
> crash safe w.r.t. bmapbt changes and EFI processing).
> 
> I'm planning on re-ordering the rmapxbt and interval query tree
> stuff to before the reflink code is included, but I haven't got
> hatfar yet so I haven't looked at the code yet. It's slow going, and
> right now I don't think I'm going to have even a complete rmapbt
> series done in time for the merge 4.6 merge window, let alone all
> the extra stuff Darrick has done.
> 
> So with only a couple of days left before the merge window opens, I
> think this all needs to slip to the next merge window while we sort
> out what disk format we are going to use and rework the series to
> introduce only that format.

Now that rmap has slipped to 4.7, there's no point in holding back on
the disk format changes that I wanted to make.

The interval query code makes it much easier to look for left neighbor
rmap records on a reflink filesystem.  With that piece, we can drop
the requirement that every bmbt record corresponds exactly with an
rmapbt record; we can also make use of bits 20-30 of the rm_blockcount
field, which will make the rmapbt smaller.

Doing this also enables me to rip out a large chunk of the deferred
rmap processing code (mostly patches 15-16) because everything can
turn into calling the interval query aware versions of
xfs_rmap_{alloc,free}.  At the same time I'll add rmapbt update intent
log items--Dave, I know you were working on that; please send along
whatever you have.

I've been wrangling with the problem of how to deal with refcount
btree updates that update so many records that we overflow the
transaction reservation.  Right now we simply reserve so much space
that we can (usually) pass xfstests without blowing up, but this won't
work for all cases.  One solution is to roll the transaction if we
detect that we're about to run out of reservation, but that requires
us to be able to log refcount update intents.  However, that isn't so
bad, because...

...I think there's a potential for deadlock when unmapping extents
from a file.  Let's say we want to unmap an extent in AG X whose bmbt
block is in AG (X+1).  Let's say that the bmbt unmap causes the block
to split, and the new bmbt block is in AG (X+1).  Next, we go to
remove the rmapbt record from AG X, but let's say that record removal
also causes a btree split.  In that case, the transaction will
deadlock because it has AGF (X+1) and is trying to grab AGF X, which
is a violation of the locking order rules.

In summary, I think we need to have intent log items for both rmapbt
and refcountbt changes in order to keep things atomic w.r.t. crash
recovery.  I think this solves both the deadlock problem and the
reservation overflow problems with the refcount btree.

MCI/MCD = rMap change intent/done
CCI/CCD = refCount change intent/done

So now unmapping looks like this:
unmap extent -> log MCI -> log CCI -> roll -> remove rmapbt entries ->
  -> log MCD -> roll ->
  -> update refcountbt -> log CCD -> log EFI (for btree merges) -+-> 
     ^-- log CCI for remaining <--------------- if trans full ---|
  -> roll -> free extents -> log EFD -> done unmapping

Regular mapping looks like this:
map extent -> log MCI -> roll -> add rmapbt entries -> log MCD -> roll ->
  -> log EFI (for btree merges) -> free -> log EFD -> done mapping

Reflinking looks like this:
regular unmap -> log CCI -> roll ->
  -> update refcountbt -> log CCD -> log EFI (for btree merges) -+-->
     ^-- log CCI for remaining <--------------- if trans full ---|
  -> regular map -> done reflinking

This is my rough roadmap heading towards LSF:

0) jump forward to 4.6-rc1 after merge window closes
1) drop the skinny rmapbt format
2) use interval queries for xfs_rmap_{alloc,free}
3) use MCI/MCD on freeing extents
4) shove the interval query code and all the rmap stuff before reflink
5) rework rmap to drop the "every bmbt record must have an rmap rec"
6) rework refcount to avoid exhausting transaction reservations
7) prototype btree scrubbing code (done)
8) come up with some toy xfs-scrub utility

How's that sound?  Sorry in advance for the code churn and the
inevitable gigantic patchbomb. :)

--D

> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 05/16] xfs: add owner field to extent allocation and freeing
  2016-03-10 14:19   ` Christoph Hellwig
@ 2016-03-28 22:05     ` Darrick J. Wong
  0 siblings, 0 replies; 29+ messages in thread
From: Darrick J. Wong @ 2016-03-28 22:05 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Thu, Mar 10, 2016 at 06:19:13AM -0800, Christoph Hellwig wrote:
> On Tue, Mar 08, 2016 at 03:16:07PM +1100, Dave Chinner wrote:
> > From: "Darrick J. Wong" <darrick.wong@oracle.com>
> > 
> > For the rmap btree to work, we have to fed the extent owner
> 
> s/fed/feed/

<nod>

> 
> > +/*
> > + * Ownership info for an extent.  This is used to create reverse-mapping
> > + * entries.
> > + */
> > +#define XFS_RMAP_INO_ATTR_FORK	(1)
> > +#define XFS_RMAP_BMBT_BLOCK	(2)
> 
> These are OR-able flags - it might make sense to define them as:
> 
> #define XFS_RMAP_INO_ATTR_FORK	(1 << 0)
> #define XFS_RMAP_BMBT_BLOCK	(1 << 1)
> 
> to make this more obvious.

Ok.  Will do when I merge the "separate xfs_rmap_irec flags field" patch
into the appropriate patches (like this one).

--D

> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 16/16] xfs: piggyback rmapbt update intents in the bmap free structure
  2016-03-08  4:16 ` [PATCH 16/16] xfs: piggyback rmapbt update intents in the bmap free structure Dave Chinner
@ 2016-04-11 23:23   ` Darrick J. Wong
  0 siblings, 0 replies; 29+ messages in thread
From: Darrick J. Wong @ 2016-04-11 23:23 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

On Tue, Mar 08, 2016 at 03:16:18PM +1100, Dave Chinner wrote:
> From: "Darrick J. Wong" <darrick.wong@oracle.com>
> 
> Extend the xfs_bmap_free structure to track a list of rmapbt update
> intents. Record the changes being made in the new rmapbt intent list
> and add hooks to process the changes to xfs_bmap_finish().
> 
> Subsequent patches will implement the rmapbt updates recorded in
> the intents, allowing us to re-order the rmapbt changes to avoid
> deadlocks (e.g. AG ordering rules) and sanely log the changes
> without blowing out transaction reservation sizes.
> 
> This patch is derived from a couple of original patches from
> Darrick, split and merged by me, with a minor change to use
> list_head for the linked list.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/libxfs/xfs_bmap.c       | 179 +++++++++++++++++++++++++++++++-
>  fs/xfs/libxfs/xfs_bmap.h       |  16 ++-
>  fs/xfs/libxfs/xfs_rmap.c       | 228 +++++++++++++++++++++++++++++++++++++++++
>  fs/xfs/libxfs/xfs_rmap_btree.h |  57 +++++++++++
>  fs/xfs/xfs_bmap_util.c         |   6 ++
>  5 files changed, 479 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index f8d33c5..3ee3b8c 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -45,6 +45,7 @@
>  #include "xfs_symlink.h"
>  #include "xfs_attr_leaf.h"
>  #include "xfs_filestream.h"
> +#include "xfs_rmap_btree.h"
>  
>  
>  kmem_zone_t		*xfs_bmap_free_item_zone;
> @@ -648,6 +649,8 @@ xfs_bmap_cancel(
>  	xfs_bmap_free_item_t	*free;	/* free list item */
>  	xfs_bmap_free_item_t	*next;
>  
> +	xfs_rmap_cancel(&flist->xbf_rlist);
> +
>  	if (flist->xbf_count == 0)
>  		return;
>  	ASSERT(flist->xbf_first != NULL);
> @@ -1869,6 +1872,10 @@ xfs_bmap_add_extent_delay_real(
>  			if (error)
>  				goto done;
>  		}
> +		error = xfs_rmap_combine(mp, bma->rlist, bma->ip->i_ino,
> +				whichfork, &LEFT, &RIGHT, &PREV);

soft-NAK on this, because I'm reworking the code to use the btree interval
query code, which will make it possible to collapse all the inode fork block
mapping/unmapping into three deferred rmap operation types: map, unmap, and
convert (unwritten <-> real).  This will reduce the size of the rmapbt when
a file manages to map an extent that's more than 2^20 blocks long.

(The code itself looks fine, I'm just making a declaration of what I'm
working on for LSF.)

--D

> +		if (error)
> +			goto done;
>  		break;
>  
>  	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
> @@ -1901,6 +1908,10 @@ xfs_bmap_add_extent_delay_real(
>  			if (error)
>  				goto done;
>  		}
> +		error = xfs_rmap_resize(mp, bma->rlist, bma->ip->i_ino,
> +				whichfork, &LEFT, PREV.br_blockcount);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
> @@ -1932,6 +1943,10 @@ xfs_bmap_add_extent_delay_real(
>  			if (error)
>  				goto done;
>  		}
> +		error = xfs_rmap_move(mp, bma->rlist, bma->ip->i_ino,
> +				whichfork, &RIGHT, -PREV.br_blockcount);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
> @@ -1961,6 +1976,10 @@ xfs_bmap_add_extent_delay_real(
>  				goto done;
>  			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
>  		}
> +		error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
> +				whichfork, new);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
> @@ -1996,6 +2015,10 @@ xfs_bmap_add_extent_delay_real(
>  			if (error)
>  				goto done;
>  		}
> +		error = xfs_rmap_resize(mp, bma->rlist, bma->ip->i_ino,
> +				whichfork, &LEFT, new->br_blockcount);
> +		if (error)
> +			goto done;
>  		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
>  			startblockval(PREV.br_startblock));
>  		xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
> @@ -2031,6 +2054,10 @@ xfs_bmap_add_extent_delay_real(
>  				goto done;
>  			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
>  		}
> +		error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
> +				whichfork, new);
> +		if (error)
> +			goto done;
>  
>  		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
>  			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
> @@ -2079,6 +2106,8 @@ xfs_bmap_add_extent_delay_real(
>  			if (error)
>  				goto done;
>  		}
> +		error = xfs_rmap_move(mp, bma->rlist, bma->ip->i_ino,
> +				whichfork, &RIGHT, -new->br_blockcount);
>  
>  		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
>  			startblockval(PREV.br_startblock));
> @@ -2115,6 +2144,10 @@ xfs_bmap_add_extent_delay_real(
>  				goto done;
>  			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
>  		}
> +		error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
> +				whichfork, new);
> +		if (error)
> +			goto done;
>  
>  		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
>  			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
> @@ -2184,6 +2217,10 @@ xfs_bmap_add_extent_delay_real(
>  				goto done;
>  			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
>  		}
> +		error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
> +				whichfork, new);
> +		if (error)
> +			goto done;
>  
>  		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
>  			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
> @@ -2425,6 +2462,10 @@ xfs_bmap_add_extent_unwritten_real(
>  				RIGHT.br_blockcount, LEFT.br_state)))
>  				goto done;
>  		}
> +		error = xfs_rmap_combine(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, &LEFT, &RIGHT, &PREV);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
> @@ -2462,6 +2503,10 @@ xfs_bmap_add_extent_unwritten_real(
>  				LEFT.br_state)))
>  				goto done;
>  		}
> +		error = xfs_rmap_lcombine(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, &LEFT, &PREV);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
> @@ -2497,6 +2542,10 @@ xfs_bmap_add_extent_unwritten_real(
>  				newext)))
>  				goto done;
>  		}
> +		error = xfs_rmap_rcombine(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, &RIGHT, &PREV);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
> @@ -2523,6 +2572,11 @@ xfs_bmap_add_extent_unwritten_real(
>  				newext)))
>  				goto done;
>  		}
> +
> +		error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, new, 0);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
> @@ -2570,6 +2624,14 @@ xfs_bmap_add_extent_unwritten_real(
>  			if (error)
>  				goto done;
>  		}
> +		error = xfs_rmap_move(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, &PREV, new->br_blockcount);
> +		if (error)
> +			goto done;
> +		error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, &LEFT, new->br_blockcount);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case BMAP_LEFT_FILLING:
> @@ -2608,6 +2670,14 @@ xfs_bmap_add_extent_unwritten_real(
>  				goto done;
>  			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
>  		}
> +		error = xfs_rmap_move(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, &PREV, new->br_blockcount);
> +		if (error)
> +			goto done;
> +		error = xfs_rmap_insert(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, new);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
> @@ -2650,6 +2720,14 @@ xfs_bmap_add_extent_unwritten_real(
>  				newext)))
>  				goto done;
>  		}
> +		error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, &PREV, -new->br_blockcount);
> +		if (error)
> +			goto done;
> +		error = xfs_rmap_move(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, &RIGHT, -new->br_blockcount);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case BMAP_RIGHT_FILLING:
> @@ -2690,6 +2768,14 @@ xfs_bmap_add_extent_unwritten_real(
>  				goto done;
>  			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
>  		}
> +		error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, &PREV, -new->br_blockcount);
> +		if (error)
> +			goto done;
> +		error = xfs_rmap_insert(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, new);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case 0:
> @@ -2751,6 +2837,19 @@ xfs_bmap_add_extent_unwritten_real(
>  				goto done;
>  			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
>  		}
> +		error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, &PREV, new->br_startoff -
> +				PREV.br_startoff - PREV.br_blockcount);
> +		if (error)
> +			goto done;
> +		error = xfs_rmap_insert(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, new);
> +		if (error)
> +			goto done;
> +		error = xfs_rmap_insert(mp, &flist->xbf_rlist, ip->i_ino,
> +				XFS_DATA_FORK, &r[1]);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
> @@ -2954,6 +3053,7 @@ xfs_bmap_add_extent_hole_real(
>  	int			rval=0;	/* return value (logging flags) */
>  	int			state;	/* state bits, accessed thru macros */
>  	struct xfs_mount	*mp;
> +	struct xfs_bmbt_irec	prev;	/* fake previous extent entry */
>  
>  	mp = bma->ip->i_mount;
>  	ifp = XFS_IFORK_PTR(bma->ip, whichfork);
> @@ -3061,6 +3161,12 @@ xfs_bmap_add_extent_hole_real(
>  			if (error)
>  				goto done;
>  		}
> +		prev = *new;
> +		prev.br_startblock = nullstartblock(0);
> +		error = xfs_rmap_combine(mp, bma->rlist, bma->ip->i_ino,
> +				whichfork, &left, &right, &prev);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case BMAP_LEFT_CONTIG:
> @@ -3093,6 +3199,10 @@ xfs_bmap_add_extent_hole_real(
>  			if (error)
>  				goto done;
>  		}
> +		error = xfs_rmap_resize(mp, bma->rlist, bma->ip->i_ino,
> +				whichfork, &left, new->br_blockcount);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case BMAP_RIGHT_CONTIG:
> @@ -3127,6 +3237,10 @@ xfs_bmap_add_extent_hole_real(
>  			if (error)
>  				goto done;
>  		}
> +		error = xfs_rmap_move(mp, bma->rlist, bma->ip->i_ino,
> +				whichfork, &right, -new->br_blockcount);
> +		if (error)
> +			goto done;
>  		break;
>  
>  	case 0:
> @@ -3155,6 +3269,10 @@ xfs_bmap_add_extent_hole_real(
>  				goto done;
>  			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
>  		}
> +		error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
> +				whichfork, new);
> +		if (error)
> +			goto done;
>  		break;
>  	}
>  
> @@ -4289,7 +4407,6 @@ xfs_bmapi_delay(
>  	return 0;
>  }
>  
> -
>  static int
>  xfs_bmapi_allocate(
>  	struct xfs_bmalloca	*bma)
> @@ -4603,6 +4720,7 @@ xfs_bmapi_write(
>  	bma.userdata = 0;
>  	bma.flist = flist;
>  	bma.firstblock = firstblock;
> +	bma.rlist = &flist->xbf_rlist;
>  
>  	while (bno < end && n < *nmap) {
>  		inhole = eof || bma.got.br_startoff > bno;
> @@ -4861,6 +4979,10 @@ xfs_bmap_del_extent(
>  		XFS_IFORK_NEXT_SET(ip, whichfork,
>  			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
>  		flags |= XFS_ILOG_CORE;
> +		error = xfs_rmap_delete(mp, &flist->xbf_rlist, ip->i_ino,
> +				whichfork, &got);
> +		if (error)
> +			goto done;
>  		if (!cur) {
>  			flags |= xfs_ilog_fext(whichfork);
>  			break;
> @@ -4888,6 +5010,10 @@ xfs_bmap_del_extent(
>  		}
>  		xfs_bmbt_set_startblock(ep, del_endblock);
>  		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
> +		error = xfs_rmap_move(mp, &flist->xbf_rlist, ip->i_ino,
> +				whichfork, &got, del->br_blockcount);
> +		if (error)
> +			goto done;
>  		if (!cur) {
>  			flags |= xfs_ilog_fext(whichfork);
>  			break;
> @@ -4914,6 +5040,10 @@ xfs_bmap_del_extent(
>  			break;
>  		}
>  		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
> +		error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
> +				whichfork, &got, -del->br_blockcount);
> +		if (error)
> +			goto done;
>  		if (!cur) {
>  			flags |= xfs_ilog_fext(whichfork);
>  			break;
> @@ -4939,6 +5069,15 @@ xfs_bmap_del_extent(
>  		if (!delay) {
>  			new.br_startblock = del_endblock;
>  			flags |= XFS_ILOG_CORE;
> +			error = xfs_rmap_resize(mp, &flist->xbf_rlist,
> +					ip->i_ino, whichfork, &got,
> +					temp - got.br_blockcount);
> +			if (error)
> +				goto done;
> +			error = xfs_rmap_insert(mp, &flist->xbf_rlist,
> +					ip->i_ino, whichfork, &new);
> +			if (error)
> +				goto done;
>  			if (cur) {
>  				if ((error = xfs_bmbt_update(cur,
>  						got.br_startoff,
> @@ -5175,6 +5314,7 @@ xfs_bunmapi(
>  			got.br_startoff + got.br_blockcount - 1);
>  		if (bno < start)
>  			break;
> +
>  		/*
>  		 * Then deal with the (possibly delayed) allocated space
>  		 * we found.
> @@ -5477,7 +5617,8 @@ xfs_bmse_merge(
>  	struct xfs_bmbt_rec_host	*gotp,		/* extent to shift */
>  	struct xfs_bmbt_rec_host	*leftp,		/* preceding extent */
>  	struct xfs_btree_cur		*cur,
> -	int				*logflags)	/* output */
> +	int				*logflags,	/* output */
> +	struct xfs_rmap_list		*rlist)		/* rmap intent list */
>  {
>  	struct xfs_bmbt_irec		got;
>  	struct xfs_bmbt_irec		left;
> @@ -5508,6 +5649,13 @@ xfs_bmse_merge(
>  	XFS_IFORK_NEXT_SET(ip, whichfork,
>  			   XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
>  	*logflags |= XFS_ILOG_CORE;
> +	error = xfs_rmap_resize(mp, rlist, ip->i_ino, whichfork, &left,
> +			blockcount - left.br_blockcount);
> +	if (error)
> +		return error;
> +	error = xfs_rmap_delete(mp, rlist, ip->i_ino, whichfork, &got);
> +	if (error)
> +		return error;
>  	if (!cur) {
>  		*logflags |= XFS_ILOG_DEXT;
>  		return 0;
> @@ -5550,7 +5698,8 @@ xfs_bmse_shift_one(
>  	struct xfs_bmbt_rec_host	*gotp,
>  	struct xfs_btree_cur		*cur,
>  	int				*logflags,
> -	enum shift_direction		direction)
> +	enum shift_direction		direction,
> +	struct xfs_rmap_list		*rlist)
>  {
>  	struct xfs_ifork		*ifp;
>  	struct xfs_mount		*mp;
> @@ -5600,7 +5749,7 @@ xfs_bmse_shift_one(
>  				       offset_shift_fsb)) {
>  			return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
>  					      *current_ext, gotp, adj_irecp,
> -					      cur, logflags);
> +					      cur, logflags, rlist);
>  		}
>  	} else {
>  		startoff = got.br_startoff + offset_shift_fsb;
> @@ -5637,6 +5786,10 @@ update_current_ext:
>  		(*current_ext)--;
>  	xfs_bmbt_set_startoff(gotp, startoff);
>  	*logflags |= XFS_ILOG_CORE;
> +	error = xfs_rmap_slide(mp, rlist, ip->i_ino, whichfork,
> +			&got, startoff - got.br_startoff);
> +	if (error)
> +		return error;
>  	if (!cur) {
>  		*logflags |= XFS_ILOG_DEXT;
>  		return 0;
> @@ -5776,9 +5929,11 @@ xfs_bmap_shift_extents(
>  	}
>  
>  	while (nexts++ < num_exts) {
> +		xfs_bmbt_get_all(gotp, &got);
> +
>  		error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
>  					   &current_ext, gotp, cur, &logflags,
> -					   direction);
> +					   direction, &flist->xbf_rlist);
>  		if (error)
>  			goto del_cursor;
>  		/*
> @@ -5831,6 +5986,7 @@ xfs_bmap_split_extent_at(
>  	int				whichfork = XFS_DATA_FORK;
>  	struct xfs_btree_cur		*cur = NULL;
>  	struct xfs_bmbt_rec_host	*gotp;
> +	struct xfs_bmbt_irec		rgot;
>  	struct xfs_bmbt_irec		got;
>  	struct xfs_bmbt_irec		new; /* split extent */
>  	struct xfs_mount		*mp = ip->i_mount;
> @@ -5840,6 +5996,7 @@ xfs_bmap_split_extent_at(
>  	int				error = 0;
>  	int				logflags = 0;
>  	int				i = 0;
> +	long				adj;
>  
>  	if (unlikely(XFS_TEST_ERROR(
>  	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
> @@ -5879,6 +6036,7 @@ xfs_bmap_split_extent_at(
>  	if (got.br_startoff >= split_fsb)
>  		return 0;
>  
> +	rgot = got;
>  	gotblkcnt = split_fsb - got.br_startoff;
>  	new.br_startoff = split_fsb;
>  	new.br_startblock = got.br_startblock + gotblkcnt;
> @@ -5934,6 +6092,17 @@ xfs_bmap_split_extent_at(
>  		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
>  	}
>  
> +	/* update rmapbt */
> +	adj = -(long)rgot.br_blockcount + gotblkcnt;
> +	error = xfs_rmap_resize(mp, &free_list->xbf_rlist, ip->i_ino,
> +			whichfork, &rgot, adj);
> +	if (error)
> +		goto del_cursor;
> +	error = xfs_rmap_insert(mp, &free_list->xbf_rlist, ip->i_ino,
> +			whichfork, &new);
> +	if (error)
> +		goto del_cursor;
> +
>  	/*
>  	 * Convert to a btree if necessary.
>  	 */
> diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
> index 06dbe08..38cd9b5 100644
> --- a/fs/xfs/libxfs/xfs_bmap.h
> +++ b/fs/xfs/libxfs/xfs_bmap.h
> @@ -56,6 +56,7 @@ struct xfs_bmalloca {
>  	bool			conv;	/* overwriting unwritten extents */
>  	char			userdata;/* userdata mask */
>  	int			flags;
> +	struct xfs_rmap_list	*rlist;
>  };
>  
>  /*
> @@ -70,6 +71,11 @@ typedef struct xfs_bmap_free_item
>  	struct xfs_bmap_free_item *xbfi_next;	/* link to next entry */
>  } xfs_bmap_free_item_t;
>  
> +struct xfs_rmap_list {
> +	struct list_head	rl_list;
> +	int			rl_count;
> +};
> +
>  /*
>   * Header for free extent list.
>   *
> @@ -89,6 +95,7 @@ typedef	struct xfs_bmap_free
>  	xfs_bmap_free_item_t	*xbf_first;	/* list of to-be-free extents */
>  	int			xbf_count;	/* count of items on list */
>  	int			xbf_low;	/* alloc in low mode */
> +	struct xfs_rmap_list	xbf_rlist;	/* rmap intent list */
>  } xfs_bmap_free_t;
>  
>  #define	XFS_BMAP_MAX_NMAP	4
> @@ -142,8 +149,13 @@ static inline int xfs_bmapi_aflag(int w)
>  
>  static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
>  {
> -	((flp)->xbf_first = NULL, (flp)->xbf_count = 0, \
> -		(flp)->xbf_low = 0, *(fbp) = NULLFSBLOCK);
> +	flp->xbf_first = NULL;
> +	flp->xbf_count = 0;
> +	flp->xbf_low = 0;
> +	*fbp = NULLFSBLOCK;
> +
> +	INIT_LIST_HEAD(&flp->xbf_rlist.rl_list);
> +	flp->xbf_rlist.rl_count = 0;
>  }
>  
>  /*
> diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
> index 479cb73..a7130c5 100644
> --- a/fs/xfs/libxfs/xfs_rmap.c
> +++ b/fs/xfs/libxfs/xfs_rmap.c
> @@ -35,6 +35,7 @@
>  #include "xfs_trace.h"
>  #include "xfs_error.h"
>  #include "xfs_extent_busy.h"
> +#include "xfs_bmap.h"
>  
>  /*
>   * Lookup the first record less than or equal to [bno, len, owner, offset]
> @@ -542,3 +543,230 @@ out_error:
>  	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
>  	return error;
>  }
> +
> +
> +/*
> + * Free up any items left in the list.
> + */
> +void
> +xfs_rmap_cancel(
> +	struct xfs_rmap_list	*rlist)	/* list of rmap intents */
> +{
> +	if (list_empty(&rlist->rl_list))
> +		return;
> +	while (!list_empty(&rlist->rl_list)) {
> +		struct xfs_rmap_intent *free;
> +
> +		free = list_first_entry(&rlist->rl_list, struct xfs_rmap_intent,
> +					ri_list);
> +		list_del(&free->ri_list);
> +		kmem_free(free);
> +	}
> +	rlist->rl_count = 0;
> +}
> +
> +/*
> + * Free up any items left in the intent list.
> + */
> +int
> +xfs_rmap_finish(
> +	struct xfs_mount	*mp,
> +	struct xfs_trans	**tpp,
> +	struct xfs_inode	*ip,
> +	struct xfs_rmap_list	*rlist)
> +{
> +	/* Not yet implemented, just cancel until implemented */
> +	xfs_rmap_cancel(rlist);
> +	return 0;
> +}
> +
> +/*
> + * Record a rmap intent. The list should eventually be sorted by AG and then
> + * by increasing age; for now new intents are simply added at the list head.
> + */
> +static int
> +__xfs_rmap_add(
> +	struct xfs_mount	*mp,
> +	struct xfs_rmap_list	*rlist,
> +	struct xfs_rmap_intent	*ri)
> +{
> +	struct xfs_rmap_intent	*new;
> +
> +	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
> +		return 0;
> +
> +	new = kmem_zalloc(sizeof(struct xfs_rmap_intent), KM_SLEEP | KM_NOFS);
> +	*new = *ri;
> +	INIT_LIST_HEAD(&new->ri_list);
> +
> +	/* XXX: ordering will be needed */
> +	list_add(&new->ri_list, &rlist->rl_list);
> +	rlist->rl_count++;
> +	return 0;
> +}
> +
> +/* Combine two adjacent rmap extents */
> +int
> +xfs_rmap_combine(
> +	struct xfs_mount	*mp,
> +	struct xfs_rmap_list	*rlist,
> +	xfs_ino_t		ino,
> +	int			whichfork,
> +	struct xfs_bmbt_irec	*left,
> +	struct xfs_bmbt_irec	*right,
> +	struct xfs_bmbt_irec	*prev)
> +{
> +	struct xfs_rmap_intent	ri;
> +
> +	ri.ri_type = XFS_RMAP_COMBINE;
> +	ri.ri_ino = ino;
> +	ri.ri_whichfork = whichfork;
> +	ri.ri_prev = *prev;
> +	ri.ri_u.a.left = *left;
> +	ri.ri_u.a.right = *right;
> +
> +	return __xfs_rmap_add(mp, rlist, &ri);
> +}
> +
> +/* Extend a left rmap extent */
> +int
> +xfs_rmap_lcombine(
> +	struct xfs_mount	*mp,
> +	struct xfs_rmap_list	*rlist,
> +	xfs_ino_t		ino,
> +	int			whichfork,
> +	struct xfs_bmbt_irec	*LEFT,
> +	struct xfs_bmbt_irec	*PREV)
> +{
> +	struct xfs_rmap_intent	ri;
> +
> +	ri.ri_type = XFS_RMAP_LCOMBINE;
> +	ri.ri_ino = ino;
> +	ri.ri_whichfork = whichfork;
> +	ri.ri_prev = *PREV;
> +	ri.ri_u.a.left = *LEFT;
> +
> +	return __xfs_rmap_add(mp, rlist, &ri);
> +}
> +
> +/* Extend a right rmap extent */
> +int
> +xfs_rmap_rcombine(
> +	struct xfs_mount	*mp,
> +	struct xfs_rmap_list	*rlist,
> +	xfs_ino_t		ino,
> +	int			whichfork,
> +	struct xfs_bmbt_irec	*RIGHT,
> +	struct xfs_bmbt_irec	*PREV)
> +{
> +	struct xfs_rmap_intent	ri;
> +
> +	ri.ri_type = XFS_RMAP_RCOMBINE;
> +	ri.ri_ino = ino;
> +	ri.ri_whichfork = whichfork;
> +	ri.ri_prev = *PREV;
> +	ri.ri_u.a.right = *RIGHT;
> +
> +	return __xfs_rmap_add(mp, rlist, &ri);
> +}
> +
> +/* Insert a rmap extent */
> +int
> +xfs_rmap_insert(
> +	struct xfs_mount	*mp,
> +	struct xfs_rmap_list	*rlist,
> +	xfs_ino_t		ino,
> +	int			whichfork,
> +	struct xfs_bmbt_irec	*new)
> +{
> +	struct xfs_rmap_intent	ri;
> +
> +	ri.ri_type = XFS_RMAP_INSERT;
> +	ri.ri_ino = ino;
> +	ri.ri_whichfork = whichfork;
> +	ri.ri_prev = *new;
> +
> +	return __xfs_rmap_add(mp, rlist, &ri);
> +}
> +
> +/* Delete a rmap extent */
> +int
> +xfs_rmap_delete(
> +	struct xfs_mount	*mp,
> +	struct xfs_rmap_list	*rlist,
> +	xfs_ino_t		ino,
> +	int			whichfork,
> +	struct xfs_bmbt_irec	*new)
> +{
> +	struct xfs_rmap_intent	ri;
> +
> +	ri.ri_type = XFS_RMAP_DELETE;
> +	ri.ri_ino = ino;
> +	ri.ri_whichfork = whichfork;
> +	ri.ri_prev = *new;
> +
> +	return __xfs_rmap_add(mp, rlist, &ri);
> +}
> +
> +/* Change the start of an rmap */
> +int
> +xfs_rmap_move(
> +	struct xfs_mount	*mp,
> +	struct xfs_rmap_list	*rlist,
> +	xfs_ino_t		ino,
> +	int			whichfork,
> +	struct xfs_bmbt_irec	*PREV,
> +	long			start_adj)
> +{
> +	struct xfs_rmap_intent	ri;
> +
> +	ri.ri_type = XFS_RMAP_MOVE;
> +	ri.ri_ino = ino;
> +	ri.ri_whichfork = whichfork;
> +	ri.ri_prev = *PREV;
> +	ri.ri_u.b.adj = start_adj;
> +
> +	return __xfs_rmap_add(mp, rlist, &ri);
> +}
> +
> +/* Change the logical offset of an rmap */
> +int
> +xfs_rmap_slide(
> +	struct xfs_mount	*mp,
> +	struct xfs_rmap_list	*rlist,
> +	xfs_ino_t		ino,
> +	int			whichfork,
> +	struct xfs_bmbt_irec	*PREV,
> +	long			start_adj)
> +{
> +	struct xfs_rmap_intent	ri;
> +
> +	ri.ri_type = XFS_RMAP_SLIDE;
> +	ri.ri_ino = ino;
> +	ri.ri_whichfork = whichfork;
> +	ri.ri_prev = *PREV;
> +	ri.ri_u.b.adj = start_adj;
> +
> +	return __xfs_rmap_add(mp, rlist, &ri);
> +}
> +
> +/* Change the size of an rmap */
> +int
> +xfs_rmap_resize(
> +	struct xfs_mount	*mp,
> +	struct xfs_rmap_list	*rlist,
> +	xfs_ino_t		ino,
> +	int			whichfork,
> +	struct xfs_bmbt_irec	*PREV,
> +	long			size_adj)
> +{
> +	struct xfs_rmap_intent	ri;
> +
> +	ri.ri_type = XFS_RMAP_RESIZE;
> +	ri.ri_ino = ino;
> +	ri.ri_whichfork = whichfork;
> +	ri.ri_prev = *PREV;
> +	ri.ri_u.b.adj = size_adj;
> +
> +	return __xfs_rmap_add(mp, rlist, &ri);
> +}
> diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
> index d7c9722..599fa3a 100644
> --- a/fs/xfs/libxfs/xfs_rmap_btree.h
> +++ b/fs/xfs/libxfs/xfs_rmap_btree.h
> @@ -21,6 +21,7 @@
>  struct xfs_buf;
>  struct xfs_btree_cur;
>  struct xfs_mount;
> +struct xfs_rmap_list;
>  
>  /* rmaps only exist on crc enabled filesystems */
>  #define XFS_RMAP_BLOCK_LEN	XFS_BTREE_SBLOCK_CRC_LEN
> @@ -68,4 +69,60 @@ int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
>  		  xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
>  		  struct xfs_owner_info *oinfo);
>  
> +/* functions for updating the rmapbt based on bmbt map/unmap operations */
> +int xfs_rmap_combine(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
> +		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *LEFT,
> +		struct xfs_bmbt_irec *RIGHT, struct xfs_bmbt_irec *PREV);
> +int xfs_rmap_lcombine(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
> +		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *LEFT,
> +		struct xfs_bmbt_irec *PREV);
> +int xfs_rmap_rcombine(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
> +		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *RIGHT,
> +		struct xfs_bmbt_irec *PREV);
> +int xfs_rmap_insert(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
> +		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *rec);
> +int xfs_rmap_delete(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
> +		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *rec);
> +int xfs_rmap_move(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
> +		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *PREV,
> +		long start_adj);
> +int xfs_rmap_slide(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
> +		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *PREV,
> +		long start_adj);
> +int xfs_rmap_resize(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
> +		xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *PREV,
> +		long size_adj);
> +
> +enum xfs_rmap_intent_type {
> +	XFS_RMAP_COMBINE,
> +	XFS_RMAP_LCOMBINE,
> +	XFS_RMAP_RCOMBINE,
> +	XFS_RMAP_INSERT,
> +	XFS_RMAP_DELETE,
> +	XFS_RMAP_MOVE,
> +	XFS_RMAP_SLIDE,
> +	XFS_RMAP_RESIZE,
> +};
> +
> +struct xfs_rmap_intent {
> +	struct list_head			ri_list;
> +	enum xfs_rmap_intent_type		ri_type;
> +	xfs_ino_t				ri_ino;
> +	int					ri_whichfork;
> +	struct xfs_bmbt_irec			ri_prev;
> +	union {
> +		struct {
> +			struct xfs_bmbt_irec	left;
> +			struct xfs_bmbt_irec	right;
> +		} a;
> +		struct {
> +			long			adj;
> +		} b;
> +	} ri_u;
> +};
> +
> +void	xfs_rmap_cancel(struct xfs_rmap_list *rlist);
> +int	xfs_rmap_finish(struct xfs_mount *mp, struct xfs_trans **tpp,
> +			struct xfs_inode *ip, struct xfs_rmap_list *rlist);
> +
>  #endif	/* __XFS_RMAP_BTREE_H__ */
> diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> index 834639d..a9cf94e 100644
> --- a/fs/xfs/xfs_bmap_util.c
> +++ b/fs/xfs/xfs_bmap_util.c
> @@ -40,6 +40,7 @@
>  #include "xfs_trace.h"
>  #include "xfs_icache.h"
>  #include "xfs_log.h"
> +#include "xfs_rmap_btree.h"
>  
>  /* Kernel only BMAP related definitions and functions */
>  
> @@ -109,6 +110,11 @@ xfs_bmap_finish(
>  	struct xfs_bmap_free_item	*next;	/* next item on free list */
>  
>  	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
> +
> +	error = xfs_rmap_finish((*tp)->t_mountp, tp, ip, &flist->xbf_rlist);
> +	if (error)
> +		return error;
> +
>  	if (flist->xbf_count == 0)
>  		return 0;
>  
> -- 
> 2.7.0
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2016-04-11 23:23 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-08  4:16 [PATCH 0/16] xfs: first part of rmapbt functionality Dave Chinner
2016-03-08  4:16 ` [PATCH 01/16] xfs: introduce rmap btree definitions Dave Chinner
2016-03-08  4:16 ` [PATCH 02/16] xfs: add rmap btree stats infrastructure Dave Chinner
2016-03-08  4:16 ` [PATCH 03/16] xfs: rmap btree add more reserved blocks Dave Chinner
2016-03-10 14:16   ` Christoph Hellwig
2016-03-10 14:22   ` Christoph Hellwig
2016-03-10 22:09     ` Dave Chinner
2016-03-11  7:32       ` Christoph Hellwig
2016-03-08  4:16 ` [PATCH 04/16] libxfs: rearrange xfs_bmap_add_free parameters Dave Chinner
2016-03-08 17:18   ` Christoph Hellwig
2016-03-08  4:16 ` [PATCH 05/16] xfs: add owner field to extent allocation and freeing Dave Chinner
2016-03-10 14:19   ` Christoph Hellwig
2016-03-28 22:05     ` Darrick J. Wong
2016-03-08  4:16 ` [PATCH 06/16] xfs: introduce rmap extent operation stubs Dave Chinner
2016-03-08  4:16 ` [PATCH 07/16] xfs: define the on-disk rmap btree format Dave Chinner
2016-03-08  4:16 ` [PATCH 08/16] xfs: add rmap btree growfs support Dave Chinner
2016-03-08  4:16 ` [PATCH 09/16] xfs: rmap btree transaction reservations Dave Chinner
2016-03-08  4:16 ` [PATCH 10/16] xfs: rmap btree requires more reserved free space Dave Chinner
2016-03-08  4:16 ` [PATCH 11/16] xfs: add rmap btree operations Dave Chinner
2016-03-08  4:16 ` [PATCH 12/16] xfs: add tracepoints for the rmap-mirrors-bmbt functions Dave Chinner
2016-03-08  4:16 ` [PATCH 13/16] xfs: add an extent to the rmap btree Dave Chinner
2016-03-08  4:16 ` [PATCH 14/16] xfs: remove an extent from " Dave Chinner
2016-03-08  4:16 ` [PATCH 15/16] xfs: add rmap btree insert and delete helpers Dave Chinner
2016-03-08  4:16 ` [PATCH 16/16] xfs: piggyback rmapbt update intents in the bmap free structure Dave Chinner
2016-04-11 23:23   ` Darrick J. Wong
2016-03-10 14:14 ` [PATCH 0/16] xfs: first part of rmapbt functionality Christoph Hellwig
2016-03-10 16:57   ` Darrick J. Wong
2016-03-10 21:44   ` Dave Chinner
2016-03-25 23:00     ` Darrick J. Wong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.