All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: david@fromorbit.com, darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: [PATCH 29/63] xfs: introduce the CoW fork
Date: Thu, 29 Sep 2016 20:08:48 -0700	[thread overview]
Message-ID: <147520492856.29434.17061076591831860945.stgit@birch.djwong.org> (raw)
In-Reply-To: <147520472904.29434.15518629624221621056.stgit@birch.djwong.org>

Introduce a new in-core fork for storing copy-on-write delalloc
reservations and allocated extents that are in the process of being
written out.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
v2: fix up bmapi_read so that we can query the CoW fork, and have it
return a "hole" extent if there's no CoW fork.
---
 fs/xfs/Makefile                |    1 
 fs/xfs/libxfs/xfs_bmap.c       |   27 +++++++--
 fs/xfs/libxfs/xfs_bmap.h       |   22 +++++++-
 fs/xfs/libxfs/xfs_bmap_btree.c |    1 
 fs/xfs/libxfs/xfs_inode_fork.c |   47 +++++++++++++++-
 fs/xfs/libxfs/xfs_inode_fork.h |   28 ++++++++--
 fs/xfs/libxfs/xfs_rmap.c       |   15 +++--
 fs/xfs/libxfs/xfs_types.h      |    1 
 fs/xfs/xfs_icache.c            |    5 ++
 fs/xfs/xfs_inode.h             |    4 +
 fs/xfs/xfs_reflink.c           |  114 ++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_reflink.h           |   23 ++++++++
 fs/xfs/xfs_trace.h             |    4 +
 13 files changed, 264 insertions(+), 28 deletions(-)
 create mode 100644 fs/xfs/xfs_reflink.c
 create mode 100644 fs/xfs/xfs_reflink.h


diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 6afb228..26ef195 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -90,6 +90,7 @@ xfs-y				+= xfs_aops.o \
 				   xfs_message.o \
 				   xfs_mount.o \
 				   xfs_mru_cache.o \
+				   xfs_reflink.o \
 				   xfs_stats.o \
 				   xfs_super.o \
 				   xfs_symlink.o \
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 1e4f1a1..3388058 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -2924,6 +2924,7 @@ xfs_bmap_add_extent_hole_real(
 	ASSERT(!isnullstartblock(new->br_startblock));
 	ASSERT(!bma->cur ||
 	       !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
+	ASSERT(whichfork != XFS_COW_FORK);
 
 	XFS_STATS_INC(mp, xs_add_exlist);
 
@@ -4064,12 +4065,11 @@ xfs_bmapi_read(
 	int			error;
 	int			eof;
 	int			n = 0;
-	int			whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
-						XFS_ATTR_FORK : XFS_DATA_FORK;
+	int			whichfork = xfs_bmapi_whichfork(flags);
 
 	ASSERT(*nmap >= 1);
 	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
-			   XFS_BMAPI_IGSTATE)));
+			   XFS_BMAPI_IGSTATE|XFS_BMAPI_COWFORK)));
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
 
 	if (unlikely(XFS_TEST_ERROR(
@@ -4087,6 +4087,16 @@ xfs_bmapi_read(
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 
+	/* No CoW fork?  Return a hole. */
+	if (whichfork == XFS_COW_FORK && !ifp) {
+		mval->br_startoff = bno;
+		mval->br_startblock = HOLESTARTBLOCK;
+		mval->br_blockcount = len;
+		mval->br_state = XFS_EXT_NORM;
+		*nmap = 1;
+		return 0;
+	}
+
 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
 		error = xfs_iread_extents(NULL, ip, whichfork);
 		if (error)
@@ -4360,8 +4370,7 @@ xfs_bmapi_convert_unwritten(
 	xfs_filblks_t		len,
 	int			flags)
 {
-	int			whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
-						XFS_ATTR_FORK : XFS_DATA_FORK;
+	int			whichfork = xfs_bmapi_whichfork(flags);
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
 	int			tmp_logflags = 0;
 	int			error;
@@ -4377,6 +4386,8 @@ xfs_bmapi_convert_unwritten(
 			(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
 		return 0;
 
+	ASSERT(whichfork != XFS_COW_FORK);
+
 	/*
 	 * Modify (by adding) the state flag, if writing.
 	 */
@@ -4790,6 +4801,8 @@ xfs_bmap_del_extent(
 
 	if (whichfork == XFS_ATTR_FORK)
 		state |= BMAP_ATTRFORK;
+	else if (whichfork == XFS_COW_FORK)
+		state |= BMAP_COWFORK;
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
@@ -5128,8 +5141,8 @@ __xfs_bunmapi(
 
 	trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
 
-	whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
-		XFS_ATTR_FORK : XFS_DATA_FORK;
+	whichfork = xfs_bmapi_whichfork(flags);
+	ASSERT(whichfork != XFS_COW_FORK);
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (unlikely(
 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 48ba3ed..adb64fb 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -107,6 +107,9 @@ struct xfs_extent_free_item
  */
 #define XFS_BMAPI_REMAP		0x100
 
+/* Select the CoW fork as the target of the mapping operation. */
+#define XFS_BMAPI_COWFORK	0x200
+
 #define XFS_BMAPI_FLAGS \
 	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
 	{ XFS_BMAPI_METADATA,	"METADATA" }, \
@@ -116,12 +119,23 @@ struct xfs_extent_free_item
 	{ XFS_BMAPI_CONTIG,	"CONTIG" }, \
 	{ XFS_BMAPI_CONVERT,	"CONVERT" }, \
 	{ XFS_BMAPI_ZERO,	"ZERO" }, \
-	{ XFS_BMAPI_REMAP,	"REMAP" }
+	{ XFS_BMAPI_REMAP,	"REMAP" }, \
+	{ XFS_BMAPI_COWFORK,	"COWFORK" }
 
 
 static inline int xfs_bmapi_aflag(int w)
 {
-	return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0);
+	return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK :
+	       (w == XFS_COW_FORK ? XFS_BMAPI_COWFORK : 0));
+}
+
+static inline int xfs_bmapi_whichfork(int bmapi_flags)
+{
+	if (bmapi_flags & XFS_BMAPI_COWFORK)
+		return XFS_COW_FORK;
+	else if (bmapi_flags & XFS_BMAPI_ATTRFORK)
+		return XFS_ATTR_FORK;
+	return XFS_DATA_FORK;
 }
 
 /*
@@ -142,13 +156,15 @@ static inline int xfs_bmapi_aflag(int w)
 #define BMAP_LEFT_VALID		(1 << 6)
 #define BMAP_RIGHT_VALID	(1 << 7)
 #define BMAP_ATTRFORK		(1 << 8)
+#define BMAP_COWFORK		(1 << 9)
 
 #define XFS_BMAP_EXT_FLAGS \
 	{ BMAP_LEFT_CONTIG,	"LC" }, \
 	{ BMAP_RIGHT_CONTIG,	"RC" }, \
 	{ BMAP_LEFT_FILLING,	"LF" }, \
 	{ BMAP_RIGHT_FILLING,	"RF" }, \
-	{ BMAP_ATTRFORK,	"ATTR" }
+	{ BMAP_ATTRFORK,	"ATTR" }, \
+	{ BMAP_COWFORK,		"COW" }
 
 
 /*
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index cd85274..37f0d9d 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -777,6 +777,7 @@ xfs_bmbt_init_cursor(
 {
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 	struct xfs_btree_cur	*cur;
+	ASSERT(whichfork != XFS_COW_FORK);
 
 	cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
 
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 7699a03..d29954a 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -206,9 +206,14 @@ xfs_iformat_fork(
 		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
 		return -EFSCORRUPTED;
 	}
-	if (error) {
+	if (error)
 		return error;
+
+	if (xfs_is_reflink_inode(ip)) {
+		ASSERT(ip->i_cowfp == NULL);
+		xfs_ifork_init_cow(ip);
 	}
+
 	if (!XFS_DFORK_Q(dip))
 		return 0;
 
@@ -247,6 +252,9 @@ xfs_iformat_fork(
 	if (error) {
 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
 		ip->i_afp = NULL;
+		if (ip->i_cowfp)
+			kmem_zone_free(xfs_ifork_zone, ip->i_cowfp);
+		ip->i_cowfp = NULL;
 		xfs_idestroy_fork(ip, XFS_DATA_FORK);
 	}
 	return error;
@@ -761,6 +769,9 @@ xfs_idestroy_fork(
 	if (whichfork == XFS_ATTR_FORK) {
 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
 		ip->i_afp = NULL;
+	} else if (whichfork == XFS_COW_FORK) {
+		kmem_zone_free(xfs_ifork_zone, ip->i_cowfp);
+		ip->i_cowfp = NULL;
 	}
 }
 
@@ -948,6 +959,19 @@ xfs_iext_get_ext(
 	}
 }
 
+/* XFS_IEXT_STATE_TO_FORK() -- Convert BMAP state flags to an inode fork. */
+xfs_ifork_t *
+XFS_IEXT_STATE_TO_FORK(
+	struct xfs_inode	*ip,
+	int			state)
+{
+	if (state & BMAP_COWFORK)
+		return ip->i_cowfp;
+	else if (state & BMAP_ATTRFORK)
+		return ip->i_afp;
+	return &ip->i_df;
+}
+
 /*
  * Insert new item(s) into the extent records for incore inode
  * fork 'ifp'.  'count' new items are inserted at index 'idx'.
@@ -960,7 +984,7 @@ xfs_iext_insert(
 	xfs_bmbt_irec_t	*new,		/* items to insert */
 	int		state)		/* type of extent conversion */
 {
-	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
+	xfs_ifork_t	*ifp = XFS_IEXT_STATE_TO_FORK(ip, state);
 	xfs_extnum_t	i;		/* extent record index */
 
 	trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
@@ -1210,7 +1234,7 @@ xfs_iext_remove(
 	int		ext_diff,	/* number of extents to remove */
 	int		state)		/* type of extent conversion */
 {
-	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
+	xfs_ifork_t	*ifp = XFS_IEXT_STATE_TO_FORK(ip, state);
 	xfs_extnum_t	nextents;	/* number of extents in file */
 	int		new_size;	/* size of extents after removal */
 
@@ -1955,3 +1979,20 @@ xfs_iext_irec_update_extoffs(
 		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
 	}
 }
+
+/*
+ * Initialize an inode's in-core copy-on-write fork; no-op if already present.
+ */
+void
+xfs_ifork_init_cow(
+	struct xfs_inode	*ip)
+{
+	if (ip->i_cowfp)
+		return;
+
+	ip->i_cowfp = kmem_zone_zalloc(xfs_ifork_zone,
+				       KM_SLEEP | KM_NOFS);
+	ip->i_cowfp->if_flags = XFS_IFEXTENTS;
+	ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
+	ip->i_cnextents = 0;
+}
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index f95e072..44d38eb 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -92,7 +92,9 @@ typedef struct xfs_ifork {
 #define XFS_IFORK_PTR(ip,w)		\
 	((w) == XFS_DATA_FORK ? \
 		&(ip)->i_df : \
-		(ip)->i_afp)
+		((w) == XFS_ATTR_FORK ? \
+			(ip)->i_afp : \
+			(ip)->i_cowfp))
 #define XFS_IFORK_DSIZE(ip) \
 	(XFS_IFORK_Q(ip) ? \
 		XFS_IFORK_BOFF(ip) : \
@@ -105,26 +107,38 @@ typedef struct xfs_ifork {
 #define XFS_IFORK_SIZE(ip,w) \
 	((w) == XFS_DATA_FORK ? \
 		XFS_IFORK_DSIZE(ip) : \
-		XFS_IFORK_ASIZE(ip))
+		((w) == XFS_ATTR_FORK ? \
+			XFS_IFORK_ASIZE(ip) : \
+			0))
 #define XFS_IFORK_FORMAT(ip,w) \
 	((w) == XFS_DATA_FORK ? \
 		(ip)->i_d.di_format : \
-		(ip)->i_d.di_aformat)
+		((w) == XFS_ATTR_FORK ? \
+			(ip)->i_d.di_aformat : \
+			(ip)->i_cformat))
 #define XFS_IFORK_FMT_SET(ip,w,n) \
 	((w) == XFS_DATA_FORK ? \
 		((ip)->i_d.di_format = (n)) : \
-		((ip)->i_d.di_aformat = (n)))
+		((w) == XFS_ATTR_FORK ? \
+			((ip)->i_d.di_aformat = (n)) : \
+			((ip)->i_cformat = (n))))
 #define XFS_IFORK_NEXTENTS(ip,w) \
 	((w) == XFS_DATA_FORK ? \
 		(ip)->i_d.di_nextents : \
-		(ip)->i_d.di_anextents)
+		((w) == XFS_ATTR_FORK ? \
+			(ip)->i_d.di_anextents : \
+			(ip)->i_cnextents))
 #define XFS_IFORK_NEXT_SET(ip,w,n) \
 	((w) == XFS_DATA_FORK ? \
 		((ip)->i_d.di_nextents = (n)) : \
-		((ip)->i_d.di_anextents = (n)))
+		((w) == XFS_ATTR_FORK ? \
+			((ip)->i_d.di_anextents = (n)) : \
+			((ip)->i_cnextents = (n))))
 #define XFS_IFORK_MAXEXT(ip, w) \
 	(XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t))
 
+xfs_ifork_t	*XFS_IEXT_STATE_TO_FORK(struct xfs_inode *ip, int state);
+
 int		xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
 void		xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
 				struct xfs_inode_log_item *, int);
@@ -169,4 +183,6 @@ void		xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int);
 
 extern struct kmem_zone	*xfs_ifork_zone;
 
+extern void xfs_ifork_init_cow(struct xfs_inode *ip);
+
 #endif	/* __XFS_INODE_FORK_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 73d0540..1c40b85 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -1263,9 +1263,10 @@ xfs_rmap_finish_one(
  */
 static bool
 xfs_rmap_update_is_needed(
-	struct xfs_mount	*mp)
+	struct xfs_mount	*mp,
+	int			whichfork)
 {
-	return xfs_sb_version_hasrmapbt(&mp->m_sb);
+	return xfs_sb_version_hasrmapbt(&mp->m_sb) && whichfork != XFS_COW_FORK;
 }
 
 /*
@@ -1311,7 +1312,7 @@ xfs_rmap_map_extent(
 	int			whichfork,
 	struct xfs_bmbt_irec	*PREV)
 {
-	if (!xfs_rmap_update_is_needed(mp))
+	if (!xfs_rmap_update_is_needed(mp, whichfork))
 		return 0;
 
 	return __xfs_rmap_add(mp, dfops, XFS_RMAP_MAP, ip->i_ino,
@@ -1327,7 +1328,7 @@ xfs_rmap_unmap_extent(
 	int			whichfork,
 	struct xfs_bmbt_irec	*PREV)
 {
-	if (!xfs_rmap_update_is_needed(mp))
+	if (!xfs_rmap_update_is_needed(mp, whichfork))
 		return 0;
 
 	return __xfs_rmap_add(mp, dfops, XFS_RMAP_UNMAP, ip->i_ino,
@@ -1343,7 +1344,7 @@ xfs_rmap_convert_extent(
 	int			whichfork,
 	struct xfs_bmbt_irec	*PREV)
 {
-	if (!xfs_rmap_update_is_needed(mp))
+	if (!xfs_rmap_update_is_needed(mp, whichfork))
 		return 0;
 
 	return __xfs_rmap_add(mp, dfops, XFS_RMAP_CONVERT, ip->i_ino,
@@ -1362,7 +1363,7 @@ xfs_rmap_alloc_extent(
 {
 	struct xfs_bmbt_irec	bmap;
 
-	if (!xfs_rmap_update_is_needed(mp))
+	if (!xfs_rmap_update_is_needed(mp, XFS_DATA_FORK))
 		return 0;
 
 	bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
@@ -1386,7 +1387,7 @@ xfs_rmap_free_extent(
 {
 	struct xfs_bmbt_irec	bmap;
 
-	if (!xfs_rmap_update_is_needed(mp))
+	if (!xfs_rmap_update_is_needed(mp, XFS_DATA_FORK))
 		return 0;
 
 	bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index be7b6de..8d74870 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -90,6 +90,7 @@ typedef __int64_t	xfs_sfiloff_t;	/* signed block number in a file */
  */
 #define	XFS_DATA_FORK	0
 #define	XFS_ATTR_FORK	1
+#define	XFS_COW_FORK	2
 
 /*
  * Min numbers of data/attr fork btree root pointers.
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 65b2e3f..2d3de02 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -76,6 +76,9 @@ xfs_inode_alloc(
 	ip->i_mount = mp;
 	memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
 	ip->i_afp = NULL;
+	ip->i_cowfp = NULL;
+	ip->i_cnextents = 0;
+	ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
 	memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
 	ip->i_flags = 0;
 	ip->i_delayed_blks = 0;
@@ -101,6 +104,8 @@ xfs_inode_free_callback(
 
 	if (ip->i_afp)
 		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
+	if (ip->i_cowfp)
+		xfs_idestroy_fork(ip, XFS_COW_FORK);
 
 	if (ip->i_itemp) {
 		ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL));
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 46632f1..1af1d8d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -47,6 +47,7 @@ typedef struct xfs_inode {
 
 	/* Extent information. */
 	xfs_ifork_t		*i_afp;		/* attribute fork pointer */
+	xfs_ifork_t		*i_cowfp;	/* copy on write extents */
 	xfs_ifork_t		i_df;		/* data fork */
 
 	/* operations vectors */
@@ -65,6 +66,9 @@ typedef struct xfs_inode {
 
 	struct xfs_icdinode	i_d;		/* most of ondisk inode */
 
+	xfs_extnum_t		i_cnextents;	/* # of extents in cow fork */
+	unsigned int		i_cformat;	/* format of cow fork */
+
 	/* VFS inode */
 	struct inode		i_vnode;	/* embedded VFS inode */
 } xfs_inode_t;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
new file mode 100644
index 0000000..7adbb83
--- /dev/null
+++ b/fs/xfs/xfs_reflink.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2016 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
+#include "xfs_error.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_ioctl.h"
+#include "xfs_trace.h"
+#include "xfs_log.h"
+#include "xfs_icache.h"
+#include "xfs_pnfs.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_bit.h"
+#include "xfs_alloc.h"
+#include "xfs_quota_defs.h"
+#include "xfs_quota.h"
+#include "xfs_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_reflink.h"
+
+/*
+ * Copy on Write of Shared Blocks
+ *
+ * XFS must preserve "the usual" file semantics even when two files share
+ * the same physical blocks.  This means that a write to one file must not
+ * alter the blocks in a different file; the way that we'll do that is
+ * through the use of a copy-on-write mechanism.  At a high level, that
+ * means that when we want to write to a shared block, we allocate a new
+ * block, write the data to the new block, and if that succeeds we map the
+ * new block into the file.
+ *
+ * XFS provides a "delayed allocation" mechanism that defers the allocation
+ * of disk blocks to dirty-but-not-yet-mapped file blocks as long as
+ * possible.  This reduces fragmentation by enabling the filesystem to ask
+ * for bigger chunks less often, which is exactly what we want for CoW.
+ *
+ * The delalloc mechanism begins when the kernel wants to make a block
+ * writable (write_begin or page_mkwrite).  If the offset is not mapped, we
+ * create a delalloc mapping, which is a regular in-core extent, but without
+ * a real startblock.  (For delalloc mappings, the startblock encodes both
+ * a flag that this is a delalloc mapping, and a worst-case estimate of how
+ * many blocks might be required to put the mapping into the BMBT.)  Delalloc
+ * mappings are a reservation against the free space in the filesystem;
+ * adjacent mappings can also be combined into fewer larger mappings.
+ *
+ * When dirty pages are being written out (typically in writepage), the
+ * delalloc reservations are converted into real mappings by allocating
+ * blocks and replacing the delalloc mapping with real ones.  A delalloc
+ * mapping can be replaced by several real ones if the free space is
+ * fragmented.
+ *
+ * We want to adapt the delalloc mechanism for copy-on-write, since the
+ * write paths are similar.  The first two steps (creating the reservation
+ * and allocating the blocks) are exactly the same as delalloc except that
+ * the mappings must be stored in a separate CoW fork because we do not want
+ * to disturb the mapping in the data fork until we're sure that the write
+ * succeeded.  IO completion in this case is the process of removing the old
+ * mapping from the data fork and moving the new mapping from the CoW fork to
+ * the data fork.  This will be discussed shortly.
+ *
+ * For now, unaligned directio writes will be bounced back to the page cache.
+ * Block-aligned directio writes will use the same mechanism as buffered
+ * writes.
+ *
+ * CoW remapping must be done after the data block write completes,
+ * because we don't want to destroy the old data fork map until we're sure
+ * the new block has been written.  Since the new mappings are kept in a
+ * separate fork, we can simply iterate these mappings to find the ones
+ * that cover the file blocks that we just CoW'd.  For each extent, simply
+ * unmap the corresponding range in the data fork, map the new range into
+ * the data fork, and remove the extent from the CoW fork.
+ *
+ * Since the remapping operation can be applied to an arbitrary file
+ * range, we record the need for the remap step as a flag in the ioend
+ * instead of declaring a new IO type.  This is required for direct io
+ * because we only have one ioend for the whole dio, and we have to be able to
+ * remember the presence of unwritten blocks and CoW blocks with a single
+ * ioend structure.  Better yet, the more ground we can cover with one
+ * ioend, the better.
+ */
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
new file mode 100644
index 0000000..820b151
--- /dev/null
+++ b/fs/xfs/xfs_reflink.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2016 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __XFS_REFLINK_H
+#define __XFS_REFLINK_H 1
+
+#endif /* __XFS_REFLINK_H */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 5403199..883c375 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -269,10 +269,10 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
 		__field(unsigned long, caller_ip)
 	),
 	TP_fast_assign(
-		struct xfs_ifork	*ifp = (state & BMAP_ATTRFORK) ?
-						ip->i_afp : &ip->i_df;
+		struct xfs_ifork	*ifp;
 		struct xfs_bmbt_irec	r;
 
+		ifp = XFS_IEXT_STATE_TO_FORK(ip, state);
 		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
 		__entry->dev = VFS_I(ip)->i_sb->s_dev;
 		__entry->ino = ip->i_ino;


  parent reply	other threads:[~2016-09-30  3:08 UTC|newest]

Thread overview: 187+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-30  3:05 [PATCH v10 00/63] xfs: add reflink and dedupe support Darrick J. Wong
2016-09-30  3:05 ` [PATCH 01/63] vfs: support FS_XFLAG_COWEXTSIZE and get/set of CoW extent size hint Darrick J. Wong
2016-09-30  3:05 ` [PATCH 02/63] vfs: add a FALLOC_FL_UNSHARE mode to fallocate to unshare a range of blocks Darrick J. Wong
2016-09-30  7:08   ` Christoph Hellwig
2016-09-30  3:05 ` [PATCH 03/63] xfs: return an error when an inline directory is too small Darrick J. Wong
2016-09-30  3:06 ` [PATCH 04/63] xfs: define tracepoints for refcount btree activities Darrick J. Wong
2016-09-30  3:06 ` [PATCH 05/63] xfs: introduce refcount btree definitions Darrick J. Wong
2016-09-30  3:06 ` [PATCH 06/63] xfs: refcount btree add more reserved blocks Darrick J. Wong
2016-09-30  3:06 ` [PATCH 07/63] xfs: define the on-disk refcount btree format Darrick J. Wong
2016-09-30  3:06 ` [PATCH 08/63] xfs: add refcount btree support to growfs Darrick J. Wong
2016-09-30  3:06 ` [PATCH 09/63] xfs: account for the refcount btree in the alloc/free log reservation Darrick J. Wong
2016-09-30  3:06 ` [PATCH 10/63] xfs: add refcount btree operations Darrick J. Wong
2016-09-30  3:06 ` [PATCH 11/63] xfs: create refcount update intent log items Darrick J. Wong
2016-09-30  3:06 ` [PATCH 12/63] xfs: log refcount intent items Darrick J. Wong
2016-09-30  3:06 ` [PATCH 13/63] xfs: adjust refcount of an extent of blocks in refcount btree Darrick J. Wong
2016-09-30  7:11   ` Christoph Hellwig
2016-09-30 17:53     ` Darrick J. Wong
2016-09-30  3:07 ` [PATCH 14/63] xfs: connect refcount adjust functions to upper layers Darrick J. Wong
2016-09-30  7:13   ` Christoph Hellwig
2016-09-30 16:21   ` Brian Foster
2016-09-30 19:40     ` Darrick J. Wong
2016-09-30 20:11       ` Brian Foster
2016-09-30  3:07 ` [PATCH 15/63] xfs: adjust refcount when unmapping file blocks Darrick J. Wong
2016-09-30  7:14   ` Christoph Hellwig
2016-09-30  3:07 ` [PATCH 16/63] xfs: add refcount btree block detection to log recovery Darrick J. Wong
2016-09-30  7:15   ` Christoph Hellwig
2016-09-30  3:07 ` [PATCH 17/63] xfs: refcount btree requires more reserved space Darrick J. Wong
2016-09-30  7:15   ` Christoph Hellwig
2016-09-30 16:46   ` Brian Foster
2016-09-30 18:41     ` Darrick J. Wong
2016-09-30  3:07 ` [PATCH 18/63] xfs: introduce reflink utility functions Darrick J. Wong
2016-09-30  3:07   ` Darrick J. Wong
2016-09-30  7:16   ` Christoph Hellwig
2016-09-30 19:22   ` Brian Foster
2016-09-30 19:50     ` Darrick J. Wong
2016-09-30  3:07 ` [PATCH 19/63] xfs: create bmbt update intent log items Darrick J. Wong
2016-09-30  7:24   ` Christoph Hellwig
2016-09-30 17:24     ` Darrick J. Wong
2016-09-30  3:07 ` [PATCH 20/63] xfs: log bmap intent items Darrick J. Wong
2016-09-30  7:26   ` Christoph Hellwig
2016-09-30 17:26     ` Darrick J. Wong
2016-09-30 19:22   ` Brian Foster
2016-09-30 19:52     ` Darrick J. Wong
2016-09-30  3:07 ` [PATCH 21/63] xfs: map an inode's offset to an exact physical block Darrick J. Wong
2016-09-30  7:31   ` Christoph Hellwig
2016-09-30 17:30     ` Darrick J. Wong
2016-10-03 19:03   ` Brian Foster
2016-10-04  0:11     ` Darrick J. Wong
2016-10-04 12:43       ` Brian Foster
2016-10-04 17:28         ` Darrick J. Wong
2016-09-30  3:08 ` [PATCH 22/63] xfs: pass bmapi flags through to bmap_del_extent Darrick J. Wong
2016-09-30  7:16   ` Christoph Hellwig
2016-09-30  3:08 ` [PATCH 23/63] xfs: implement deferred bmbt map/unmap operations Darrick J. Wong
2016-09-30  7:34   ` Christoph Hellwig
2016-09-30 17:38     ` Darrick J. Wong
2016-09-30 20:34       ` Roger Willcocks
2016-09-30 21:08         ` Darrick J. Wong
2016-09-30  3:08 ` [PATCH 24/63] xfs: when replaying bmap operations, don't let unlinked inodes get reaped Darrick J. Wong
2016-09-30  7:35   ` Christoph Hellwig
2016-10-03 19:04   ` Brian Foster
2016-10-04  0:29     ` Darrick J. Wong
2016-10-04 12:44       ` Brian Foster
2016-10-04 19:07         ` Dave Chinner
2016-10-04 21:44           ` Darrick J. Wong
2016-09-30  3:08 ` [PATCH 25/63] xfs: return work remaining at the end of a bunmapi operation Darrick J. Wong
2016-09-30  7:19   ` Christoph Hellwig
2016-10-03 19:04   ` Brian Foster
2016-10-04  0:30     ` Darrick J. Wong
2016-10-04 12:44       ` Brian Foster
2016-09-30  3:08 ` [PATCH 26/63] xfs: define tracepoints for reflink activities Darrick J. Wong
2016-09-30  7:20   ` Christoph Hellwig
2016-09-30  3:08 ` [PATCH 27/63] xfs: add reflink feature flag to geometry Darrick J. Wong
2016-09-30  7:20   ` Christoph Hellwig
2016-09-30  3:08 ` [PATCH 28/63] xfs: don't allow reflinked dir/dev/fifo/socket/pipe files Darrick J. Wong
2016-09-30  7:20   ` Christoph Hellwig
2016-09-30  3:08 ` Darrick J. Wong [this message]
2016-09-30  7:39   ` [PATCH 29/63] xfs: introduce the CoW fork Christoph Hellwig
2016-09-30 17:48     ` Darrick J. Wong
2016-09-30  3:08 ` [PATCH 30/63] xfs: support bmapping delalloc extents in " Darrick J. Wong
2016-09-30  7:42   ` Christoph Hellwig
2016-09-30  3:09 ` [PATCH 31/63] xfs: create delalloc extents in " Darrick J. Wong
2016-10-04 16:38   ` Brian Foster
2016-10-04 17:39     ` Darrick J. Wong
2016-10-04 18:38       ` Brian Foster
2016-09-30  3:09 ` [PATCH 32/63] xfs: support allocating delayed " Darrick J. Wong
2016-09-30  7:42   ` Christoph Hellwig
2016-10-04 16:38   ` Brian Foster
2016-09-30  3:09 ` [PATCH 33/63] xfs: allocate " Darrick J. Wong
2016-10-04 16:38   ` Brian Foster
2016-10-04 18:26     ` Darrick J. Wong
2016-10-04 18:39       ` Brian Foster
2016-09-30  3:09 ` [PATCH 34/63] xfs: support removing extents from " Darrick J. Wong
2016-09-30  7:46   ` Christoph Hellwig
2016-09-30 18:00     ` Darrick J. Wong
2016-10-05 18:26   ` Brian Foster
2016-09-30  3:09 ` [PATCH 35/63] xfs: move mappings from cow fork to data fork after copy-write Darrick J. Wong
2016-10-05 18:26   ` Brian Foster
2016-10-05 21:22     ` Darrick J. Wong
2016-09-30  3:09 ` [PATCH 36/63] xfs: report shared extent mappings to userspace correctly Darrick J. Wong
2016-09-30  3:09 ` [PATCH 37/63] xfs: implement CoW for directio writes Darrick J. Wong
2016-10-05 18:27   ` Brian Foster
2016-10-05 20:55     ` Darrick J. Wong
2016-10-06 12:20       ` Brian Foster
2016-10-07  1:02         ` Darrick J. Wong
2016-10-07  6:17           ` Christoph Hellwig
2016-10-07 12:16             ` Brian Foster
2016-10-07 12:15           ` Brian Foster
2016-10-13 18:14             ` Darrick J. Wong
2016-10-13 19:01               ` Brian Foster
2016-09-30  3:09 ` [PATCH 38/63] xfs: cancel CoW reservations and clear inode reflink flag when freeing blocks Darrick J. Wong
2016-09-30  7:47   ` Christoph Hellwig
2016-10-06 16:44   ` Brian Foster
2016-10-07  0:40     ` Darrick J. Wong
2016-09-30  3:09 ` [PATCH 39/63] xfs: cancel pending CoW reservations when destroying inodes Darrick J. Wong
2016-09-30  7:47   ` Christoph Hellwig
2016-10-06 16:44   ` Brian Foster
2016-10-07  0:42     ` Darrick J. Wong
2016-09-30  3:09 ` [PATCH 40/63] xfs: store in-progress CoW allocations in the refcount btree Darrick J. Wong
2016-09-30  7:49   ` Christoph Hellwig
2016-10-07 18:04   ` Brian Foster
2016-10-07 19:18     ` Darrick J. Wong
2016-09-30  3:10 ` [PATCH 41/63] xfs: reflink extents from one file to another Darrick J. Wong
2016-09-30  7:50   ` Christoph Hellwig
2016-10-07 18:04   ` Brian Foster
2016-10-07 19:44     ` Darrick J. Wong
2016-10-07 20:48       ` Brian Foster
2016-10-07 21:41         ` Darrick J. Wong
2016-10-10 13:17           ` Brian Foster
2016-09-30  3:10 ` [PATCH 42/63] xfs: add clone file and clone range vfs functions Darrick J. Wong
2016-09-30  7:51   ` Christoph Hellwig
2016-09-30 18:04     ` Darrick J. Wong
2016-10-07 18:04   ` Brian Foster
2016-10-07 20:31     ` Darrick J. Wong
2016-09-30  3:10 ` [PATCH 43/63] xfs: add dedupe range vfs function Darrick J. Wong
2016-09-30  7:53   ` Christoph Hellwig
2016-09-30  3:10 ` [PATCH 44/63] xfs: teach get_bmapx about shared extents and the CoW fork Darrick J. Wong
2016-09-30  7:53   ` Christoph Hellwig
2016-09-30  3:10 ` [PATCH 45/63] xfs: swap inode reflink flags when swapping inode extents Darrick J. Wong
2016-09-30  7:54   ` Christoph Hellwig
2016-09-30  3:10 ` [PATCH 46/63] xfs: unshare a range of blocks via fallocate Darrick J. Wong
2016-09-30  7:54   ` Christoph Hellwig
2016-10-07 18:05   ` Brian Foster
2016-10-07 20:26     ` Darrick J. Wong
2016-10-07 20:58       ` Brian Foster
2016-10-07 21:15         ` Darrick J. Wong
2016-10-07 22:25           ` Dave Chinner
2016-10-10 17:05             ` Darrick J. Wong
2016-09-30  3:10 ` [PATCH 47/63] xfs: create a separate cow extent size hint for the allocator Darrick J. Wong
2016-09-30  7:55   ` Christoph Hellwig
2016-09-30  3:10 ` [PATCH 48/63] xfs: preallocate blocks for worst-case btree expansion Darrick J. Wong
2016-09-30  8:19   ` Christoph Hellwig
2016-10-12 18:44   ` Brian Foster
2016-10-12 20:52     ` Darrick J. Wong
2016-10-12 22:42       ` Brian Foster
2016-12-06 19:32         ` Darrick J. Wong
2016-12-07 11:53           ` Brian Foster
2016-12-08  6:14             ` Darrick J. Wong
2016-09-30  3:10 ` [PATCH 49/63] xfs: don't allow reflink when the AG is low on space Darrick J. Wong
2016-09-30  8:19   ` Christoph Hellwig
2016-09-30  3:11 ` [PATCH 50/63] xfs: try other AGs to allocate a BMBT block Darrick J. Wong
2016-09-30  8:20   ` Christoph Hellwig
2016-09-30  3:11 ` [PATCH 51/63] xfs: garbage collect old cowextsz reservations Darrick J. Wong
2016-09-30  8:23   ` Christoph Hellwig
2016-09-30  3:11 ` [PATCH 52/63] xfs: increase log reservations for reflink Darrick J. Wong
2016-09-30  8:23   ` Christoph Hellwig
2016-09-30  3:11 ` [PATCH 53/63] xfs: add shared rmap map/unmap/convert log item types Darrick J. Wong
2016-09-30  8:24   ` Christoph Hellwig
2016-09-30  3:11 ` [PATCH 54/63] xfs: use interval query for rmap alloc operations on shared files Darrick J. Wong
2016-09-30  8:24   ` Christoph Hellwig
2016-09-30  3:11 ` [PATCH 55/63] xfs: convert unwritten status of reverse mappings for " Darrick J. Wong
2016-09-30  8:25   ` Christoph Hellwig
2016-09-30  3:11 ` [PATCH 56/63] xfs: set a default CoW extent size of 32 blocks Darrick J. Wong
2016-09-30  8:25   ` Christoph Hellwig
2016-09-30  3:11 ` [PATCH 57/63] xfs: check for invalid inode reflink flags Darrick J. Wong
2016-09-30  8:26   ` Christoph Hellwig
2016-09-30  3:11 ` [PATCH 58/63] xfs: don't mix reflink and DAX mode for now Darrick J. Wong
2016-09-30  8:26   ` Christoph Hellwig
2016-09-30  3:12 ` [PATCH 59/63] xfs: simulate per-AG reservations being critically low Darrick J. Wong
2016-09-30  8:27   ` Christoph Hellwig
2016-09-30  3:12 ` [PATCH 60/63] xfs: recognize the reflink feature bit Darrick J. Wong
2016-09-30  8:27   ` Christoph Hellwig
2016-09-30  3:12 ` [PATCH 61/63] xfs: various swapext cleanups Darrick J. Wong
2016-09-30  8:28   ` Christoph Hellwig
2016-09-30  3:12 ` [PATCH 62/63] xfs: refactor swapext code Darrick J. Wong
2016-09-30  8:28   ` Christoph Hellwig
2016-09-30  3:12 ` [PATCH 63/63] xfs: implement swapext for rmap filesystems Darrick J. Wong
2016-09-30  9:00 ` [PATCH v10 00/63] xfs: add reflink and dedupe support Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=147520492856.29434.17061076591831860945.stgit@birch.djwong.org \
    --to=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.