All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 00/28] xfs: refactor log recovery
@ 2020-05-05  1:10 Darrick J. Wong
  2020-05-05  1:10 ` [PATCH 01/28] xfs: convert xfs_log_recover_item_t to struct xfs_log_recover_item Darrick J. Wong
                   ` (27 more replies)
  0 siblings, 28 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:10 UTC (permalink / raw)
  To: darrick.wong; +Cc: Christoph Hellwig, Christoph Hellwig, linux-xfs

Hi all,

This series refactors log recovery by moving recovery code for each log
item type into the source code for the rest of that log item type and
using dispatch function pointers to virtualize the interactions.  This
dramatically reduces the amount of code in xfs_log_recover.c and
increases cohesion throughout the log code.

In this second version, we dispense with the extra indirection for log
intent items.  During log recovery pass 2, committing of the recovered
intent and intent-done items is done directly by creating
xlog_recover_item_types for all intent types.  The recovery functions
that do the work are now called directly through the xfs_log_item ops
structure.  Recovery item sorting is less intrusive, and the buffer and
inode recovery code are in separate files now.

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=refactor-log-recovery

^ permalink raw reply	[flat|nested] 94+ messages in thread

* [PATCH 01/28] xfs: convert xfs_log_recover_item_t to struct xfs_log_recover_item
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
@ 2020-05-05  1:10 ` Darrick J. Wong
  2020-05-05  3:33   ` Chandan Babu R
  2020-05-06 14:59   ` Christoph Hellwig
  2020-05-05  1:10 ` [PATCH 02/28] xfs: refactor log recovery item sorting into a generic dispatch structure Darrick J. Wong
                   ` (26 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:10 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Remove the old xlog_recover_item_t typedef and use struct
xlog_recover_item directly.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_log_recover.h |    4 ++--
 fs/xfs/xfs_log_recover.c        |   26 ++++++++++++++------------
 2 files changed, 16 insertions(+), 14 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 3bf671637a91..148e0cb5d379 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -22,13 +22,13 @@
 /*
  * item headers are in ri_buf[0].  Additional buffers follow.
  */
-typedef struct xlog_recover_item {
+struct xlog_recover_item {
 	struct list_head	ri_list;
 	int			ri_type;
 	int			ri_cnt;	/* count of regions found */
 	int			ri_total;	/* total regions */
 	xfs_log_iovec_t		*ri_buf;	/* ptr to regions buffer */
-} xlog_recover_item_t;
+};
 
 struct xlog_recover {
 	struct hlist_node	r_list;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index d0e2dd81de53..c2c06f70fb8a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1841,7 +1841,7 @@ xlog_recover_reorder_trans(
 	struct xlog_recover	*trans,
 	int			pass)
 {
-	xlog_recover_item_t	*item, *n;
+	struct xlog_recover_item *item, *n;
 	int			error = 0;
 	LIST_HEAD(sort_list);
 	LIST_HEAD(cancel_list);
@@ -2056,7 +2056,7 @@ xlog_recover_buffer_pass1(
 STATIC int
 xlog_recover_do_inode_buffer(
 	struct xfs_mount	*mp,
-	xlog_recover_item_t	*item,
+	struct xlog_recover_item *item,
 	struct xfs_buf		*bp,
 	xfs_buf_log_format_t	*buf_f)
 {
@@ -2561,7 +2561,7 @@ xlog_recover_validate_buf_type(
 STATIC void
 xlog_recover_do_reg_buffer(
 	struct xfs_mount	*mp,
-	xlog_recover_item_t	*item,
+	struct xlog_recover_item *item,
 	struct xfs_buf		*bp,
 	xfs_buf_log_format_t	*buf_f,
 	xfs_lsn_t		current_lsn)
@@ -3759,7 +3759,7 @@ STATIC int
 xlog_recover_do_icreate_pass2(
 	struct xlog		*log,
 	struct list_head	*buffer_list,
-	xlog_recover_item_t	*item)
+	struct xlog_recover_item *item)
 {
 	struct xfs_mount	*mp = log->l_mp;
 	struct xfs_icreate_log	*icl;
@@ -4134,9 +4134,9 @@ STATIC void
 xlog_recover_add_item(
 	struct list_head	*head)
 {
-	xlog_recover_item_t	*item;
+	struct xlog_recover_item *item;
 
-	item = kmem_zalloc(sizeof(xlog_recover_item_t), 0);
+	item = kmem_zalloc(sizeof(struct xlog_recover_item), 0);
 	INIT_LIST_HEAD(&item->ri_list);
 	list_add_tail(&item->ri_list, head);
 }
@@ -4148,7 +4148,7 @@ xlog_recover_add_to_cont_trans(
 	char			*dp,
 	int			len)
 {
-	xlog_recover_item_t	*item;
+	struct xlog_recover_item *item;
 	char			*ptr, *old_ptr;
 	int			old_len;
 
@@ -4171,7 +4171,8 @@ xlog_recover_add_to_cont_trans(
 	}
 
 	/* take the tail entry */
-	item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+	item = list_entry(trans->r_itemq.prev, struct xlog_recover_item,
+			  ri_list);
 
 	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
 	old_len = item->ri_buf[item->ri_cnt-1].i_len;
@@ -4205,7 +4206,7 @@ xlog_recover_add_to_trans(
 	int			len)
 {
 	struct xfs_inode_log_format	*in_f;			/* any will do */
-	xlog_recover_item_t	*item;
+	struct xlog_recover_item *item;
 	char			*ptr;
 
 	if (!len)
@@ -4241,13 +4242,14 @@ xlog_recover_add_to_trans(
 	in_f = (struct xfs_inode_log_format *)ptr;
 
 	/* take the tail entry */
-	item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+	item = list_entry(trans->r_itemq.prev, struct xlog_recover_item,
+			  ri_list);
 	if (item->ri_total != 0 &&
 	     item->ri_total == item->ri_cnt) {
 		/* tail item is in use, get a new one */
 		xlog_recover_add_item(&trans->r_itemq);
 		item = list_entry(trans->r_itemq.prev,
-					xlog_recover_item_t, ri_list);
+					struct xlog_recover_item, ri_list);
 	}
 
 	if (item->ri_total == 0) {		/* first region to be added */
@@ -4293,7 +4295,7 @@ STATIC void
 xlog_recover_free_trans(
 	struct xlog_recover	*trans)
 {
-	xlog_recover_item_t	*item, *n;
+	struct xlog_recover_item *item, *n;
 	int			i;
 
 	hlist_del_init(&trans->r_list);


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 02/28] xfs: refactor log recovery item sorting into a generic dispatch structure
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
  2020-05-05  1:10 ` [PATCH 01/28] xfs: convert xfs_log_recover_item_t to struct xfs_log_recover_item Darrick J. Wong
@ 2020-05-05  1:10 ` Darrick J. Wong
  2020-05-05  4:11   ` Chandan Babu R
  2020-05-06 15:03   ` Christoph Hellwig
  2020-05-05  1:10 ` [PATCH 03/28] xfs: refactor log recovery item dispatch for pass2 readhead functions Darrick J. Wong
                   ` (25 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:10 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Create a generic dispatch structure to delegate recovery of different
log item types into various code modules.  This will enable us to move
code specific to a particular log item type out of xfs_log_recover.c and
into the log item source.

The first operation we virtualize is the log item sorting.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/Makefile                 |    3 +
 fs/xfs/libxfs/xfs_log_recover.h |   45 ++++++++++++++++++-
 fs/xfs/xfs_bmap_item.c          |    9 ++++
 fs/xfs/xfs_buf_item_recover.c   |   38 ++++++++++++++++
 fs/xfs/xfs_dquot_item_recover.c |   29 ++++++++++++
 fs/xfs/xfs_extfree_item.c       |    9 ++++
 fs/xfs/xfs_icreate_item.c       |   20 ++++++++
 fs/xfs/xfs_inode_item_recover.c |   26 +++++++++++
 fs/xfs/xfs_log_recover.c        |   93 +++++++++++++++++++++++----------------
 fs/xfs/xfs_refcount_item.c      |    9 ++++
 fs/xfs/xfs_rmap_item.c          |    9 ++++
 11 files changed, 251 insertions(+), 39 deletions(-)
 create mode 100644 fs/xfs/xfs_buf_item_recover.c
 create mode 100644 fs/xfs/xfs_dquot_item_recover.c
 create mode 100644 fs/xfs/xfs_inode_item_recover.c


diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index ff94fb90a2ee..04611a1068b4 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -99,9 +99,12 @@ xfs-y				+= xfs_log.o \
 				   xfs_log_cil.o \
 				   xfs_bmap_item.o \
 				   xfs_buf_item.o \
+				   xfs_buf_item_recover.o \
+				   xfs_dquot_item_recover.o \
 				   xfs_extfree_item.o \
 				   xfs_icreate_item.o \
 				   xfs_inode_item.o \
+				   xfs_inode_item_recover.o \
 				   xfs_refcount_item.o \
 				   xfs_rmap_item.o \
 				   xfs_log_recover.o \
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 148e0cb5d379..271b0741f1e1 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -6,6 +6,47 @@
 #ifndef	__XFS_LOG_RECOVER_H__
 #define __XFS_LOG_RECOVER_H__
 
+/*
+ * Each log item type (XFS_LI_*) gets its own xlog_recover_item_ops to
+ * define how recovery should work for that type of log item.
+ */
+struct xlog_recover_item;
+
+/* Sorting hat for log items as they're read in. */
+enum xlog_recover_reorder {
+	XLOG_REORDER_BUFFER_LIST,
+	XLOG_REORDER_ITEM_LIST,
+	XLOG_REORDER_INODE_BUFFER_LIST,
+	XLOG_REORDER_CANCEL_LIST,
+};
+
+struct xlog_recover_item_ops {
+	uint16_t	item_type;	/* XFS_LI_* type code. */
+
+	/*
+	 * Help sort recovered log items into the order required to replay them
+	 * correctly.  Log item types that always use XLOG_REORDER_ITEM_LIST do
+	 * not have to supply a function here.  See the comment preceding
+	 * xlog_recover_reorder_trans for more details about what the return
+	 * values mean.
+	 */
+	enum xlog_recover_reorder (*reorder)(struct xlog_recover_item *item);
+};
+
+extern const struct xlog_recover_item_ops xlog_icreate_item_ops;
+extern const struct xlog_recover_item_ops xlog_buf_item_ops;
+extern const struct xlog_recover_item_ops xlog_inode_item_ops;
+extern const struct xlog_recover_item_ops xlog_dquot_item_ops;
+extern const struct xlog_recover_item_ops xlog_quotaoff_item_ops;
+extern const struct xlog_recover_item_ops xlog_bmap_intent_item_ops;
+extern const struct xlog_recover_item_ops xlog_bmap_done_item_ops;
+extern const struct xlog_recover_item_ops xlog_extfree_intent_item_ops;
+extern const struct xlog_recover_item_ops xlog_extfree_done_item_ops;
+extern const struct xlog_recover_item_ops xlog_rmap_intent_item_ops;
+extern const struct xlog_recover_item_ops xlog_rmap_done_item_ops;
+extern const struct xlog_recover_item_ops xlog_refcount_intent_item_ops;
+extern const struct xlog_recover_item_ops xlog_refcount_done_item_ops;
+
 /*
  * Macros, structures, prototypes for internal log manager use.
  */
@@ -24,10 +65,10 @@
  */
 struct xlog_recover_item {
 	struct list_head	ri_list;
-	int			ri_type;
 	int			ri_cnt;	/* count of regions found */
 	int			ri_total;	/* total regions */
-	xfs_log_iovec_t		*ri_buf;	/* ptr to regions buffer */
+	struct xfs_log_iovec	*ri_buf;	/* ptr to regions buffer */
+	const struct xlog_recover_item_ops *ri_ops;
 };
 
 struct xlog_recover {
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 7768fb2b7135..42354403fec7 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -22,6 +22,7 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_trans_space.h"
 #include "xfs_error.h"
+#include "xfs_log_recover.h"
 
 kmem_zone_t	*xfs_bui_zone;
 kmem_zone_t	*xfs_bud_zone;
@@ -557,3 +558,11 @@ xfs_bui_recover(
 	}
 	return error;
 }
+
+const struct xlog_recover_item_ops xlog_bmap_intent_item_ops = {
+	.item_type		= XFS_LI_BUI,
+};
+
+const struct xlog_recover_item_ops xlog_bmap_done_item_ops = {
+	.item_type		= XFS_LI_BUD,
+};
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
new file mode 100644
index 000000000000..def19025512e
--- /dev/null
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_mount.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_priv.h"
+#include "xfs_trace.h"
+#include "xfs_log.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
+
+STATIC enum xlog_recover_reorder
+xlog_recover_buf_reorder(
+	struct xlog_recover_item	*item)
+{
+	struct xfs_buf_log_format	*buf_f = item->ri_buf[0].i_addr;
+
+	if (buf_f->blf_flags & XFS_BLF_CANCEL)
+		return XLOG_REORDER_CANCEL_LIST;
+	if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
+		return XLOG_REORDER_INODE_BUFFER_LIST;
+	return XLOG_REORDER_BUFFER_LIST;
+}
+
+const struct xlog_recover_item_ops xlog_buf_item_ops = {
+	.item_type		= XFS_LI_BUF,
+	.reorder		= xlog_recover_buf_reorder,
+};
diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
new file mode 100644
index 000000000000..78fe644e9907
--- /dev/null
+++ b/fs/xfs/xfs_dquot_item_recover.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_quota.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_priv.h"
+#include "xfs_qm.h"
+#include "xfs_log.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
+
+const struct xlog_recover_item_ops xlog_dquot_item_ops = {
+	.item_type		= XFS_LI_DQUOT,
+};
+
+const struct xlog_recover_item_ops xlog_quotaoff_item_ops = {
+	.item_type		= XFS_LI_QUOTAOFF,
+};
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index c8cde4122a0f..b43bb087aef3 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -22,6 +22,7 @@
 #include "xfs_bmap.h"
 #include "xfs_trace.h"
 #include "xfs_error.h"
+#include "xfs_log_recover.h"
 
 kmem_zone_t	*xfs_efi_zone;
 kmem_zone_t	*xfs_efd_zone;
@@ -644,3 +645,11 @@ xfs_efi_recover(
 	xfs_trans_cancel(tp);
 	return error;
 }
+
+const struct xlog_recover_item_ops xlog_extfree_intent_item_ops = {
+	.item_type		= XFS_LI_EFI,
+};
+
+const struct xlog_recover_item_ops xlog_extfree_done_item_ops = {
+	.item_type		= XFS_LI_EFD,
+};
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index 490fee22b878..366c1e722a29 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -11,6 +11,8 @@
 #include "xfs_trans_priv.h"
 #include "xfs_icreate_item.h"
 #include "xfs_log.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
 
 kmem_zone_t	*xfs_icreate_zone;		/* inode create item zone */
 
@@ -107,3 +109,21 @@ xfs_icreate_log(
 	tp->t_flags |= XFS_TRANS_DIRTY;
 	set_bit(XFS_LI_DIRTY, &icp->ic_item.li_flags);
 }
+
+static enum xlog_recover_reorder
+xlog_recover_icreate_reorder(
+		struct xlog_recover_item *item)
+{
+	/*
+	 * Inode allocation buffers must be replayed before subsequent inode
+	 * items try to modify those buffers.  ICREATE items are the logical
+	 * equivalent of logging a newly initialized inode buffer, so recover
+	 * these at the same time that we recover logged buffers.
+	 */
+	return XLOG_REORDER_BUFFER_LIST;
+}
+
+const struct xlog_recover_item_ops xlog_icreate_item_ops = {
+	.item_type		= XFS_LI_ICREATE,
+	.reorder		= xlog_recover_icreate_reorder,
+};
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
new file mode 100644
index 000000000000..b19a151efb10
--- /dev/null
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_trace.h"
+#include "xfs_trans_priv.h"
+#include "xfs_buf_item.h"
+#include "xfs_log.h"
+#include "xfs_error.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
+
+const struct xlog_recover_item_ops xlog_inode_item_ops = {
+	.item_type		= XFS_LI_INODE,
+};
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index c2c06f70fb8a..0ef0d81fd190 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1785,6 +1785,34 @@ xlog_clear_stale_blocks(
  *
  ******************************************************************************
  */
+static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = {
+	&xlog_buf_item_ops,
+	&xlog_inode_item_ops,
+	&xlog_dquot_item_ops,
+	&xlog_quotaoff_item_ops,
+	&xlog_icreate_item_ops,
+	&xlog_extfree_intent_item_ops,
+	&xlog_extfree_done_item_ops,
+	&xlog_rmap_intent_item_ops,
+	&xlog_rmap_done_item_ops,
+	&xlog_refcount_intent_item_ops,
+	&xlog_refcount_done_item_ops,
+	&xlog_bmap_intent_item_ops,
+	&xlog_bmap_done_item_ops,
+};
+
+static const struct xlog_recover_item_ops *
+xlog_find_item_ops(
+	struct xlog_recover_item		*item)
+{
+	unsigned int				i;
+
+	for (i = 0; i < ARRAY_SIZE(xlog_recover_item_ops); i++)
+		if (ITEM_TYPE(item) == xlog_recover_item_ops[i]->item_type)
+			return xlog_recover_item_ops[i];
+
+	return NULL;
+}
 
 /*
  * Sort the log items in the transaction.
@@ -1851,41 +1879,10 @@ xlog_recover_reorder_trans(
 
 	list_splice_init(&trans->r_itemq, &sort_list);
 	list_for_each_entry_safe(item, n, &sort_list, ri_list) {
-		xfs_buf_log_format_t	*buf_f = item->ri_buf[0].i_addr;
+		enum xlog_recover_reorder	fate = XLOG_REORDER_ITEM_LIST;
 
-		switch (ITEM_TYPE(item)) {
-		case XFS_LI_ICREATE:
-			list_move_tail(&item->ri_list, &buffer_list);
-			break;
-		case XFS_LI_BUF:
-			if (buf_f->blf_flags & XFS_BLF_CANCEL) {
-				trace_xfs_log_recover_item_reorder_head(log,
-							trans, item, pass);
-				list_move(&item->ri_list, &cancel_list);
-				break;
-			}
-			if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
-				list_move(&item->ri_list, &inode_buffer_list);
-				break;
-			}
-			list_move_tail(&item->ri_list, &buffer_list);
-			break;
-		case XFS_LI_INODE:
-		case XFS_LI_DQUOT:
-		case XFS_LI_QUOTAOFF:
-		case XFS_LI_EFD:
-		case XFS_LI_EFI:
-		case XFS_LI_RUI:
-		case XFS_LI_RUD:
-		case XFS_LI_CUI:
-		case XFS_LI_CUD:
-		case XFS_LI_BUI:
-		case XFS_LI_BUD:
-			trace_xfs_log_recover_item_reorder_tail(log,
-							trans, item, pass);
-			list_move_tail(&item->ri_list, &item_list);
-			break;
-		default:
+		item->ri_ops = xlog_find_item_ops(item);
+		if (!item->ri_ops) {
 			xfs_warn(log->l_mp,
 				"%s: unrecognized type of log operation (%d)",
 				__func__, ITEM_TYPE(item));
@@ -1896,11 +1893,33 @@ xlog_recover_reorder_trans(
 			 */
 			if (!list_empty(&sort_list))
 				list_splice_init(&sort_list, &trans->r_itemq);
-			error = -EIO;
-			goto out;
+			error = -EFSCORRUPTED;
+			break;
+		}
+
+		if (item->ri_ops->reorder)
+			fate = item->ri_ops->reorder(item);
+
+		switch (fate) {
+		case XLOG_REORDER_BUFFER_LIST:
+			list_move_tail(&item->ri_list, &buffer_list);
+			break;
+		case XLOG_REORDER_CANCEL_LIST:
+			trace_xfs_log_recover_item_reorder_head(log,
+					trans, item, pass);
+			list_move(&item->ri_list, &cancel_list);
+			break;
+		case XLOG_REORDER_INODE_BUFFER_LIST:
+			list_move(&item->ri_list, &inode_buffer_list);
+			break;
+		case XLOG_REORDER_ITEM_LIST:
+			trace_xfs_log_recover_item_reorder_tail(log,
+							trans, item, pass);
+			list_move_tail(&item->ri_list, &item_list);
+			break;
 		}
 	}
-out:
+
 	ASSERT(list_empty(&sort_list));
 	if (!list_empty(&buffer_list))
 		list_splice(&buffer_list, &trans->r_itemq);
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 0316eab2fc35..0e8e8bab4344 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -18,6 +18,7 @@
 #include "xfs_log.h"
 #include "xfs_refcount.h"
 #include "xfs_error.h"
+#include "xfs_log_recover.h"
 
 kmem_zone_t	*xfs_cui_zone;
 kmem_zone_t	*xfs_cud_zone;
@@ -570,3 +571,11 @@ xfs_cui_recover(
 	xfs_trans_cancel(tp);
 	return error;
 }
+
+const struct xlog_recover_item_ops xlog_refcount_intent_item_ops = {
+	.item_type		= XFS_LI_CUI,
+};
+
+const struct xlog_recover_item_ops xlog_refcount_done_item_ops = {
+	.item_type		= XFS_LI_CUD,
+};
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index e3bba2aec868..3eb538674cb9 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -18,6 +18,7 @@
 #include "xfs_log.h"
 #include "xfs_rmap.h"
 #include "xfs_error.h"
+#include "xfs_log_recover.h"
 
 kmem_zone_t	*xfs_rui_zone;
 kmem_zone_t	*xfs_rud_zone;
@@ -585,3 +586,11 @@ xfs_rui_recover(
 	xfs_trans_cancel(tp);
 	return error;
 }
+
+const struct xlog_recover_item_ops xlog_rmap_intent_item_ops = {
+	.item_type		= XFS_LI_RUI,
+};
+
+const struct xlog_recover_item_ops xlog_rmap_done_item_ops = {
+	.item_type		= XFS_LI_RUD,
+};


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 03/28] xfs: refactor log recovery item dispatch for pass2 readhead functions
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
  2020-05-05  1:10 ` [PATCH 01/28] xfs: convert xfs_log_recover_item_t to struct xfs_log_recover_item Darrick J. Wong
  2020-05-05  1:10 ` [PATCH 02/28] xfs: refactor log recovery item sorting into a generic dispatch structure Darrick J. Wong
@ 2020-05-05  1:10 ` Darrick J. Wong
  2020-05-05  4:32   ` Chandan Babu R
  2020-05-06 15:04   ` Christoph Hellwig
  2020-05-05  1:10 ` [PATCH 04/28] xfs: refactor log recovery item dispatch for pass1 commit functions Darrick J. Wong
                   ` (24 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:10 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the pass2 readahead code into the per-item source code files and use
the dispatch function to call them.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_log_recover.h |    6 ++
 fs/xfs/xfs_buf_item_recover.c   |   11 +++++
 fs/xfs/xfs_dquot_item_recover.c |   34 ++++++++++++++
 fs/xfs/xfs_inode_item_recover.c |   19 ++++++++
 fs/xfs/xfs_log_recover.c        |   95 +--------------------------------------
 5 files changed, 73 insertions(+), 92 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 271b0741f1e1..ff80871138bb 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -31,6 +31,9 @@ struct xlog_recover_item_ops {
 	 * values mean.
 	 */
 	enum xlog_recover_reorder (*reorder)(struct xlog_recover_item *item);
+
+	/* Start readahead for pass2, if provided. */
+	void (*ra_pass2)(struct xlog *log, struct xlog_recover_item *item);
 };
 
 extern const struct xlog_recover_item_ops xlog_icreate_item_ops;
@@ -92,4 +95,7 @@ struct xlog_recover {
 #define	XLOG_RECOVER_PASS1	1
 #define	XLOG_RECOVER_PASS2	2
 
+void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len,
+		const struct xfs_buf_ops *ops);
+
 #endif	/* __XFS_LOG_RECOVER_H__ */
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index def19025512e..a1327196b690 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -32,7 +32,18 @@ xlog_recover_buf_reorder(
 	return XLOG_REORDER_BUFFER_LIST;
 }
 
+STATIC void
+xlog_recover_buf_ra_pass2(
+	struct xlog                     *log,
+	struct xlog_recover_item        *item)
+{
+	struct xfs_buf_log_format	*buf_f = item->ri_buf[0].i_addr;
+
+	xlog_buf_readahead(log, buf_f->blf_blkno, buf_f->blf_len, NULL);
+}
+
 const struct xlog_recover_item_ops xlog_buf_item_ops = {
 	.item_type		= XFS_LI_BUF,
 	.reorder		= xlog_recover_buf_reorder,
+	.ra_pass2		= xlog_recover_buf_ra_pass2,
 };
diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
index 78fe644e9907..215274173b70 100644
--- a/fs/xfs/xfs_dquot_item_recover.c
+++ b/fs/xfs/xfs_dquot_item_recover.c
@@ -20,8 +20,42 @@
 #include "xfs_log_priv.h"
 #include "xfs_log_recover.h"
 
+STATIC void
+xlog_recover_dquot_ra_pass2(
+	struct xlog			*log,
+	struct xlog_recover_item	*item)
+{
+	struct xfs_mount	*mp = log->l_mp;
+	struct xfs_disk_dquot	*recddq;
+	struct xfs_dq_logformat	*dq_f;
+	uint			type;
+
+	if (mp->m_qflags == 0)
+		return;
+
+	recddq = item->ri_buf[1].i_addr;
+	if (recddq == NULL)
+		return;
+	if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot))
+		return;
+
+	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
+	ASSERT(type);
+	if (log->l_quotaoffs_flag & type)
+		return;
+
+	dq_f = item->ri_buf[0].i_addr;
+	ASSERT(dq_f);
+	ASSERT(dq_f->qlf_len == 1);
+
+	xlog_buf_readahead(log, dq_f->qlf_blkno,
+			XFS_FSB_TO_BB(mp, dq_f->qlf_len),
+			&xfs_dquot_buf_ra_ops);
+}
+
 const struct xlog_recover_item_ops xlog_dquot_item_ops = {
 	.item_type		= XFS_LI_DQUOT,
+	.ra_pass2		= xlog_recover_dquot_ra_pass2,
 };
 
 const struct xlog_recover_item_ops xlog_quotaoff_item_ops = {
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index b19a151efb10..a132cacd8d48 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -21,6 +21,25 @@
 #include "xfs_log_priv.h"
 #include "xfs_log_recover.h"
 
+STATIC void
+xlog_recover_inode_ra_pass2(
+	struct xlog                     *log,
+	struct xlog_recover_item        *item)
+{
+	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
+		struct xfs_inode_log_format	*ilfp = item->ri_buf[0].i_addr;
+
+		xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
+				   &xfs_inode_buf_ra_ops);
+	} else {
+		struct xfs_inode_log_format_32	*ilfp = item->ri_buf[0].i_addr;
+
+		xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
+				   &xfs_inode_buf_ra_ops);
+	}
+}
+
 const struct xlog_recover_item_ops xlog_inode_item_ops = {
 	.item_type		= XFS_LI_INODE,
+	.ra_pass2		= xlog_recover_inode_ra_pass2,
 };
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0ef0d81fd190..ea566747d8e1 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2023,7 +2023,7 @@ xlog_put_buffer_cancelled(
 	return true;
 }
 
-static void
+void
 xlog_buf_readahead(
 	struct xlog		*log,
 	xfs_daddr_t		blkno,
@@ -3890,96 +3890,6 @@ xlog_recover_do_icreate_pass2(
 				     length, be32_to_cpu(icl->icl_gen));
 }
 
-STATIC void
-xlog_recover_buffer_ra_pass2(
-	struct xlog                     *log,
-	struct xlog_recover_item        *item)
-{
-	struct xfs_buf_log_format	*buf_f = item->ri_buf[0].i_addr;
-
-	xlog_buf_readahead(log, buf_f->blf_blkno, buf_f->blf_len, NULL);
-}
-
-STATIC void
-xlog_recover_inode_ra_pass2(
-	struct xlog                     *log,
-	struct xlog_recover_item        *item)
-{
-	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
-		struct xfs_inode_log_format	*ilfp = item->ri_buf[0].i_addr;
-
-		xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
-				   &xfs_inode_buf_ra_ops);
-	} else {
-		struct xfs_inode_log_format_32	*ilfp = item->ri_buf[0].i_addr;
-
-		xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
-				   &xfs_inode_buf_ra_ops);
-	}
-}
-
-STATIC void
-xlog_recover_dquot_ra_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	struct xfs_mount	*mp = log->l_mp;
-	struct xfs_disk_dquot	*recddq;
-	struct xfs_dq_logformat	*dq_f;
-	uint			type;
-
-	if (mp->m_qflags == 0)
-		return;
-
-	recddq = item->ri_buf[1].i_addr;
-	if (recddq == NULL)
-		return;
-	if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot))
-		return;
-
-	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
-	ASSERT(type);
-	if (log->l_quotaoffs_flag & type)
-		return;
-
-	dq_f = item->ri_buf[0].i_addr;
-	ASSERT(dq_f);
-	ASSERT(dq_f->qlf_len == 1);
-
-	xlog_buf_readahead(log, dq_f->qlf_blkno,
-			XFS_FSB_TO_BB(mp, dq_f->qlf_len),
-			&xfs_dquot_buf_ra_ops);
-}
-
-STATIC void
-xlog_recover_ra_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	switch (ITEM_TYPE(item)) {
-	case XFS_LI_BUF:
-		xlog_recover_buffer_ra_pass2(log, item);
-		break;
-	case XFS_LI_INODE:
-		xlog_recover_inode_ra_pass2(log, item);
-		break;
-	case XFS_LI_DQUOT:
-		xlog_recover_dquot_ra_pass2(log, item);
-		break;
-	case XFS_LI_EFI:
-	case XFS_LI_EFD:
-	case XFS_LI_QUOTAOFF:
-	case XFS_LI_RUI:
-	case XFS_LI_RUD:
-	case XFS_LI_CUI:
-	case XFS_LI_CUD:
-	case XFS_LI_BUI:
-	case XFS_LI_BUD:
-	default:
-		break;
-	}
-}
-
 STATIC int
 xlog_recover_commit_pass1(
 	struct xlog			*log,
@@ -4116,7 +4026,8 @@ xlog_recover_commit_trans(
 			error = xlog_recover_commit_pass1(log, trans, item);
 			break;
 		case XLOG_RECOVER_PASS2:
-			xlog_recover_ra_pass2(log, item);
+			if (item->ri_ops->ra_pass2)
+				item->ri_ops->ra_pass2(log, item);
 			list_move_tail(&item->ri_list, &ra_list);
 			items_queued++;
 			if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) {


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 04/28] xfs: refactor log recovery item dispatch for pass1 commit functions
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (2 preceding siblings ...)
  2020-05-05  1:10 ` [PATCH 03/28] xfs: refactor log recovery item dispatch for pass2 readhead functions Darrick J. Wong
@ 2020-05-05  1:10 ` Darrick J. Wong
  2020-05-05  4:40   ` Chandan Babu R
  2020-05-06 15:07   ` Christoph Hellwig
  2020-05-05  1:11 ` [PATCH 05/28] xfs: refactor log recovery buffer item dispatch for pass2 " Darrick J. Wong
                   ` (23 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:10 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the pass1 commit code into the per-item source code files and use
the dispatch function to call them.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_log_recover.h |    4 ++
 fs/xfs/xfs_buf_item_recover.c   |   27 ++++++++++
 fs/xfs/xfs_dquot_item_recover.c |   28 +++++++++++
 fs/xfs/xfs_log_recover.c        |  101 +++++----------------------------------
 4 files changed, 71 insertions(+), 89 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index ff80871138bb..384b70d58993 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -34,6 +34,9 @@ struct xlog_recover_item_ops {
 
 	/* Start readahead for pass2, if provided. */
 	void (*ra_pass2)(struct xlog *log, struct xlog_recover_item *item);
+
+	/* Do whatever work we need to do for pass1, if provided. */
+	int (*commit_pass1)(struct xlog *log, struct xlog_recover_item *item);
 };
 
 extern const struct xlog_recover_item_ops xlog_icreate_item_ops;
@@ -97,5 +100,6 @@ struct xlog_recover {
 
 void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len,
 		const struct xfs_buf_ops *ops);
+bool xlog_add_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
 
 #endif	/* __XFS_LOG_RECOVER_H__ */
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index a1327196b690..802f2206516d 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -42,8 +42,35 @@ xlog_recover_buf_ra_pass2(
 	xlog_buf_readahead(log, buf_f->blf_blkno, buf_f->blf_len, NULL);
 }
 
+/*
+ * Build up the table of buf cancel records so that we don't replay cancelled
+ * data in the second pass.
+ */
+static int
+xlog_recover_buf_commit_pass1(
+	struct xlog			*log,
+	struct xlog_recover_item	*item)
+{
+	struct xfs_buf_log_format	*bf = item->ri_buf[0].i_addr;
+
+	if (!xfs_buf_log_check_iovec(&item->ri_buf[0])) {
+		xfs_err(log->l_mp, "bad buffer log item size (%d)",
+				item->ri_buf[0].i_len);
+		return -EFSCORRUPTED;
+	}
+
+	if (!(bf->blf_flags & XFS_BLF_CANCEL))
+		trace_xfs_log_recover_buf_not_cancel(log, bf);
+	else if (xlog_add_buffer_cancelled(log, bf->blf_blkno, bf->blf_len))
+		trace_xfs_log_recover_buf_cancel_add(log, bf);
+	else
+		trace_xfs_log_recover_buf_cancel_ref_inc(log, bf);
+	return 0;
+}
+
 const struct xlog_recover_item_ops xlog_buf_item_ops = {
 	.item_type		= XFS_LI_BUF,
 	.reorder		= xlog_recover_buf_reorder,
 	.ra_pass2		= xlog_recover_buf_ra_pass2,
+	.commit_pass1		= xlog_recover_buf_commit_pass1,
 };
diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
index 215274173b70..ebc44c1bc2b1 100644
--- a/fs/xfs/xfs_dquot_item_recover.c
+++ b/fs/xfs/xfs_dquot_item_recover.c
@@ -58,6 +58,34 @@ const struct xlog_recover_item_ops xlog_dquot_item_ops = {
 	.ra_pass2		= xlog_recover_dquot_ra_pass2,
 };
 
+/*
+ * Recover QUOTAOFF records. We simply make a note of it in the xlog
+ * structure, so that we know not to do any dquot item or dquot buffer recovery
+ * of that type.
+ */
+STATIC int
+xlog_recover_quotaoff_commit_pass1(
+	struct xlog			*log,
+	struct xlog_recover_item	*item)
+{
+	struct xfs_qoff_logformat	*qoff_f = item->ri_buf[0].i_addr;
+	ASSERT(qoff_f);
+
+	/*
+	 * The logitem format's flag tells us if this was user quotaoff,
+	 * group/project quotaoff or both.
+	 */
+	if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
+		log->l_quotaoffs_flag |= XFS_DQ_USER;
+	if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
+		log->l_quotaoffs_flag |= XFS_DQ_PROJ;
+	if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
+		log->l_quotaoffs_flag |= XFS_DQ_GROUP;
+
+	return 0;
+}
+
 const struct xlog_recover_item_ops xlog_quotaoff_item_ops = {
 	.item_type		= XFS_LI_QUOTAOFF,
+	.commit_pass1		= xlog_recover_quotaoff_commit_pass1,
 };
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index ea566747d8e1..b3627ebf870e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1953,7 +1953,7 @@ xlog_find_buffer_cancelled(
 	return NULL;
 }
 
-static bool
+bool
 xlog_add_buffer_cancelled(
 	struct xlog		*log,
 	xfs_daddr_t		blkno,
@@ -2034,32 +2034,6 @@ xlog_buf_readahead(
 		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
 }
 
-/*
- * Build up the table of buf cancel records so that we don't replay cancelled
- * data in the second pass.
- */
-static int
-xlog_recover_buffer_pass1(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	struct xfs_buf_log_format	*bf = item->ri_buf[0].i_addr;
-
-	if (!xfs_buf_log_check_iovec(&item->ri_buf[0])) {
-		xfs_err(log->l_mp, "bad buffer log item size (%d)",
-				item->ri_buf[0].i_len);
-		return -EFSCORRUPTED;
-	}
-
-	if (!(bf->blf_flags & XFS_BLF_CANCEL))
-		trace_xfs_log_recover_buf_not_cancel(log, bf);
-	else if (xlog_add_buffer_cancelled(log, bf->blf_blkno, bf->blf_len))
-		trace_xfs_log_recover_buf_cancel_add(log, bf);
-	else
-		trace_xfs_log_recover_buf_cancel_ref_inc(log, bf);
-	return 0;
-}
-
 /*
  * Perform recovery for a buffer full of inodes.  In these buffers, the only
  * data which should be recovered is that which corresponds to the
@@ -3197,33 +3171,6 @@ xlog_recover_inode_pass2(
 	return error;
 }
 
-/*
- * Recover QUOTAOFF records. We simply make a note of it in the xlog
- * structure, so that we know not to do any dquot item or dquot buffer recovery,
- * of that type.
- */
-STATIC int
-xlog_recover_quotaoff_pass1(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	xfs_qoff_logformat_t	*qoff_f = item->ri_buf[0].i_addr;
-	ASSERT(qoff_f);
-
-	/*
-	 * The logitem format's flag tells us if this was user quotaoff,
-	 * group/project quotaoff or both.
-	 */
-	if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
-		log->l_quotaoffs_flag |= XFS_DQ_USER;
-	if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
-		log->l_quotaoffs_flag |= XFS_DQ_PROJ;
-	if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
-		log->l_quotaoffs_flag |= XFS_DQ_GROUP;
-
-	return 0;
-}
-
 /*
  * Recover a dquot record
  */
@@ -3890,40 +3837,6 @@ xlog_recover_do_icreate_pass2(
 				     length, be32_to_cpu(icl->icl_gen));
 }
 
-STATIC int
-xlog_recover_commit_pass1(
-	struct xlog			*log,
-	struct xlog_recover		*trans,
-	struct xlog_recover_item	*item)
-{
-	trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1);
-
-	switch (ITEM_TYPE(item)) {
-	case XFS_LI_BUF:
-		return xlog_recover_buffer_pass1(log, item);
-	case XFS_LI_QUOTAOFF:
-		return xlog_recover_quotaoff_pass1(log, item);
-	case XFS_LI_INODE:
-	case XFS_LI_EFI:
-	case XFS_LI_EFD:
-	case XFS_LI_DQUOT:
-	case XFS_LI_ICREATE:
-	case XFS_LI_RUI:
-	case XFS_LI_RUD:
-	case XFS_LI_CUI:
-	case XFS_LI_CUD:
-	case XFS_LI_BUI:
-	case XFS_LI_BUD:
-		/* nothing to do in pass 1 */
-		return 0;
-	default:
-		xfs_warn(log->l_mp, "%s: invalid item type (%d)",
-			__func__, ITEM_TYPE(item));
-		ASSERT(0);
-		return -EFSCORRUPTED;
-	}
-}
-
 STATIC int
 xlog_recover_commit_pass2(
 	struct xlog			*log,
@@ -4021,9 +3934,19 @@ xlog_recover_commit_trans(
 		return error;
 
 	list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) {
+		trace_xfs_log_recover_item_recover(log, trans, item, pass);
+
+		if (!item->ri_ops) {
+			xfs_warn(log->l_mp, "%s: invalid item type (%d)",
+				__func__, ITEM_TYPE(item));
+			ASSERT(0);
+			return -EFSCORRUPTED;
+		}
+
 		switch (pass) {
 		case XLOG_RECOVER_PASS1:
-			error = xlog_recover_commit_pass1(log, trans, item);
+			if (item->ri_ops->commit_pass1)
+				error = item->ri_ops->commit_pass1(log, item);
 			break;
 		case XLOG_RECOVER_PASS2:
 			if (item->ri_ops->ra_pass2)


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 05/28] xfs: refactor log recovery buffer item dispatch for pass2 commit functions
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (3 preceding siblings ...)
  2020-05-05  1:10 ` [PATCH 04/28] xfs: refactor log recovery item dispatch for pass1 commit functions Darrick J. Wong
@ 2020-05-05  1:11 ` Darrick J. Wong
  2020-05-05  5:03   ` Chandan Babu R
  2020-05-06 15:09   ` Christoph Hellwig
  2020-05-05  1:11 ` [PATCH 06/28] xfs: refactor log recovery inode " Darrick J. Wong
                   ` (22 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:11 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the log buffer item pass2 commit code into the per-item source code
files and use the dispatch function to call it.  We do these one at a
time because there's a lot of code to move.  No functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_log_recover.h |   23 +
 fs/xfs/xfs_buf_item_recover.c   |  790 +++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_log_recover.c        |  798 ---------------------------------------
 3 files changed, 820 insertions(+), 791 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 384b70d58993..a45f6e9fa47b 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -37,6 +37,26 @@ struct xlog_recover_item_ops {
 
 	/* Do whatever work we need to do for pass1, if provided. */
 	int (*commit_pass1)(struct xlog *log, struct xlog_recover_item *item);
+
+	/*
+	 * This function should do whatever work is needed for pass2 of log
+	 * recovery, if provided.
+	 *
+	 * If the recovered item is an intent item, this function should parse
+	 * the recovered item to construct an in-core log intent item and
+	 * insert it into the AIL.  The in-core log intent item should have 1
+	 * refcount so that the item is freed either (a) when we commit the
+	 * recovered log item for the intent-done item; (b) replay the work and
+	 * log a new intent-done item; or (c) recovery fails and we have to
+	 * abort.
+	 *
+	 * If the recovered item is an intent-done item, this function should
+	 * parse the recovered item to find the id of the corresponding intent
+	 * log item.  Next, it should find the in-core log intent item in the
+	 * AIL and release it.
+	 */
+	int (*commit_pass2)(struct xlog *log, struct list_head *buffer_list,
+			    struct xlog_recover_item *item, xfs_lsn_t lsn);
 };
 
 extern const struct xlog_recover_item_ops xlog_icreate_item_ops;
@@ -101,5 +121,8 @@ struct xlog_recover {
 void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len,
 		const struct xfs_buf_ops *ops);
 bool xlog_add_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
+bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
+bool xlog_put_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
+void xlog_recover_iodone(struct xfs_buf *bp);
 
 #endif	/* __XFS_LOG_RECOVER_H__ */
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 802f2206516d..4ca6d47d6c95 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -18,6 +18,10 @@
 #include "xfs_log.h"
 #include "xfs_log_priv.h"
 #include "xfs_log_recover.h"
+#include "xfs_error.h"
+#include "xfs_inode.h"
+#include "xfs_dir2.h"
+#include "xfs_quota.h"
 
 STATIC enum xlog_recover_reorder
 xlog_recover_buf_reorder(
@@ -68,9 +72,795 @@ xlog_recover_buf_commit_pass1(
 	return 0;
 }
 
+/*
+ * Validate the recovered buffer is of the correct type and attach the
+ * appropriate buffer operations to them for writeback. Magic numbers are in a
+ * few places:
+ *	the first 16 bits of the buffer (inode buffer, dquot buffer),
+ *	the first 32 bits of the buffer (most blocks),
+ *	inside a struct xfs_da_blkinfo at the start of the buffer.
+ */
+static void
+xlog_recover_validate_buf_type(
+	struct xfs_mount		*mp,
+	struct xfs_buf			*bp,
+	struct xfs_buf_log_format	*buf_f,
+	xfs_lsn_t			current_lsn)
+{
+	struct xfs_da_blkinfo		*info = bp->b_addr;
+	uint32_t			magic32;
+	uint16_t			magic16;
+	uint16_t			magicda;
+	char				*warnmsg = NULL;
+
+	/*
+	 * We can only do post recovery validation on items on CRC enabled
+	 * filesystems as we need to know when the buffer was written to be able
+	 * to determine if we should have replayed the item. If we replay old
+	 * metadata over a newer buffer, then it will enter a temporarily
+	 * inconsistent state resulting in verification failures. Hence for now
+	 * just avoid the verification stage for non-crc filesystems
+	 */
+	if (!xfs_sb_version_hascrc(&mp->m_sb))
+		return;
+
+	magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
+	magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
+	magicda = be16_to_cpu(info->magic);
+	switch (xfs_blft_from_flags(buf_f)) {
+	case XFS_BLFT_BTREE_BUF:
+		switch (magic32) {
+		case XFS_ABTB_CRC_MAGIC:
+		case XFS_ABTB_MAGIC:
+			bp->b_ops = &xfs_bnobt_buf_ops;
+			break;
+		case XFS_ABTC_CRC_MAGIC:
+		case XFS_ABTC_MAGIC:
+			bp->b_ops = &xfs_cntbt_buf_ops;
+			break;
+		case XFS_IBT_CRC_MAGIC:
+		case XFS_IBT_MAGIC:
+			bp->b_ops = &xfs_inobt_buf_ops;
+			break;
+		case XFS_FIBT_CRC_MAGIC:
+		case XFS_FIBT_MAGIC:
+			bp->b_ops = &xfs_finobt_buf_ops;
+			break;
+		case XFS_BMAP_CRC_MAGIC:
+		case XFS_BMAP_MAGIC:
+			bp->b_ops = &xfs_bmbt_buf_ops;
+			break;
+		case XFS_RMAP_CRC_MAGIC:
+			bp->b_ops = &xfs_rmapbt_buf_ops;
+			break;
+		case XFS_REFC_CRC_MAGIC:
+			bp->b_ops = &xfs_refcountbt_buf_ops;
+			break;
+		default:
+			warnmsg = "Bad btree block magic!";
+			break;
+		}
+		break;
+	case XFS_BLFT_AGF_BUF:
+		if (magic32 != XFS_AGF_MAGIC) {
+			warnmsg = "Bad AGF block magic!";
+			break;
+		}
+		bp->b_ops = &xfs_agf_buf_ops;
+		break;
+	case XFS_BLFT_AGFL_BUF:
+		if (magic32 != XFS_AGFL_MAGIC) {
+			warnmsg = "Bad AGFL block magic!";
+			break;
+		}
+		bp->b_ops = &xfs_agfl_buf_ops;
+		break;
+	case XFS_BLFT_AGI_BUF:
+		if (magic32 != XFS_AGI_MAGIC) {
+			warnmsg = "Bad AGI block magic!";
+			break;
+		}
+		bp->b_ops = &xfs_agi_buf_ops;
+		break;
+	case XFS_BLFT_UDQUOT_BUF:
+	case XFS_BLFT_PDQUOT_BUF:
+	case XFS_BLFT_GDQUOT_BUF:
+#ifdef CONFIG_XFS_QUOTA
+		if (magic16 != XFS_DQUOT_MAGIC) {
+			warnmsg = "Bad DQUOT block magic!";
+			break;
+		}
+		bp->b_ops = &xfs_dquot_buf_ops;
+#else
+		xfs_alert(mp,
+	"Trying to recover dquots without QUOTA support built in!");
+		ASSERT(0);
+#endif
+		break;
+	case XFS_BLFT_DINO_BUF:
+		if (magic16 != XFS_DINODE_MAGIC) {
+			warnmsg = "Bad INODE block magic!";
+			break;
+		}
+		bp->b_ops = &xfs_inode_buf_ops;
+		break;
+	case XFS_BLFT_SYMLINK_BUF:
+		if (magic32 != XFS_SYMLINK_MAGIC) {
+			warnmsg = "Bad symlink block magic!";
+			break;
+		}
+		bp->b_ops = &xfs_symlink_buf_ops;
+		break;
+	case XFS_BLFT_DIR_BLOCK_BUF:
+		if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
+		    magic32 != XFS_DIR3_BLOCK_MAGIC) {
+			warnmsg = "Bad dir block magic!";
+			break;
+		}
+		bp->b_ops = &xfs_dir3_block_buf_ops;
+		break;
+	case XFS_BLFT_DIR_DATA_BUF:
+		if (magic32 != XFS_DIR2_DATA_MAGIC &&
+		    magic32 != XFS_DIR3_DATA_MAGIC) {
+			warnmsg = "Bad dir data magic!";
+			break;
+		}
+		bp->b_ops = &xfs_dir3_data_buf_ops;
+		break;
+	case XFS_BLFT_DIR_FREE_BUF:
+		if (magic32 != XFS_DIR2_FREE_MAGIC &&
+		    magic32 != XFS_DIR3_FREE_MAGIC) {
+			warnmsg = "Bad dir3 free magic!";
+			break;
+		}
+		bp->b_ops = &xfs_dir3_free_buf_ops;
+		break;
+	case XFS_BLFT_DIR_LEAF1_BUF:
+		if (magicda != XFS_DIR2_LEAF1_MAGIC &&
+		    magicda != XFS_DIR3_LEAF1_MAGIC) {
+			warnmsg = "Bad dir leaf1 magic!";
+			break;
+		}
+		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+		break;
+	case XFS_BLFT_DIR_LEAFN_BUF:
+		if (magicda != XFS_DIR2_LEAFN_MAGIC &&
+		    magicda != XFS_DIR3_LEAFN_MAGIC) {
+			warnmsg = "Bad dir leafn magic!";
+			break;
+		}
+		bp->b_ops = &xfs_dir3_leafn_buf_ops;
+		break;
+	case XFS_BLFT_DA_NODE_BUF:
+		if (magicda != XFS_DA_NODE_MAGIC &&
+		    magicda != XFS_DA3_NODE_MAGIC) {
+			warnmsg = "Bad da node magic!";
+			break;
+		}
+		bp->b_ops = &xfs_da3_node_buf_ops;
+		break;
+	case XFS_BLFT_ATTR_LEAF_BUF:
+		if (magicda != XFS_ATTR_LEAF_MAGIC &&
+		    magicda != XFS_ATTR3_LEAF_MAGIC) {
+			warnmsg = "Bad attr leaf magic!";
+			break;
+		}
+		bp->b_ops = &xfs_attr3_leaf_buf_ops;
+		break;
+	case XFS_BLFT_ATTR_RMT_BUF:
+		if (magic32 != XFS_ATTR3_RMT_MAGIC) {
+			warnmsg = "Bad attr remote magic!";
+			break;
+		}
+		bp->b_ops = &xfs_attr3_rmt_buf_ops;
+		break;
+	case XFS_BLFT_SB_BUF:
+		if (magic32 != XFS_SB_MAGIC) {
+			warnmsg = "Bad SB block magic!";
+			break;
+		}
+		bp->b_ops = &xfs_sb_buf_ops;
+		break;
+#ifdef CONFIG_XFS_RT
+	case XFS_BLFT_RTBITMAP_BUF:
+	case XFS_BLFT_RTSUMMARY_BUF:
+		/* no magic numbers for verification of RT buffers */
+		bp->b_ops = &xfs_rtbuf_ops;
+		break;
+#endif /* CONFIG_XFS_RT */
+	default:
+		xfs_warn(mp, "Unknown buffer type %d!",
+			 xfs_blft_from_flags(buf_f));
+		break;
+	}
+
+	/*
+	 * Nothing else to do in the case of a NULL current LSN as this means
+	 * the buffer is more recent than the change in the log and will be
+	 * skipped.
+	 */
+	if (current_lsn == NULLCOMMITLSN)
+		return;
+
+	if (warnmsg) {
+		xfs_warn(mp, warnmsg);
+		ASSERT(0);
+	}
+
+	/*
+	 * We must update the metadata LSN of the buffer as it is written out to
+	 * ensure that older transactions never replay over this one and corrupt
+	 * the buffer. This can occur if log recovery is interrupted at some
+	 * point after the current transaction completes, at which point a
+	 * subsequent mount starts recovery from the beginning.
+	 *
+	 * Write verifiers update the metadata LSN from log items attached to
+	 * the buffer. Therefore, initialize a bli purely to carry the LSN to
+	 * the verifier. We'll clean it up in our ->iodone() callback.
+	 */
+	if (bp->b_ops) {
+		struct xfs_buf_log_item	*bip;
+
+		ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
+		bp->b_iodone = xlog_recover_iodone;
+		xfs_buf_item_init(bp, mp);
+		bip = bp->b_log_item;
+		bip->bli_item.li_lsn = current_lsn;
+	}
+}
+
+/*
+ * Perform a 'normal' buffer recovery.  Each logged region of the
+ * buffer should be copied over the corresponding region in the
+ * given buffer.  The bitmap in the buf log format structure indicates
+ * where to place the logged data.
+ */
+STATIC void
+xlog_recover_do_reg_buffer(
+	struct xfs_mount		*mp,
+	struct xlog_recover_item	*item,
+	struct xfs_buf			*bp,
+	struct xfs_buf_log_format	*buf_f,
+	xfs_lsn_t			current_lsn)
+{
+	int			i;
+	int			bit;
+	int			nbits;
+	xfs_failaddr_t		fa;
+	const size_t		size_disk_dquot = sizeof(struct xfs_disk_dquot);
+
+	trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
+
+	bit = 0;
+	i = 1;  /* 0 is the buf format structure */
+	while (1) {
+		bit = xfs_next_bit(buf_f->blf_data_map,
+				   buf_f->blf_map_size, bit);
+		if (bit == -1)
+			break;
+		nbits = xfs_contig_bits(buf_f->blf_data_map,
+					buf_f->blf_map_size, bit);
+		ASSERT(nbits > 0);
+		ASSERT(item->ri_buf[i].i_addr != NULL);
+		ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
+		ASSERT(BBTOB(bp->b_length) >=
+		       ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
+
+		/*
+		 * The dirty regions logged in the buffer, even though
+		 * contiguous, may span multiple chunks. This is because the
+		 * dirty region may span a physical page boundary in a buffer
+		 * and hence be split into two separate vectors for writing into
+		 * the log. Hence we need to trim nbits back to the length of
+		 * the current region being copied out of the log.
+		 */
+		if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
+			nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
+
+		/*
+		 * Do a sanity check if this is a dquot buffer. Just checking
+		 * the first dquot in the buffer should do. XXXThis is
+		 * probably a good thing to do for other buf types also.
+		 */
+		fa = NULL;
+		if (buf_f->blf_flags &
+		   (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
+			if (item->ri_buf[i].i_addr == NULL) {
+				xfs_alert(mp,
+					"XFS: NULL dquot in %s.", __func__);
+				goto next;
+			}
+			if (item->ri_buf[i].i_len < size_disk_dquot) {
+				xfs_alert(mp,
+					"XFS: dquot too small (%d) in %s.",
+					item->ri_buf[i].i_len, __func__);
+				goto next;
+			}
+			fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr,
+					       -1, 0);
+			if (fa) {
+				xfs_alert(mp,
+	"dquot corrupt at %pS trying to replay into block 0x%llx",
+					fa, bp->b_bn);
+				goto next;
+			}
+		}
+
+		memcpy(xfs_buf_offset(bp,
+			(uint)bit << XFS_BLF_SHIFT),	/* dest */
+			item->ri_buf[i].i_addr,		/* source */
+			nbits<<XFS_BLF_SHIFT);		/* length */
+ next:
+		i++;
+		bit += nbits;
+	}
+
+	/* Shouldn't be any more regions */
+	ASSERT(i == item->ri_total);
+
+	xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn);
+}
+
+/*
+ * Perform a dquot buffer recovery.
+ * Simple algorithm: if we have found a QUOTAOFF log item of the same type
+ * (ie. USR or GRP), then just toss this buffer away; don't recover it.
+ * Else, treat it as a regular buffer and do recovery.
+ *
+ * Return false if the buffer was tossed and true if we recovered the buffer to
+ * indicate to the caller if the buffer needs writing.
+ */
+STATIC bool
+xlog_recover_do_dquot_buffer(
+	struct xfs_mount		*mp,
+	struct xlog			*log,
+	struct xlog_recover_item	*item,
+	struct xfs_buf			*bp,
+	struct xfs_buf_log_format	*buf_f)
+{
+	uint			type;
+
+	trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
+
+	/*
+	 * Filesystems are required to send in quota flags at mount time.
+	 */
+	if (!mp->m_qflags)
+		return false;
+
+	type = 0;
+	if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
+		type |= XFS_DQ_USER;
+	if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF)
+		type |= XFS_DQ_PROJ;
+	if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF)
+		type |= XFS_DQ_GROUP;
+	/*
+	 * This type of quotas was turned off, so ignore this buffer
+	 */
+	if (log->l_quotaoffs_flag & type)
+		return false;
+
+	xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN);
+	return true;
+}
+
+/*
+ * Perform recovery for a buffer full of inodes.  In these buffers, the only
+ * data which should be recovered is that which corresponds to the
+ * di_next_unlinked pointers in the on disk inode structures.  The rest of the
+ * data for the inodes is always logged through the inodes themselves rather
+ * than the inode buffer and is recovered in xlog_recover_inode_pass2().
+ *
+ * The only time when buffers full of inodes are fully recovered is when the
+ * buffer is full of newly allocated inodes.  In this case the buffer will
+ * not be marked as an inode buffer and so will be sent to
+ * xlog_recover_do_reg_buffer() below during recovery.
+ */
+STATIC int
+xlog_recover_do_inode_buffer(
+	struct xfs_mount		*mp,
+	struct xlog_recover_item	*item,
+	struct xfs_buf			*bp,
+	struct xfs_buf_log_format	*buf_f)
+{
+	int				i;
+	int				item_index = 0;
+	int				bit = 0;
+	int				nbits = 0;
+	int				reg_buf_offset = 0;
+	int				reg_buf_bytes = 0;
+	int				next_unlinked_offset;
+	int				inodes_per_buf;
+	xfs_agino_t			*logged_nextp;
+	xfs_agino_t			*buffer_nextp;
+
+	trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
+
+	/*
+	 * Post recovery validation only works properly on CRC enabled
+	 * filesystems.
+	 */
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		bp->b_ops = &xfs_inode_buf_ops;
+
+	inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog;
+	for (i = 0; i < inodes_per_buf; i++) {
+		next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
+			offsetof(xfs_dinode_t, di_next_unlinked);
+
+		while (next_unlinked_offset >=
+		       (reg_buf_offset + reg_buf_bytes)) {
+			/*
+			 * The next di_next_unlinked field is beyond
+			 * the current logged region.  Find the next
+			 * logged region that contains or is beyond
+			 * the current di_next_unlinked field.
+			 */
+			bit += nbits;
+			bit = xfs_next_bit(buf_f->blf_data_map,
+					   buf_f->blf_map_size, bit);
+
+			/*
+			 * If there are no more logged regions in the
+			 * buffer, then we're done.
+			 */
+			if (bit == -1)
+				return 0;
+
+			nbits = xfs_contig_bits(buf_f->blf_data_map,
+						buf_f->blf_map_size, bit);
+			ASSERT(nbits > 0);
+			reg_buf_offset = bit << XFS_BLF_SHIFT;
+			reg_buf_bytes = nbits << XFS_BLF_SHIFT;
+			item_index++;
+		}
+
+		/*
+		 * If the current logged region starts after the current
+		 * di_next_unlinked field, then move on to the next
+		 * di_next_unlinked field.
+		 */
+		if (next_unlinked_offset < reg_buf_offset)
+			continue;
+
+		ASSERT(item->ri_buf[item_index].i_addr != NULL);
+		ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
+		ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
+
+		/*
+		 * The current logged region contains a copy of the
+		 * current di_next_unlinked field.  Extract its value
+		 * and copy it to the buffer copy.
+		 */
+		logged_nextp = item->ri_buf[item_index].i_addr +
+				next_unlinked_offset - reg_buf_offset;
+		if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) {
+			xfs_alert(mp,
+		"Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). "
+		"Trying to replay bad (0) inode di_next_unlinked field.",
+				item, bp);
+			return -EFSCORRUPTED;
+		}
+
+		buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);
+		*buffer_nextp = *logged_nextp;
+
+		/*
+		 * If necessary, recalculate the CRC in the on-disk inode. We
+		 * have to leave the inode in a consistent state for whoever
+		 * reads it next....
+		 */
+		xfs_dinode_calc_crc(mp,
+				xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
+
+	}
+
+	return 0;
+}
+
+/*
+ * V5 filesystems know the age of the buffer on disk being recovered. We can
+ * have newer objects on disk than we are replaying, and so for these cases we
+ * don't want to replay the current change as that will make the buffer contents
+ * temporarily invalid on disk.
+ *
+ * The magic number might not match the buffer type we are going to recover
+ * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags.  Hence
+ * extract the LSN of the existing object in the buffer based on it's current
+ * magic number.  If we don't recognise the magic number in the buffer, then
+ * return a LSN of -1 so that the caller knows it was an unrecognised block and
+ * so can recover the buffer.
+ *
+ * Note: we cannot rely solely on magic number matches to determine that the
+ * buffer has a valid LSN - we also need to verify that it belongs to this
+ * filesystem, so we need to extract the object's LSN and compare it to that
+ * which we read from the superblock. If the UUIDs don't match, then we've got a
+ * stale metadata block from an old filesystem instance that we need to recover
+ * over the top of.
+ */
+static xfs_lsn_t
+xlog_recover_get_buf_lsn(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp)
+{
+	uint32_t		magic32;
+	uint16_t		magic16;
+	uint16_t		magicda;
+	void			*blk = bp->b_addr;
+	uuid_t			*uuid;
+	xfs_lsn_t		lsn = -1;
+
+	/* v4 filesystems always recover immediately */
+	if (!xfs_sb_version_hascrc(&mp->m_sb))
+		goto recover_immediately;
+
+	magic32 = be32_to_cpu(*(__be32 *)blk);
+	switch (magic32) {
+	case XFS_ABTB_CRC_MAGIC:
+	case XFS_ABTC_CRC_MAGIC:
+	case XFS_ABTB_MAGIC:
+	case XFS_ABTC_MAGIC:
+	case XFS_RMAP_CRC_MAGIC:
+	case XFS_REFC_CRC_MAGIC:
+	case XFS_IBT_CRC_MAGIC:
+	case XFS_IBT_MAGIC: {
+		struct xfs_btree_block *btb = blk;
+
+		lsn = be64_to_cpu(btb->bb_u.s.bb_lsn);
+		uuid = &btb->bb_u.s.bb_uuid;
+		break;
+	}
+	case XFS_BMAP_CRC_MAGIC:
+	case XFS_BMAP_MAGIC: {
+		struct xfs_btree_block *btb = blk;
+
+		lsn = be64_to_cpu(btb->bb_u.l.bb_lsn);
+		uuid = &btb->bb_u.l.bb_uuid;
+		break;
+	}
+	case XFS_AGF_MAGIC:
+		lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
+		uuid = &((struct xfs_agf *)blk)->agf_uuid;
+		break;
+	case XFS_AGFL_MAGIC:
+		lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
+		uuid = &((struct xfs_agfl *)blk)->agfl_uuid;
+		break;
+	case XFS_AGI_MAGIC:
+		lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
+		uuid = &((struct xfs_agi *)blk)->agi_uuid;
+		break;
+	case XFS_SYMLINK_MAGIC:
+		lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
+		uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid;
+		break;
+	case XFS_DIR3_BLOCK_MAGIC:
+	case XFS_DIR3_DATA_MAGIC:
+	case XFS_DIR3_FREE_MAGIC:
+		lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
+		uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
+		break;
+	case XFS_ATTR3_RMT_MAGIC:
+		/*
+		 * Remote attr blocks are written synchronously, rather than
+		 * being logged. That means they do not contain a valid LSN
+		 * (i.e. transactionally ordered) in them, and hence any time we
+		 * see a buffer to replay over the top of a remote attribute
+		 * block we should simply do so.
+		 */
+		goto recover_immediately;
+	case XFS_SB_MAGIC:
+		/*
+		 * superblock uuids are magic. We may or may not have a
+		 * sb_meta_uuid on disk, but it will be set in the in-core
+		 * superblock. We set the uuid pointer for verification
+		 * according to the superblock feature mask to ensure we check
+		 * the relevant UUID in the superblock.
+		 */
+		lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
+		if (xfs_sb_version_hasmetauuid(&mp->m_sb))
+			uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
+		else
+			uuid = &((struct xfs_dsb *)blk)->sb_uuid;
+		break;
+	default:
+		break;
+	}
+
+	if (lsn != (xfs_lsn_t)-1) {
+		if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
+			goto recover_immediately;
+		return lsn;
+	}
+
+	magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
+	switch (magicda) {
+	case XFS_DIR3_LEAF1_MAGIC:
+	case XFS_DIR3_LEAFN_MAGIC:
+	case XFS_DA3_NODE_MAGIC:
+		lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
+		uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
+		break;
+	default:
+		break;
+	}
+
+	if (lsn != (xfs_lsn_t)-1) {
+		if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
+			goto recover_immediately;
+		return lsn;
+	}
+
+	/*
+	 * We do individual object checks on dquot and inode buffers as they
+	 * have their own individual LSN records. Also, we could have a stale
+	 * buffer here, so we have to at least recognise these buffer types.
+	 *
+	 * A noted complexity here is inode unlinked list processing - it logs
+	 * the inode directly in the buffer, but we don't know which inodes have
+	 * been modified, and there is no global buffer LSN. Hence we need to
+	 * recover all inode buffer types immediately. This problem will be
+	 * fixed by logical logging of the unlinked list modifications.
+	 */
+	magic16 = be16_to_cpu(*(__be16 *)blk);
+	switch (magic16) {
+	case XFS_DQUOT_MAGIC:
+	case XFS_DINODE_MAGIC:
+		goto recover_immediately;
+	default:
+		break;
+	}
+
+	/* unknown buffer contents, recover immediately */
+
+recover_immediately:
+	return (xfs_lsn_t)-1;
+
+}
+
+/*
+ * This routine replays a modification made to a buffer at runtime.
+ * There are actually two types of buffer, regular and inode, which
+ * are handled differently.  Inode buffers are handled differently
+ * in that we only recover a specific set of data from them, namely
+ * the inode di_next_unlinked fields.  This is because all other inode
+ * data is actually logged via inode records and any data we replay
+ * here which overlaps that may be stale.
+ *
+ * When meta-data buffers are freed at run time we log a buffer item
+ * with the XFS_BLF_CANCEL bit set to indicate that previous copies
+ * of the buffer in the log should not be replayed at recovery time.
+ * This is so that if the blocks covered by the buffer are reused for
+ * file data before we crash we don't end up replaying old, freed
+ * meta-data into a user's file.
+ *
+ * To handle the cancellation of buffer log items, we make two passes
+ * over the log during recovery.  During the first we build a table of
+ * those buffers which have been cancelled, and during the second we
+ * only replay those buffers which do not have corresponding cancel
+ * records in the table.  See xlog_recover_buf_commit_pass[1,2] above
+ * for more details on the implementation of the table of cancel records.
+ */
+STATIC int
+xlog_recover_buf_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			current_lsn)
+{
+	struct xfs_buf_log_format	*buf_f = item->ri_buf[0].i_addr;
+	struct xfs_mount		*mp = log->l_mp;
+	struct xfs_buf			*bp;
+	int				error;
+	uint				buf_flags;
+	xfs_lsn_t			lsn;
+
+	/*
+	 * In this pass we only want to recover all the buffers which have
+	 * not been cancelled and are not cancellation buffers themselves.
+	 */
+	if (buf_f->blf_flags & XFS_BLF_CANCEL) {
+		if (xlog_put_buffer_cancelled(log, buf_f->blf_blkno,
+				buf_f->blf_len))
+			goto cancelled;
+	} else {
+
+		if (xlog_is_buffer_cancelled(log, buf_f->blf_blkno,
+				buf_f->blf_len))
+			goto cancelled;
+	}
+
+	trace_xfs_log_recover_buf_recover(log, buf_f);
+
+	buf_flags = 0;
+	if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
+		buf_flags |= XBF_UNMAPPED;
+
+	error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
+			  buf_flags, &bp, NULL);
+	if (error)
+		return error;
+
+	/*
+	 * Recover the buffer only if we get an LSN from it and it's less than
+	 * the lsn of the transaction we are replaying.
+	 *
+	 * Note that we have to be extremely careful of readahead here.
+	 * Readahead does not attach verifiers to the buffers so if we don't
+	 * actually do any replay after readahead because the LSN we found
+	 * in the buffer is more recent than the current transaction then we
+	 * need to attach the verifier directly. Failure to do so means that
+	 * future recovery actions (e.g. EFI and unlinked list recovery) can
+	 * operate on the buffers without the verifier being attached. This
+	 * can lead to blocks on disk having the correct content but a stale
+	 * CRC.
+	 *
+	 * It is safe to assume these clean buffers are currently up to date.
+	 * If the buffer is dirtied by a later transaction being replayed, then
+	 * the verifier will be reset to match whatever recover turns that
+	 * buffer into.
+	 */
+	lsn = xlog_recover_get_buf_lsn(mp, bp);
+	if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+		trace_xfs_log_recover_buf_skip(log, buf_f);
+		xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
+		goto out_release;
+	}
+
+	if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
+		error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
+		if (error)
+			goto out_release;
+	} else if (buf_f->blf_flags &
+		  (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
+		bool	dirty;
+
+		dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
+		if (!dirty)
+			goto out_release;
+	} else {
+		xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
+	}
+
+	/*
+	 * Perform delayed write on the buffer.  Asynchronous writes will be
+	 * slower when taking into account all the buffers to be flushed.
+	 *
+	 * Also make sure that only inode buffers with good sizes stay in
+	 * the buffer cache.  The kernel moves inodes in buffers of 1 block
+	 * or inode_cluster_size bytes, whichever is bigger.  The inode
+	 * buffers in the log can be a different size if the log was generated
+	 * by an older kernel using unclustered inode buffers or a newer kernel
+	 * running with a different inode cluster size.  Regardless, if the
+	 * inode buffer size isn't max(blocksize, inode_cluster_size)
+	 * for *our* value of inode_cluster_size, then we need to keep
+	 * the buffer out of the buffer cache so that the buffer won't
+	 * overlap with future reads of those inodes.
+	 */
+	if (XFS_DINODE_MAGIC ==
+	    be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
+	    (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
+		xfs_buf_stale(bp);
+		error = xfs_bwrite(bp);
+	} else {
+		ASSERT(bp->b_mount == mp);
+		bp->b_iodone = xlog_recover_iodone;
+		xfs_buf_delwri_queue(bp, buffer_list);
+	}
+
+out_release:
+	xfs_buf_relse(bp);
+	return error;
+cancelled:
+	trace_xfs_log_recover_buf_cancel(log, buf_f);
+	return 0;
+}
+
 const struct xlog_recover_item_ops xlog_buf_item_ops = {
 	.item_type		= XFS_LI_BUF,
 	.reorder		= xlog_recover_buf_reorder,
 	.ra_pass2		= xlog_recover_buf_ra_pass2,
 	.commit_pass1		= xlog_recover_buf_commit_pass1,
+	.commit_pass2		= xlog_recover_buf_commit_pass2,
 };
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b3627ebf870e..d65dc3895a62 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -284,7 +284,7 @@ xlog_header_check_mount(
 	return 0;
 }
 
-STATIC void
+void
 xlog_recover_iodone(
 	struct xfs_buf	*bp)
 {
@@ -1985,7 +1985,7 @@ xlog_add_buffer_cancelled(
 /*
  * Check if there is and entry for blkno, len in the buffer cancel record table.
  */
-static bool
+bool
 xlog_is_buffer_cancelled(
 	struct xlog		*log,
 	xfs_daddr_t		blkno,
@@ -2002,7 +2002,7 @@ xlog_is_buffer_cancelled(
  * buffer is re-used again after its last cancellation we actually replay the
  * changes made at that point.
  */
-static bool
+bool
 xlog_put_buffer_cancelled(
 	struct xlog		*log,
 	xfs_daddr_t		blkno,
@@ -2034,791 +2034,6 @@ xlog_buf_readahead(
 		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
 }
 
-/*
- * Perform recovery for a buffer full of inodes.  In these buffers, the only
- * data which should be recovered is that which corresponds to the
- * di_next_unlinked pointers in the on disk inode structures.  The rest of the
- * data for the inodes is always logged through the inodes themselves rather
- * than the inode buffer and is recovered in xlog_recover_inode_pass2().
- *
- * The only time when buffers full of inodes are fully recovered is when the
- * buffer is full of newly allocated inodes.  In this case the buffer will
- * not be marked as an inode buffer and so will be sent to
- * xlog_recover_do_reg_buffer() below during recovery.
- */
-STATIC int
-xlog_recover_do_inode_buffer(
-	struct xfs_mount	*mp,
-	struct xlog_recover_item *item,
-	struct xfs_buf		*bp,
-	xfs_buf_log_format_t	*buf_f)
-{
-	int			i;
-	int			item_index = 0;
-	int			bit = 0;
-	int			nbits = 0;
-	int			reg_buf_offset = 0;
-	int			reg_buf_bytes = 0;
-	int			next_unlinked_offset;
-	int			inodes_per_buf;
-	xfs_agino_t		*logged_nextp;
-	xfs_agino_t		*buffer_nextp;
-
-	trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
-
-	/*
-	 * Post recovery validation only works properly on CRC enabled
-	 * filesystems.
-	 */
-	if (xfs_sb_version_hascrc(&mp->m_sb))
-		bp->b_ops = &xfs_inode_buf_ops;
-
-	inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog;
-	for (i = 0; i < inodes_per_buf; i++) {
-		next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
-			offsetof(xfs_dinode_t, di_next_unlinked);
-
-		while (next_unlinked_offset >=
-		       (reg_buf_offset + reg_buf_bytes)) {
-			/*
-			 * The next di_next_unlinked field is beyond
-			 * the current logged region.  Find the next
-			 * logged region that contains or is beyond
-			 * the current di_next_unlinked field.
-			 */
-			bit += nbits;
-			bit = xfs_next_bit(buf_f->blf_data_map,
-					   buf_f->blf_map_size, bit);
-
-			/*
-			 * If there are no more logged regions in the
-			 * buffer, then we're done.
-			 */
-			if (bit == -1)
-				return 0;
-
-			nbits = xfs_contig_bits(buf_f->blf_data_map,
-						buf_f->blf_map_size, bit);
-			ASSERT(nbits > 0);
-			reg_buf_offset = bit << XFS_BLF_SHIFT;
-			reg_buf_bytes = nbits << XFS_BLF_SHIFT;
-			item_index++;
-		}
-
-		/*
-		 * If the current logged region starts after the current
-		 * di_next_unlinked field, then move on to the next
-		 * di_next_unlinked field.
-		 */
-		if (next_unlinked_offset < reg_buf_offset)
-			continue;
-
-		ASSERT(item->ri_buf[item_index].i_addr != NULL);
-		ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
-		ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
-
-		/*
-		 * The current logged region contains a copy of the
-		 * current di_next_unlinked field.  Extract its value
-		 * and copy it to the buffer copy.
-		 */
-		logged_nextp = item->ri_buf[item_index].i_addr +
-				next_unlinked_offset - reg_buf_offset;
-		if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) {
-			xfs_alert(mp,
-		"Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). "
-		"Trying to replay bad (0) inode di_next_unlinked field.",
-				item, bp);
-			return -EFSCORRUPTED;
-		}
-
-		buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);
-		*buffer_nextp = *logged_nextp;
-
-		/*
-		 * If necessary, recalculate the CRC in the on-disk inode. We
-		 * have to leave the inode in a consistent state for whoever
-		 * reads it next....
-		 */
-		xfs_dinode_calc_crc(mp,
-				xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
-
-	}
-
-	return 0;
-}
-
-/*
- * V5 filesystems know the age of the buffer on disk being recovered. We can
- * have newer objects on disk than we are replaying, and so for these cases we
- * don't want to replay the current change as that will make the buffer contents
- * temporarily invalid on disk.
- *
- * The magic number might not match the buffer type we are going to recover
- * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags.  Hence
- * extract the LSN of the existing object in the buffer based on it's current
- * magic number.  If we don't recognise the magic number in the buffer, then
- * return a LSN of -1 so that the caller knows it was an unrecognised block and
- * so can recover the buffer.
- *
- * Note: we cannot rely solely on magic number matches to determine that the
- * buffer has a valid LSN - we also need to verify that it belongs to this
- * filesystem, so we need to extract the object's LSN and compare it to that
- * which we read from the superblock. If the UUIDs don't match, then we've got a
- * stale metadata block from an old filesystem instance that we need to recover
- * over the top of.
- */
-static xfs_lsn_t
-xlog_recover_get_buf_lsn(
-	struct xfs_mount	*mp,
-	struct xfs_buf		*bp)
-{
-	uint32_t		magic32;
-	uint16_t		magic16;
-	uint16_t		magicda;
-	void			*blk = bp->b_addr;
-	uuid_t			*uuid;
-	xfs_lsn_t		lsn = -1;
-
-	/* v4 filesystems always recover immediately */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
-		goto recover_immediately;
-
-	magic32 = be32_to_cpu(*(__be32 *)blk);
-	switch (magic32) {
-	case XFS_ABTB_CRC_MAGIC:
-	case XFS_ABTC_CRC_MAGIC:
-	case XFS_ABTB_MAGIC:
-	case XFS_ABTC_MAGIC:
-	case XFS_RMAP_CRC_MAGIC:
-	case XFS_REFC_CRC_MAGIC:
-	case XFS_IBT_CRC_MAGIC:
-	case XFS_IBT_MAGIC: {
-		struct xfs_btree_block *btb = blk;
-
-		lsn = be64_to_cpu(btb->bb_u.s.bb_lsn);
-		uuid = &btb->bb_u.s.bb_uuid;
-		break;
-	}
-	case XFS_BMAP_CRC_MAGIC:
-	case XFS_BMAP_MAGIC: {
-		struct xfs_btree_block *btb = blk;
-
-		lsn = be64_to_cpu(btb->bb_u.l.bb_lsn);
-		uuid = &btb->bb_u.l.bb_uuid;
-		break;
-	}
-	case XFS_AGF_MAGIC:
-		lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
-		uuid = &((struct xfs_agf *)blk)->agf_uuid;
-		break;
-	case XFS_AGFL_MAGIC:
-		lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
-		uuid = &((struct xfs_agfl *)blk)->agfl_uuid;
-		break;
-	case XFS_AGI_MAGIC:
-		lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
-		uuid = &((struct xfs_agi *)blk)->agi_uuid;
-		break;
-	case XFS_SYMLINK_MAGIC:
-		lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
-		uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid;
-		break;
-	case XFS_DIR3_BLOCK_MAGIC:
-	case XFS_DIR3_DATA_MAGIC:
-	case XFS_DIR3_FREE_MAGIC:
-		lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
-		uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
-		break;
-	case XFS_ATTR3_RMT_MAGIC:
-		/*
-		 * Remote attr blocks are written synchronously, rather than
-		 * being logged. That means they do not contain a valid LSN
-		 * (i.e. transactionally ordered) in them, and hence any time we
-		 * see a buffer to replay over the top of a remote attribute
-		 * block we should simply do so.
-		 */
-		goto recover_immediately;
-	case XFS_SB_MAGIC:
-		/*
-		 * superblock uuids are magic. We may or may not have a
-		 * sb_meta_uuid on disk, but it will be set in the in-core
-		 * superblock. We set the uuid pointer for verification
-		 * according to the superblock feature mask to ensure we check
-		 * the relevant UUID in the superblock.
-		 */
-		lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
-		if (xfs_sb_version_hasmetauuid(&mp->m_sb))
-			uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
-		else
-			uuid = &((struct xfs_dsb *)blk)->sb_uuid;
-		break;
-	default:
-		break;
-	}
-
-	if (lsn != (xfs_lsn_t)-1) {
-		if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
-			goto recover_immediately;
-		return lsn;
-	}
-
-	magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
-	switch (magicda) {
-	case XFS_DIR3_LEAF1_MAGIC:
-	case XFS_DIR3_LEAFN_MAGIC:
-	case XFS_DA3_NODE_MAGIC:
-		lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
-		uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
-		break;
-	default:
-		break;
-	}
-
-	if (lsn != (xfs_lsn_t)-1) {
-		if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
-			goto recover_immediately;
-		return lsn;
-	}
-
-	/*
-	 * We do individual object checks on dquot and inode buffers as they
-	 * have their own individual LSN records. Also, we could have a stale
-	 * buffer here, so we have to at least recognise these buffer types.
-	 *
-	 * A notd complexity here is inode unlinked list processing - it logs
-	 * the inode directly in the buffer, but we don't know which inodes have
-	 * been modified, and there is no global buffer LSN. Hence we need to
-	 * recover all inode buffer types immediately. This problem will be
-	 * fixed by logical logging of the unlinked list modifications.
-	 */
-	magic16 = be16_to_cpu(*(__be16 *)blk);
-	switch (magic16) {
-	case XFS_DQUOT_MAGIC:
-	case XFS_DINODE_MAGIC:
-		goto recover_immediately;
-	default:
-		break;
-	}
-
-	/* unknown buffer contents, recover immediately */
-
-recover_immediately:
-	return (xfs_lsn_t)-1;
-
-}
-
-/*
- * Validate the recovered buffer is of the correct type and attach the
- * appropriate buffer operations to them for writeback. Magic numbers are in a
- * few places:
- *	the first 16 bits of the buffer (inode buffer, dquot buffer),
- *	the first 32 bits of the buffer (most blocks),
- *	inside a struct xfs_da_blkinfo at the start of the buffer.
- */
-static void
-xlog_recover_validate_buf_type(
-	struct xfs_mount	*mp,
-	struct xfs_buf		*bp,
-	xfs_buf_log_format_t	*buf_f,
-	xfs_lsn_t		current_lsn)
-{
-	struct xfs_da_blkinfo	*info = bp->b_addr;
-	uint32_t		magic32;
-	uint16_t		magic16;
-	uint16_t		magicda;
-	char			*warnmsg = NULL;
-
-	/*
-	 * We can only do post recovery validation on items on CRC enabled
-	 * fielsystems as we need to know when the buffer was written to be able
-	 * to determine if we should have replayed the item. If we replay old
-	 * metadata over a newer buffer, then it will enter a temporarily
-	 * inconsistent state resulting in verification failures. Hence for now
-	 * just avoid the verification stage for non-crc filesystems
-	 */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
-		return;
-
-	magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
-	magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
-	magicda = be16_to_cpu(info->magic);
-	switch (xfs_blft_from_flags(buf_f)) {
-	case XFS_BLFT_BTREE_BUF:
-		switch (magic32) {
-		case XFS_ABTB_CRC_MAGIC:
-		case XFS_ABTB_MAGIC:
-			bp->b_ops = &xfs_bnobt_buf_ops;
-			break;
-		case XFS_ABTC_CRC_MAGIC:
-		case XFS_ABTC_MAGIC:
-			bp->b_ops = &xfs_cntbt_buf_ops;
-			break;
-		case XFS_IBT_CRC_MAGIC:
-		case XFS_IBT_MAGIC:
-			bp->b_ops = &xfs_inobt_buf_ops;
-			break;
-		case XFS_FIBT_CRC_MAGIC:
-		case XFS_FIBT_MAGIC:
-			bp->b_ops = &xfs_finobt_buf_ops;
-			break;
-		case XFS_BMAP_CRC_MAGIC:
-		case XFS_BMAP_MAGIC:
-			bp->b_ops = &xfs_bmbt_buf_ops;
-			break;
-		case XFS_RMAP_CRC_MAGIC:
-			bp->b_ops = &xfs_rmapbt_buf_ops;
-			break;
-		case XFS_REFC_CRC_MAGIC:
-			bp->b_ops = &xfs_refcountbt_buf_ops;
-			break;
-		default:
-			warnmsg = "Bad btree block magic!";
-			break;
-		}
-		break;
-	case XFS_BLFT_AGF_BUF:
-		if (magic32 != XFS_AGF_MAGIC) {
-			warnmsg = "Bad AGF block magic!";
-			break;
-		}
-		bp->b_ops = &xfs_agf_buf_ops;
-		break;
-	case XFS_BLFT_AGFL_BUF:
-		if (magic32 != XFS_AGFL_MAGIC) {
-			warnmsg = "Bad AGFL block magic!";
-			break;
-		}
-		bp->b_ops = &xfs_agfl_buf_ops;
-		break;
-	case XFS_BLFT_AGI_BUF:
-		if (magic32 != XFS_AGI_MAGIC) {
-			warnmsg = "Bad AGI block magic!";
-			break;
-		}
-		bp->b_ops = &xfs_agi_buf_ops;
-		break;
-	case XFS_BLFT_UDQUOT_BUF:
-	case XFS_BLFT_PDQUOT_BUF:
-	case XFS_BLFT_GDQUOT_BUF:
-#ifdef CONFIG_XFS_QUOTA
-		if (magic16 != XFS_DQUOT_MAGIC) {
-			warnmsg = "Bad DQUOT block magic!";
-			break;
-		}
-		bp->b_ops = &xfs_dquot_buf_ops;
-#else
-		xfs_alert(mp,
-	"Trying to recover dquots without QUOTA support built in!");
-		ASSERT(0);
-#endif
-		break;
-	case XFS_BLFT_DINO_BUF:
-		if (magic16 != XFS_DINODE_MAGIC) {
-			warnmsg = "Bad INODE block magic!";
-			break;
-		}
-		bp->b_ops = &xfs_inode_buf_ops;
-		break;
-	case XFS_BLFT_SYMLINK_BUF:
-		if (magic32 != XFS_SYMLINK_MAGIC) {
-			warnmsg = "Bad symlink block magic!";
-			break;
-		}
-		bp->b_ops = &xfs_symlink_buf_ops;
-		break;
-	case XFS_BLFT_DIR_BLOCK_BUF:
-		if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
-		    magic32 != XFS_DIR3_BLOCK_MAGIC) {
-			warnmsg = "Bad dir block magic!";
-			break;
-		}
-		bp->b_ops = &xfs_dir3_block_buf_ops;
-		break;
-	case XFS_BLFT_DIR_DATA_BUF:
-		if (magic32 != XFS_DIR2_DATA_MAGIC &&
-		    magic32 != XFS_DIR3_DATA_MAGIC) {
-			warnmsg = "Bad dir data magic!";
-			break;
-		}
-		bp->b_ops = &xfs_dir3_data_buf_ops;
-		break;
-	case XFS_BLFT_DIR_FREE_BUF:
-		if (magic32 != XFS_DIR2_FREE_MAGIC &&
-		    magic32 != XFS_DIR3_FREE_MAGIC) {
-			warnmsg = "Bad dir3 free magic!";
-			break;
-		}
-		bp->b_ops = &xfs_dir3_free_buf_ops;
-		break;
-	case XFS_BLFT_DIR_LEAF1_BUF:
-		if (magicda != XFS_DIR2_LEAF1_MAGIC &&
-		    magicda != XFS_DIR3_LEAF1_MAGIC) {
-			warnmsg = "Bad dir leaf1 magic!";
-			break;
-		}
-		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
-		break;
-	case XFS_BLFT_DIR_LEAFN_BUF:
-		if (magicda != XFS_DIR2_LEAFN_MAGIC &&
-		    magicda != XFS_DIR3_LEAFN_MAGIC) {
-			warnmsg = "Bad dir leafn magic!";
-			break;
-		}
-		bp->b_ops = &xfs_dir3_leafn_buf_ops;
-		break;
-	case XFS_BLFT_DA_NODE_BUF:
-		if (magicda != XFS_DA_NODE_MAGIC &&
-		    magicda != XFS_DA3_NODE_MAGIC) {
-			warnmsg = "Bad da node magic!";
-			break;
-		}
-		bp->b_ops = &xfs_da3_node_buf_ops;
-		break;
-	case XFS_BLFT_ATTR_LEAF_BUF:
-		if (magicda != XFS_ATTR_LEAF_MAGIC &&
-		    magicda != XFS_ATTR3_LEAF_MAGIC) {
-			warnmsg = "Bad attr leaf magic!";
-			break;
-		}
-		bp->b_ops = &xfs_attr3_leaf_buf_ops;
-		break;
-	case XFS_BLFT_ATTR_RMT_BUF:
-		if (magic32 != XFS_ATTR3_RMT_MAGIC) {
-			warnmsg = "Bad attr remote magic!";
-			break;
-		}
-		bp->b_ops = &xfs_attr3_rmt_buf_ops;
-		break;
-	case XFS_BLFT_SB_BUF:
-		if (magic32 != XFS_SB_MAGIC) {
-			warnmsg = "Bad SB block magic!";
-			break;
-		}
-		bp->b_ops = &xfs_sb_buf_ops;
-		break;
-#ifdef CONFIG_XFS_RT
-	case XFS_BLFT_RTBITMAP_BUF:
-	case XFS_BLFT_RTSUMMARY_BUF:
-		/* no magic numbers for verification of RT buffers */
-		bp->b_ops = &xfs_rtbuf_ops;
-		break;
-#endif /* CONFIG_XFS_RT */
-	default:
-		xfs_warn(mp, "Unknown buffer type %d!",
-			 xfs_blft_from_flags(buf_f));
-		break;
-	}
-
-	/*
-	 * Nothing else to do in the case of a NULL current LSN as this means
-	 * the buffer is more recent than the change in the log and will be
-	 * skipped.
-	 */
-	if (current_lsn == NULLCOMMITLSN)
-		return;
-
-	if (warnmsg) {
-		xfs_warn(mp, warnmsg);
-		ASSERT(0);
-	}
-
-	/*
-	 * We must update the metadata LSN of the buffer as it is written out to
-	 * ensure that older transactions never replay over this one and corrupt
-	 * the buffer. This can occur if log recovery is interrupted at some
-	 * point after the current transaction completes, at which point a
-	 * subsequent mount starts recovery from the beginning.
-	 *
-	 * Write verifiers update the metadata LSN from log items attached to
-	 * the buffer. Therefore, initialize a bli purely to carry the LSN to
-	 * the verifier. We'll clean it up in our ->iodone() callback.
-	 */
-	if (bp->b_ops) {
-		struct xfs_buf_log_item	*bip;
-
-		ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
-		bp->b_iodone = xlog_recover_iodone;
-		xfs_buf_item_init(bp, mp);
-		bip = bp->b_log_item;
-		bip->bli_item.li_lsn = current_lsn;
-	}
-}
-
-/*
- * Perform a 'normal' buffer recovery.  Each logged region of the
- * buffer should be copied over the corresponding region in the
- * given buffer.  The bitmap in the buf log format structure indicates
- * where to place the logged data.
- */
-STATIC void
-xlog_recover_do_reg_buffer(
-	struct xfs_mount	*mp,
-	struct xlog_recover_item *item,
-	struct xfs_buf		*bp,
-	xfs_buf_log_format_t	*buf_f,
-	xfs_lsn_t		current_lsn)
-{
-	int			i;
-	int			bit;
-	int			nbits;
-	xfs_failaddr_t		fa;
-	const size_t		size_disk_dquot = sizeof(struct xfs_disk_dquot);
-
-	trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
-
-	bit = 0;
-	i = 1;  /* 0 is the buf format structure */
-	while (1) {
-		bit = xfs_next_bit(buf_f->blf_data_map,
-				   buf_f->blf_map_size, bit);
-		if (bit == -1)
-			break;
-		nbits = xfs_contig_bits(buf_f->blf_data_map,
-					buf_f->blf_map_size, bit);
-		ASSERT(nbits > 0);
-		ASSERT(item->ri_buf[i].i_addr != NULL);
-		ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
-		ASSERT(BBTOB(bp->b_length) >=
-		       ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
-
-		/*
-		 * The dirty regions logged in the buffer, even though
-		 * contiguous, may span multiple chunks. This is because the
-		 * dirty region may span a physical page boundary in a buffer
-		 * and hence be split into two separate vectors for writing into
-		 * the log. Hence we need to trim nbits back to the length of
-		 * the current region being copied out of the log.
-		 */
-		if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
-			nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
-
-		/*
-		 * Do a sanity check if this is a dquot buffer. Just checking
-		 * the first dquot in the buffer should do. XXXThis is
-		 * probably a good thing to do for other buf types also.
-		 */
-		fa = NULL;
-		if (buf_f->blf_flags &
-		   (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
-			if (item->ri_buf[i].i_addr == NULL) {
-				xfs_alert(mp,
-					"XFS: NULL dquot in %s.", __func__);
-				goto next;
-			}
-			if (item->ri_buf[i].i_len < size_disk_dquot) {
-				xfs_alert(mp,
-					"XFS: dquot too small (%d) in %s.",
-					item->ri_buf[i].i_len, __func__);
-				goto next;
-			}
-			fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr,
-					       -1, 0);
-			if (fa) {
-				xfs_alert(mp,
-	"dquot corrupt at %pS trying to replay into block 0x%llx",
-					fa, bp->b_bn);
-				goto next;
-			}
-		}
-
-		memcpy(xfs_buf_offset(bp,
-			(uint)bit << XFS_BLF_SHIFT),	/* dest */
-			item->ri_buf[i].i_addr,		/* source */
-			nbits<<XFS_BLF_SHIFT);		/* length */
- next:
-		i++;
-		bit += nbits;
-	}
-
-	/* Shouldn't be any more regions */
-	ASSERT(i == item->ri_total);
-
-	xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn);
-}
-
-/*
- * Perform a dquot buffer recovery.
- * Simple algorithm: if we have found a QUOTAOFF log item of the same type
- * (ie. USR or GRP), then just toss this buffer away; don't recover it.
- * Else, treat it as a regular buffer and do recovery.
- *
- * Return false if the buffer was tossed and true if we recovered the buffer to
- * indicate to the caller if the buffer needs writing.
- */
-STATIC bool
-xlog_recover_do_dquot_buffer(
-	struct xfs_mount		*mp,
-	struct xlog			*log,
-	struct xlog_recover_item	*item,
-	struct xfs_buf			*bp,
-	struct xfs_buf_log_format	*buf_f)
-{
-	uint			type;
-
-	trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
-
-	/*
-	 * Filesystems are required to send in quota flags at mount time.
-	 */
-	if (!mp->m_qflags)
-		return false;
-
-	type = 0;
-	if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
-		type |= XFS_DQ_USER;
-	if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF)
-		type |= XFS_DQ_PROJ;
-	if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF)
-		type |= XFS_DQ_GROUP;
-	/*
-	 * This type of quotas was turned off, so ignore this buffer
-	 */
-	if (log->l_quotaoffs_flag & type)
-		return false;
-
-	xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN);
-	return true;
-}
-
-/*
- * This routine replays a modification made to a buffer at runtime.
- * There are actually two types of buffer, regular and inode, which
- * are handled differently.  Inode buffers are handled differently
- * in that we only recover a specific set of data from them, namely
- * the inode di_next_unlinked fields.  This is because all other inode
- * data is actually logged via inode records and any data we replay
- * here which overlaps that may be stale.
- *
- * When meta-data buffers are freed at run time we log a buffer item
- * with the XFS_BLF_CANCEL bit set to indicate that previous copies
- * of the buffer in the log should not be replayed at recovery time.
- * This is so that if the blocks covered by the buffer are reused for
- * file data before we crash we don't end up replaying old, freed
- * meta-data into a user's file.
- *
- * To handle the cancellation of buffer log items, we make two passes
- * over the log during recovery.  During the first we build a table of
- * those buffers which have been cancelled, and during the second we
- * only replay those buffers which do not have corresponding cancel
- * records in the table.  See xlog_recover_buffer_pass[1,2] above
- * for more details on the implementation of the table of cancel records.
- */
-STATIC int
-xlog_recover_buffer_pass2(
-	struct xlog			*log,
-	struct list_head		*buffer_list,
-	struct xlog_recover_item	*item,
-	xfs_lsn_t			current_lsn)
-{
-	xfs_buf_log_format_t	*buf_f = item->ri_buf[0].i_addr;
-	xfs_mount_t		*mp = log->l_mp;
-	xfs_buf_t		*bp;
-	int			error;
-	uint			buf_flags;
-	xfs_lsn_t		lsn;
-
-	/*
-	 * In this pass we only want to recover all the buffers which have
-	 * not been cancelled and are not cancellation buffers themselves.
-	 */
-	if (buf_f->blf_flags & XFS_BLF_CANCEL) {
-		if (xlog_put_buffer_cancelled(log, buf_f->blf_blkno,
-				buf_f->blf_len))
-			goto cancelled;
-	} else {
-
-		if (xlog_is_buffer_cancelled(log, buf_f->blf_blkno,
-				buf_f->blf_len))
-			goto cancelled;
-	}
-
-	trace_xfs_log_recover_buf_recover(log, buf_f);
-
-	buf_flags = 0;
-	if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
-		buf_flags |= XBF_UNMAPPED;
-
-	error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
-			  buf_flags, &bp, NULL);
-	if (error)
-		return error;
-
-	/*
-	 * Recover the buffer only if we get an LSN from it and it's less than
-	 * the lsn of the transaction we are replaying.
-	 *
-	 * Note that we have to be extremely careful of readahead here.
-	 * Readahead does not attach verfiers to the buffers so if we don't
-	 * actually do any replay after readahead because of the LSN we found
-	 * in the buffer if more recent than that current transaction then we
-	 * need to attach the verifier directly. Failure to do so can lead to
-	 * future recovery actions (e.g. EFI and unlinked list recovery) can
-	 * operate on the buffers and they won't get the verifier attached. This
-	 * can lead to blocks on disk having the correct content but a stale
-	 * CRC.
-	 *
-	 * It is safe to assume these clean buffers are currently up to date.
-	 * If the buffer is dirtied by a later transaction being replayed, then
-	 * the verifier will be reset to match whatever recover turns that
-	 * buffer into.
-	 */
-	lsn = xlog_recover_get_buf_lsn(mp, bp);
-	if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
-		trace_xfs_log_recover_buf_skip(log, buf_f);
-		xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
-		goto out_release;
-	}
-
-	if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
-		error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
-		if (error)
-			goto out_release;
-	} else if (buf_f->blf_flags &
-		  (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
-		bool	dirty;
-
-		dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
-		if (!dirty)
-			goto out_release;
-	} else {
-		xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
-	}
-
-	/*
-	 * Perform delayed write on the buffer.  Asynchronous writes will be
-	 * slower when taking into account all the buffers to be flushed.
-	 *
-	 * Also make sure that only inode buffers with good sizes stay in
-	 * the buffer cache.  The kernel moves inodes in buffers of 1 block
-	 * or inode_cluster_size bytes, whichever is bigger.  The inode
-	 * buffers in the log can be a different size if the log was generated
-	 * by an older kernel using unclustered inode buffers or a newer kernel
-	 * running with a different inode cluster size.  Regardless, if the
-	 * the inode buffer size isn't max(blocksize, inode_cluster_size)
-	 * for *our* value of inode_cluster_size, then we need to keep
-	 * the buffer out of the buffer cache so that the buffer won't
-	 * overlap with future reads of those inodes.
-	 */
-	if (XFS_DINODE_MAGIC ==
-	    be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
-	    (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
-		xfs_buf_stale(bp);
-		error = xfs_bwrite(bp);
-	} else {
-		ASSERT(bp->b_mount == mp);
-		bp->b_iodone = xlog_recover_iodone;
-		xfs_buf_delwri_queue(bp, buffer_list);
-	}
-
-out_release:
-	xfs_buf_relse(bp);
-	return error;
-cancelled:
-	trace_xfs_log_recover_buf_cancel(log, buf_f);
-	return 0;
-}
-
 /*
  * Inode fork owner changes
  *
@@ -3846,10 +3061,11 @@ xlog_recover_commit_pass2(
 {
 	trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
 
+	if (item->ri_ops && item->ri_ops->commit_pass2)
+		return item->ri_ops->commit_pass2(log, buffer_list, item,
+				trans->r_lsn);
+
 	switch (ITEM_TYPE(item)) {
-	case XFS_LI_BUF:
-		return xlog_recover_buffer_pass2(log, buffer_list, item,
-						 trans->r_lsn);
 	case XFS_LI_INODE:
 		return xlog_recover_inode_pass2(log, buffer_list, item,
 						 trans->r_lsn);


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 06/28] xfs: refactor log recovery inode item dispatch for pass2 commit functions
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (4 preceding siblings ...)
  2020-05-05  1:11 ` [PATCH 05/28] xfs: refactor log recovery buffer item dispatch for pass2 " Darrick J. Wong
@ 2020-05-05  1:11 ` Darrick J. Wong
  2020-05-05  5:09   ` Chandan Babu R
  2020-05-06 15:10   ` Christoph Hellwig
  2020-05-05  1:11 ` [PATCH 07/28] xfs: refactor log recovery dquot " Darrick J. Wong
                   ` (21 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:11 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the log inode item pass2 commit code into the per-item source code
files and use the dispatch function to call it.  We do these one at a
time because there's a lot of code to move.  No functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_inode_item_recover.c |  355 +++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_log_recover.c        |  355 ---------------------------------------
 2 files changed, 355 insertions(+), 355 deletions(-)


diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index a132cacd8d48..2bdba612aa71 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -20,6 +20,8 @@
 #include "xfs_error.h"
 #include "xfs_log_priv.h"
 #include "xfs_log_recover.h"
+#include "xfs_icache.h"
+#include "xfs_bmap_btree.h"
 
 STATIC void
 xlog_recover_inode_ra_pass2(
@@ -39,7 +41,360 @@ xlog_recover_inode_ra_pass2(
 	}
 }
 
+/*
+ * Inode fork owner changes
+ *
+ * If we have been told that we have to reparent the inode fork, it's because an
+ * extent swap operation on a CRC enabled filesystem has been done and we are
+ * replaying it. We need to walk the BMBT of the appropriate fork and change the
+ * owners of it.
+ *
+ * The complexity here is that we don't have an inode context to work with, so
+ * after we've replayed the inode we need to instantiate one.  This is where the
+ * fun begins.
+ *
+ * We are in the middle of log recovery, so we can't run transactions. That
+ * means we cannot use cache coherent inode instantiation via xfs_iget(), as
+ * that will result in the corresponding iput() running the inode through
+ * xfs_inactive(). If we've just replayed an inode core that changes the link
+ * count to zero (i.e. it's been unlinked), then xfs_inactive() will run
+ * transactions (bad!).
+ *
+ * So, to avoid this, we instantiate an inode directly from the inode core we've
+ * just recovered. We have the buffer still locked, and all we really need to
+ * instantiate is the inode core and the forks being modified. We can do this
+ * manually, then run the inode btree owner change, and then tear down the
+ * xfs_inode without having to run any transactions at all.
+ *
+ * Also, because we don't have a transaction context available here but need to
+ * gather all the buffers we modify for writeback, we pass the buffer_list
+ * instead for the operation to use.
+ */
+
+STATIC int
+xfs_recover_inode_owner_change(
+	struct xfs_mount	*mp,
+	struct xfs_dinode	*dip,
+	struct xfs_inode_log_format *in_f,
+	struct list_head	*buffer_list)
+{
+	struct xfs_inode	*ip;
+	int			error;
+
+	ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER));
+
+	ip = xfs_inode_alloc(mp, in_f->ilf_ino);
+	if (!ip)
+		return -ENOMEM;
+
+	/* instantiate the inode */
+	ASSERT(dip->di_version >= 3);
+	xfs_inode_from_disk(ip, dip);
+
+	error = xfs_iformat_fork(ip, dip);
+	if (error)
+		goto out_free_ip;
+
+	if (!xfs_inode_verify_forks(ip)) {
+		error = -EFSCORRUPTED;
+		goto out_free_ip;
+	}
+
+	if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
+		ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
+		error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK,
+					      ip->i_ino, buffer_list);
+		if (error)
+			goto out_free_ip;
+	}
+
+	if (in_f->ilf_fields & XFS_ILOG_AOWNER) {
+		ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT);
+		error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK,
+					      ip->i_ino, buffer_list);
+		if (error)
+			goto out_free_ip;
+	}
+
+out_free_ip:
+	xfs_inode_free(ip);
+	return error;
+}
+
+STATIC int
+xlog_recover_inode_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			current_lsn)
+{
+	struct xfs_inode_log_format	*in_f;
+	struct xfs_mount		*mp = log->l_mp;
+	struct xfs_buf			*bp;
+	struct xfs_dinode		*dip;
+	int				len;
+	char				*src;
+	char				*dest;
+	int				error;
+	int				attr_index;
+	uint				fields;
+	struct xfs_log_dinode		*ldip;
+	uint				isize;
+	int				need_free = 0;
+
+	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
+		in_f = item->ri_buf[0].i_addr;
+	} else {
+		in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
+		need_free = 1;
+		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
+		if (error)
+			goto error;
+	}
+
+	/*
+	 * Inode buffers can be freed, look out for it,
+	 * and do not replay the inode.
+	 */
+	if (xlog_is_buffer_cancelled(log, in_f->ilf_blkno, in_f->ilf_len)) {
+		error = 0;
+		trace_xfs_log_recover_inode_cancel(log, in_f);
+		goto error;
+	}
+	trace_xfs_log_recover_inode_recover(log, in_f);
+
+	error = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
+			0, &bp, &xfs_inode_buf_ops);
+	if (error)
+		goto error;
+	ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
+	dip = xfs_buf_offset(bp, in_f->ilf_boffset);
+
+	/*
+	 * Make sure the place we're flushing out to really looks
+	 * like an inode!
+	 */
+	if (XFS_IS_CORRUPT(mp, !xfs_verify_magic16(bp, dip->di_magic))) {
+		xfs_alert(mp,
+	"%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
+			__func__, dip, bp, in_f->ilf_ino);
+		error = -EFSCORRUPTED;
+		goto out_release;
+	}
+	ldip = item->ri_buf[1].i_addr;
+	if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) {
+		xfs_alert(mp,
+			"%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
+			__func__, item, in_f->ilf_ino);
+		error = -EFSCORRUPTED;
+		goto out_release;
+	}
+
+	/*
+	 * If the inode has an LSN in it, recover the inode only if it's less
+	 * than the lsn of the transaction we are replaying. Note: we still
+	 * need to replay an owner change even though the inode is more recent
+	 * than the transaction as there is no guarantee that all the btree
+	 * blocks are more recent than this transaction, too.
+	 */
+	if (dip->di_version >= 3) {
+		xfs_lsn_t	lsn = be64_to_cpu(dip->di_lsn);
+
+		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+			trace_xfs_log_recover_inode_skip(log, in_f);
+			error = 0;
+			goto out_owner_change;
+		}
+	}
+
+	/*
+	 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
+	 * are transactional and if ordering is necessary we can determine that
+	 * more accurately by the LSN field in the V3 inode core. Don't trust
+	 * the inode versions as we might be changing them here - use the
+	 * superblock flag to determine whether we need to look at di_flushiter
+	 * to skip replay when the on disk inode is newer than the log one
+	 */
+	if (!xfs_sb_version_has_v3inode(&mp->m_sb) &&
+	    ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+		/*
+		 * Deal with the wrap case, DI_MAX_FLUSH is less
+		 * than smaller numbers
+		 */
+		if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
+		    ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
+			/* do nothing */
+		} else {
+			trace_xfs_log_recover_inode_skip(log, in_f);
+			error = 0;
+			goto out_release;
+		}
+	}
+
+	/* Take the opportunity to reset the flush iteration count */
+	ldip->di_flushiter = 0;
+
+	if (unlikely(S_ISREG(ldip->di_mode))) {
+		if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
+		    (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
+			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
+					 XFS_ERRLEVEL_LOW, mp, ldip,
+					 sizeof(*ldip));
+			xfs_alert(mp,
+		"%s: Bad regular inode log record, rec ptr "PTR_FMT", "
+		"ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
+				__func__, item, dip, bp, in_f->ilf_ino);
+			error = -EFSCORRUPTED;
+			goto out_release;
+		}
+	} else if (unlikely(S_ISDIR(ldip->di_mode))) {
+		if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
+		    (ldip->di_format != XFS_DINODE_FMT_BTREE) &&
+		    (ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
+			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
+					     XFS_ERRLEVEL_LOW, mp, ldip,
+					     sizeof(*ldip));
+			xfs_alert(mp,
+		"%s: Bad dir inode log record, rec ptr "PTR_FMT", "
+		"ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
+				__func__, item, dip, bp, in_f->ilf_ino);
+			error = -EFSCORRUPTED;
+			goto out_release;
+		}
+	}
+	if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
+		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
+				     XFS_ERRLEVEL_LOW, mp, ldip,
+				     sizeof(*ldip));
+		xfs_alert(mp,
+	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
+	"dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
+			__func__, item, dip, bp, in_f->ilf_ino,
+			ldip->di_nextents + ldip->di_anextents,
+			ldip->di_nblocks);
+		error = -EFSCORRUPTED;
+		goto out_release;
+	}
+	if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
+		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
+				     XFS_ERRLEVEL_LOW, mp, ldip,
+				     sizeof(*ldip));
+		xfs_alert(mp,
+	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
+	"dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__,
+			item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
+		error = -EFSCORRUPTED;
+		goto out_release;
+	}
+	isize = xfs_log_dinode_size(mp);
+	if (unlikely(item->ri_buf[1].i_len > isize)) {
+		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
+				     XFS_ERRLEVEL_LOW, mp, ldip,
+				     sizeof(*ldip));
+		xfs_alert(mp,
+			"%s: Bad inode log record length %d, rec ptr "PTR_FMT,
+			__func__, item->ri_buf[1].i_len, item);
+		error = -EFSCORRUPTED;
+		goto out_release;
+	}
+
+	/* recover the log dinode inode into the on disk inode */
+	xfs_log_dinode_to_disk(ldip, dip);
+
+	fields = in_f->ilf_fields;
+	if (fields & XFS_ILOG_DEV)
+		xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
+
+	if (in_f->ilf_size == 2)
+		goto out_owner_change;
+	len = item->ri_buf[2].i_len;
+	src = item->ri_buf[2].i_addr;
+	ASSERT(in_f->ilf_size <= 4);
+	ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
+	ASSERT(!(fields & XFS_ILOG_DFORK) ||
+	       (len == in_f->ilf_dsize));
+
+	switch (fields & XFS_ILOG_DFORK) {
+	case XFS_ILOG_DDATA:
+	case XFS_ILOG_DEXT:
+		memcpy(XFS_DFORK_DPTR(dip), src, len);
+		break;
+
+	case XFS_ILOG_DBROOT:
+		xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
+				 (struct xfs_bmdr_block *)XFS_DFORK_DPTR(dip),
+				 XFS_DFORK_DSIZE(dip, mp));
+		break;
+
+	default:
+		/*
+		 * There are no data fork flags set.
+		 */
+		ASSERT((fields & XFS_ILOG_DFORK) == 0);
+		break;
+	}
+
+	/*
+	 * If we logged any attribute data, recover it.  There may or
+	 * may not have been any other non-core data logged in this
+	 * transaction.
+	 */
+	if (in_f->ilf_fields & XFS_ILOG_AFORK) {
+		if (in_f->ilf_fields & XFS_ILOG_DFORK) {
+			attr_index = 3;
+		} else {
+			attr_index = 2;
+		}
+		len = item->ri_buf[attr_index].i_len;
+		src = item->ri_buf[attr_index].i_addr;
+		ASSERT(len == in_f->ilf_asize);
+
+		switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
+		case XFS_ILOG_ADATA:
+		case XFS_ILOG_AEXT:
+			dest = XFS_DFORK_APTR(dip);
+			ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
+			memcpy(dest, src, len);
+			break;
+
+		case XFS_ILOG_ABROOT:
+			dest = XFS_DFORK_APTR(dip);
+			xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
+					 len, (struct xfs_bmdr_block *)dest,
+					 XFS_DFORK_ASIZE(dip, mp));
+			break;
+
+		default:
+			xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
+			ASSERT(0);
+			error = -EFSCORRUPTED;
+			goto out_release;
+		}
+	}
+
+out_owner_change:
+	/* Recover the swapext owner change unless inode has been deleted */
+	if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) &&
+	    (dip->di_mode != 0))
+		error = xfs_recover_inode_owner_change(mp, dip, in_f,
+						       buffer_list);
+	/* re-generate the checksum. */
+	xfs_dinode_calc_crc(log->l_mp, dip);
+
+	ASSERT(bp->b_mount == mp);
+	bp->b_iodone = xlog_recover_iodone;
+	xfs_buf_delwri_queue(bp, buffer_list);
+
+out_release:
+	xfs_buf_relse(bp);
+error:
+	if (need_free)
+		kmem_free(in_f);
+	return error;
+}
+
 const struct xlog_recover_item_ops xlog_inode_item_ops = {
 	.item_type		= XFS_LI_INODE,
 	.ra_pass2		= xlog_recover_inode_ra_pass2,
+	.commit_pass2		= xlog_recover_inode_commit_pass2,
 };
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index d65dc3895a62..cb5902550e8c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2034,358 +2034,6 @@ xlog_buf_readahead(
 		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
 }
 
-/*
- * Inode fork owner changes
- *
- * If we have been told that we have to reparent the inode fork, it's because an
- * extent swap operation on a CRC enabled filesystem has been done and we are
- * replaying it. We need to walk the BMBT of the appropriate fork and change the
- * owners of it.
- *
- * The complexity here is that we don't have an inode context to work with, so
- * after we've replayed the inode we need to instantiate one.  This is where the
- * fun begins.
- *
- * We are in the middle of log recovery, so we can't run transactions. That
- * means we cannot use cache coherent inode instantiation via xfs_iget(), as
- * that will result in the corresponding iput() running the inode through
- * xfs_inactive(). If we've just replayed an inode core that changes the link
- * count to zero (i.e. it's been unlinked), then xfs_inactive() will run
- * transactions (bad!).
- *
- * So, to avoid this, we instantiate an inode directly from the inode core we've
- * just recovered. We have the buffer still locked, and all we really need to
- * instantiate is the inode core and the forks being modified. We can do this
- * manually, then run the inode btree owner change, and then tear down the
- * xfs_inode without having to run any transactions at all.
- *
- * Also, because we don't have a transaction context available here but need to
- * gather all the buffers we modify for writeback so we pass the buffer_list
- * instead for the operation to use.
- */
-
-STATIC int
-xfs_recover_inode_owner_change(
-	struct xfs_mount	*mp,
-	struct xfs_dinode	*dip,
-	struct xfs_inode_log_format *in_f,
-	struct list_head	*buffer_list)
-{
-	struct xfs_inode	*ip;
-	int			error;
-
-	ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER));
-
-	ip = xfs_inode_alloc(mp, in_f->ilf_ino);
-	if (!ip)
-		return -ENOMEM;
-
-	/* instantiate the inode */
-	ASSERT(dip->di_version >= 3);
-	xfs_inode_from_disk(ip, dip);
-
-	error = xfs_iformat_fork(ip, dip);
-	if (error)
-		goto out_free_ip;
-
-	if (!xfs_inode_verify_forks(ip)) {
-		error = -EFSCORRUPTED;
-		goto out_free_ip;
-	}
-
-	if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
-		ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
-		error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK,
-					      ip->i_ino, buffer_list);
-		if (error)
-			goto out_free_ip;
-	}
-
-	if (in_f->ilf_fields & XFS_ILOG_AOWNER) {
-		ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT);
-		error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK,
-					      ip->i_ino, buffer_list);
-		if (error)
-			goto out_free_ip;
-	}
-
-out_free_ip:
-	xfs_inode_free(ip);
-	return error;
-}
-
-STATIC int
-xlog_recover_inode_pass2(
-	struct xlog			*log,
-	struct list_head		*buffer_list,
-	struct xlog_recover_item	*item,
-	xfs_lsn_t			current_lsn)
-{
-	struct xfs_inode_log_format	*in_f;
-	xfs_mount_t		*mp = log->l_mp;
-	xfs_buf_t		*bp;
-	xfs_dinode_t		*dip;
-	int			len;
-	char			*src;
-	char			*dest;
-	int			error;
-	int			attr_index;
-	uint			fields;
-	struct xfs_log_dinode	*ldip;
-	uint			isize;
-	int			need_free = 0;
-
-	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
-		in_f = item->ri_buf[0].i_addr;
-	} else {
-		in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
-		need_free = 1;
-		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
-		if (error)
-			goto error;
-	}
-
-	/*
-	 * Inode buffers can be freed, look out for it,
-	 * and do not replay the inode.
-	 */
-	if (xlog_is_buffer_cancelled(log, in_f->ilf_blkno, in_f->ilf_len)) {
-		error = 0;
-		trace_xfs_log_recover_inode_cancel(log, in_f);
-		goto error;
-	}
-	trace_xfs_log_recover_inode_recover(log, in_f);
-
-	error = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
-			0, &bp, &xfs_inode_buf_ops);
-	if (error)
-		goto error;
-	ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
-	dip = xfs_buf_offset(bp, in_f->ilf_boffset);
-
-	/*
-	 * Make sure the place we're flushing out to really looks
-	 * like an inode!
-	 */
-	if (XFS_IS_CORRUPT(mp, !xfs_verify_magic16(bp, dip->di_magic))) {
-		xfs_alert(mp,
-	"%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
-			__func__, dip, bp, in_f->ilf_ino);
-		error = -EFSCORRUPTED;
-		goto out_release;
-	}
-	ldip = item->ri_buf[1].i_addr;
-	if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) {
-		xfs_alert(mp,
-			"%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
-			__func__, item, in_f->ilf_ino);
-		error = -EFSCORRUPTED;
-		goto out_release;
-	}
-
-	/*
-	 * If the inode has an LSN in it, recover the inode only if it's less
-	 * than the lsn of the transaction we are replaying. Note: we still
-	 * need to replay an owner change even though the inode is more recent
-	 * than the transaction as there is no guarantee that all the btree
-	 * blocks are more recent than this transaction, too.
-	 */
-	if (dip->di_version >= 3) {
-		xfs_lsn_t	lsn = be64_to_cpu(dip->di_lsn);
-
-		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
-			trace_xfs_log_recover_inode_skip(log, in_f);
-			error = 0;
-			goto out_owner_change;
-		}
-	}
-
-	/*
-	 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
-	 * are transactional and if ordering is necessary we can determine that
-	 * more accurately by the LSN field in the V3 inode core. Don't trust
-	 * the inode versions we might be changing them here - use the
-	 * superblock flag to determine whether we need to look at di_flushiter
-	 * to skip replay when the on disk inode is newer than the log one
-	 */
-	if (!xfs_sb_version_has_v3inode(&mp->m_sb) &&
-	    ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
-		/*
-		 * Deal with the wrap case, DI_MAX_FLUSH is less
-		 * than smaller numbers
-		 */
-		if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
-		    ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
-			/* do nothing */
-		} else {
-			trace_xfs_log_recover_inode_skip(log, in_f);
-			error = 0;
-			goto out_release;
-		}
-	}
-
-	/* Take the opportunity to reset the flush iteration count */
-	ldip->di_flushiter = 0;
-
-	if (unlikely(S_ISREG(ldip->di_mode))) {
-		if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
-		    (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
-			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
-					 XFS_ERRLEVEL_LOW, mp, ldip,
-					 sizeof(*ldip));
-			xfs_alert(mp,
-		"%s: Bad regular inode log record, rec ptr "PTR_FMT", "
-		"ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
-				__func__, item, dip, bp, in_f->ilf_ino);
-			error = -EFSCORRUPTED;
-			goto out_release;
-		}
-	} else if (unlikely(S_ISDIR(ldip->di_mode))) {
-		if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
-		    (ldip->di_format != XFS_DINODE_FMT_BTREE) &&
-		    (ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
-			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
-					     XFS_ERRLEVEL_LOW, mp, ldip,
-					     sizeof(*ldip));
-			xfs_alert(mp,
-		"%s: Bad dir inode log record, rec ptr "PTR_FMT", "
-		"ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
-				__func__, item, dip, bp, in_f->ilf_ino);
-			error = -EFSCORRUPTED;
-			goto out_release;
-		}
-	}
-	if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
-		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
-				     XFS_ERRLEVEL_LOW, mp, ldip,
-				     sizeof(*ldip));
-		xfs_alert(mp,
-	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
-	"dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
-			__func__, item, dip, bp, in_f->ilf_ino,
-			ldip->di_nextents + ldip->di_anextents,
-			ldip->di_nblocks);
-		error = -EFSCORRUPTED;
-		goto out_release;
-	}
-	if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
-		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
-				     XFS_ERRLEVEL_LOW, mp, ldip,
-				     sizeof(*ldip));
-		xfs_alert(mp,
-	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
-	"dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__,
-			item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
-		error = -EFSCORRUPTED;
-		goto out_release;
-	}
-	isize = xfs_log_dinode_size(mp);
-	if (unlikely(item->ri_buf[1].i_len > isize)) {
-		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
-				     XFS_ERRLEVEL_LOW, mp, ldip,
-				     sizeof(*ldip));
-		xfs_alert(mp,
-			"%s: Bad inode log record length %d, rec ptr "PTR_FMT,
-			__func__, item->ri_buf[1].i_len, item);
-		error = -EFSCORRUPTED;
-		goto out_release;
-	}
-
-	/* recover the log dinode inode into the on disk inode */
-	xfs_log_dinode_to_disk(ldip, dip);
-
-	fields = in_f->ilf_fields;
-	if (fields & XFS_ILOG_DEV)
-		xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
-
-	if (in_f->ilf_size == 2)
-		goto out_owner_change;
-	len = item->ri_buf[2].i_len;
-	src = item->ri_buf[2].i_addr;
-	ASSERT(in_f->ilf_size <= 4);
-	ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
-	ASSERT(!(fields & XFS_ILOG_DFORK) ||
-	       (len == in_f->ilf_dsize));
-
-	switch (fields & XFS_ILOG_DFORK) {
-	case XFS_ILOG_DDATA:
-	case XFS_ILOG_DEXT:
-		memcpy(XFS_DFORK_DPTR(dip), src, len);
-		break;
-
-	case XFS_ILOG_DBROOT:
-		xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
-				 (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip),
-				 XFS_DFORK_DSIZE(dip, mp));
-		break;
-
-	default:
-		/*
-		 * There are no data fork flags set.
-		 */
-		ASSERT((fields & XFS_ILOG_DFORK) == 0);
-		break;
-	}
-
-	/*
-	 * If we logged any attribute data, recover it.  There may or
-	 * may not have been any other non-core data logged in this
-	 * transaction.
-	 */
-	if (in_f->ilf_fields & XFS_ILOG_AFORK) {
-		if (in_f->ilf_fields & XFS_ILOG_DFORK) {
-			attr_index = 3;
-		} else {
-			attr_index = 2;
-		}
-		len = item->ri_buf[attr_index].i_len;
-		src = item->ri_buf[attr_index].i_addr;
-		ASSERT(len == in_f->ilf_asize);
-
-		switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
-		case XFS_ILOG_ADATA:
-		case XFS_ILOG_AEXT:
-			dest = XFS_DFORK_APTR(dip);
-			ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
-			memcpy(dest, src, len);
-			break;
-
-		case XFS_ILOG_ABROOT:
-			dest = XFS_DFORK_APTR(dip);
-			xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
-					 len, (xfs_bmdr_block_t*)dest,
-					 XFS_DFORK_ASIZE(dip, mp));
-			break;
-
-		default:
-			xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
-			ASSERT(0);
-			error = -EFSCORRUPTED;
-			goto out_release;
-		}
-	}
-
-out_owner_change:
-	/* Recover the swapext owner change unless inode has been deleted */
-	if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) &&
-	    (dip->di_mode != 0))
-		error = xfs_recover_inode_owner_change(mp, dip, in_f,
-						       buffer_list);
-	/* re-generate the checksum. */
-	xfs_dinode_calc_crc(log->l_mp, dip);
-
-	ASSERT(bp->b_mount == mp);
-	bp->b_iodone = xlog_recover_iodone;
-	xfs_buf_delwri_queue(bp, buffer_list);
-
-out_release:
-	xfs_buf_relse(bp);
-error:
-	if (need_free)
-		kmem_free(in_f);
-	return error;
-}
-
 /*
  * Recover a dquot record
  */
@@ -3066,9 +2714,6 @@ xlog_recover_commit_pass2(
 				trans->r_lsn);
 
 	switch (ITEM_TYPE(item)) {
-	case XFS_LI_INODE:
-		return xlog_recover_inode_pass2(log, buffer_list, item,
-						 trans->r_lsn);
 	case XFS_LI_EFI:
 		return xlog_recover_efi_pass2(log, item, trans->r_lsn);
 	case XFS_LI_EFD:


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 07/28] xfs: refactor log recovery dquot item dispatch for pass2 commit functions
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (5 preceding siblings ...)
  2020-05-05  1:11 ` [PATCH 06/28] xfs: refactor log recovery inode " Darrick J. Wong
@ 2020-05-05  1:11 ` Darrick J. Wong
  2020-05-05  5:13   ` Chandan Babu R
  2020-05-06 15:11   ` Christoph Hellwig
  2020-05-05  1:11 ` [PATCH 08/28] xfs: refactor log recovery icreate " Darrick J. Wong
                   ` (20 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:11 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the log dquot item pass2 commit code into the per-item source code
files and use the dispatch function to call it.  We do these one at a
time because there's a lot of code to move.  No functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_dquot_item_recover.c |  109 ++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_log_recover.c        |  112 ---------------------------------------
 2 files changed, 109 insertions(+), 112 deletions(-)


diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
index ebc44c1bc2b1..07ff943972a3 100644
--- a/fs/xfs/xfs_dquot_item_recover.c
+++ b/fs/xfs/xfs_dquot_item_recover.c
@@ -53,9 +53,118 @@ xlog_recover_dquot_ra_pass2(
 			&xfs_dquot_buf_ra_ops);
 }
 
+/*
+ * Recover a dquot record
+ */
+STATIC int
+xlog_recover_dquot_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			current_lsn)
+{
+	struct xfs_mount		*mp = log->l_mp;
+	struct xfs_buf			*bp;
+	struct xfs_disk_dquot		*ddq, *recddq;
+	struct xfs_dq_logformat		*dq_f;
+	xfs_failaddr_t			fa;
+	int				error;
+	uint				type;
+
+	/*
+	 * Filesystems are required to send in quota flags at mount time.
+	 */
+	if (mp->m_qflags == 0)
+		return 0;
+
+	recddq = item->ri_buf[1].i_addr;
+	if (recddq == NULL) {
+		xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
+		return -EFSCORRUPTED;
+	}
+	if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) {
+		xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
+			item->ri_buf[1].i_len, __func__);
+		return -EFSCORRUPTED;
+	}
+
+	/*
+	 * This type of quotas was turned off, so ignore this record.
+	 */
+	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
+	ASSERT(type);
+	if (log->l_quotaoffs_flag & type)
+		return 0;
+
+	/*
+	 * At this point we know that quota was _not_ turned off.
+	 * Since the mount flags are not indicating to us otherwise, this
+	 * must mean that quota is on, and the dquot needs to be replayed.
+	 * Remember that we may not have fully recovered the superblock yet,
+	 * so we can't do the usual trick of looking at the SB quota bits.
+	 *
+	 * The other possibility, of course, is that the quota subsystem was
+	 * removed since the last mount - ENOSYS.
+	 */
+	dq_f = item->ri_buf[0].i_addr;
+	ASSERT(dq_f);
+	fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0);
+	if (fa) {
+		xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS",
+				dq_f->qlf_id, fa);
+		return -EFSCORRUPTED;
+	}
+	ASSERT(dq_f->qlf_len == 1);
+
+	/*
+	 * At this point we are assuming that the dquots have been allocated
+	 * and hence the buffer has valid dquots stamped in it. It should,
+	 * therefore, pass verifier validation. If the dquot is bad, then
+	 * we'll return an error here, so we don't need to specifically check
+	 * the dquot in the buffer after the verifier has run.
+	 */
+	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
+				   XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
+				   &xfs_dquot_buf_ops);
+	if (error)
+		return error;
+
+	ASSERT(bp);
+	ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
+
+	/*
+	 * If the dquot has an LSN in it, recover the dquot only if it's less
+	 * than the lsn of the transaction we are replaying.
+	 */
+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
+		xfs_lsn_t	lsn = be64_to_cpu(dqb->dd_lsn);
+
+		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+			goto out_release;
+		}
+	}
+
+	memcpy(ddq, recddq, item->ri_buf[1].i_len);
+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
+				 XFS_DQUOT_CRC_OFF);
+	}
+
+	ASSERT(dq_f->qlf_size == 2);
+	ASSERT(bp->b_mount == mp);
+	bp->b_iodone = xlog_recover_iodone;
+	xfs_buf_delwri_queue(bp, buffer_list);
+
+out_release:
+	xfs_buf_relse(bp);
+	return 0;
+}
+
 const struct xlog_recover_item_ops xlog_dquot_item_ops = {
 	.item_type		= XFS_LI_DQUOT,
 	.ra_pass2		= xlog_recover_dquot_ra_pass2,
+	.commit_pass2		= xlog_recover_dquot_commit_pass2,
 };
 
 /*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index cb5902550e8c..ea2a53b614c7 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2034,115 +2034,6 @@ xlog_buf_readahead(
 		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
 }
 
-/*
- * Recover a dquot record
- */
-STATIC int
-xlog_recover_dquot_pass2(
-	struct xlog			*log,
-	struct list_head		*buffer_list,
-	struct xlog_recover_item	*item,
-	xfs_lsn_t			current_lsn)
-{
-	xfs_mount_t		*mp = log->l_mp;
-	xfs_buf_t		*bp;
-	struct xfs_disk_dquot	*ddq, *recddq;
-	xfs_failaddr_t		fa;
-	int			error;
-	xfs_dq_logformat_t	*dq_f;
-	uint			type;
-
-
-	/*
-	 * Filesystems are required to send in quota flags at mount time.
-	 */
-	if (mp->m_qflags == 0)
-		return 0;
-
-	recddq = item->ri_buf[1].i_addr;
-	if (recddq == NULL) {
-		xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
-		return -EFSCORRUPTED;
-	}
-	if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) {
-		xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
-			item->ri_buf[1].i_len, __func__);
-		return -EFSCORRUPTED;
-	}
-
-	/*
-	 * This type of quotas was turned off, so ignore this record.
-	 */
-	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
-	ASSERT(type);
-	if (log->l_quotaoffs_flag & type)
-		return 0;
-
-	/*
-	 * At this point we know that quota was _not_ turned off.
-	 * Since the mount flags are not indicating to us otherwise, this
-	 * must mean that quota is on, and the dquot needs to be replayed.
-	 * Remember that we may not have fully recovered the superblock yet,
-	 * so we can't do the usual trick of looking at the SB quota bits.
-	 *
-	 * The other possibility, of course, is that the quota subsystem was
-	 * removed since the last mount - ENOSYS.
-	 */
-	dq_f = item->ri_buf[0].i_addr;
-	ASSERT(dq_f);
-	fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0);
-	if (fa) {
-		xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS",
-				dq_f->qlf_id, fa);
-		return -EFSCORRUPTED;
-	}
-	ASSERT(dq_f->qlf_len == 1);
-
-	/*
-	 * At this point we are assuming that the dquots have been allocated
-	 * and hence the buffer has valid dquots stamped in it. It should,
-	 * therefore, pass verifier validation. If the dquot is bad, then the
-	 * we'll return an error here, so we don't need to specifically check
-	 * the dquot in the buffer after the verifier has run.
-	 */
-	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
-				   XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
-				   &xfs_dquot_buf_ops);
-	if (error)
-		return error;
-
-	ASSERT(bp);
-	ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
-
-	/*
-	 * If the dquot has an LSN in it, recover the dquot only if it's less
-	 * than the lsn of the transaction we are replaying.
-	 */
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
-		struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
-		xfs_lsn_t	lsn = be64_to_cpu(dqb->dd_lsn);
-
-		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
-			goto out_release;
-		}
-	}
-
-	memcpy(ddq, recddq, item->ri_buf[1].i_len);
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
-		xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
-				 XFS_DQUOT_CRC_OFF);
-	}
-
-	ASSERT(dq_f->qlf_size == 2);
-	ASSERT(bp->b_mount == mp);
-	bp->b_iodone = xlog_recover_iodone;
-	xfs_buf_delwri_queue(bp, buffer_list);
-
-out_release:
-	xfs_buf_relse(bp);
-	return 0;
-}
-
 /*
  * This routine is called to create an in-core extent free intent
  * item from the efi format structure which was logged on disk.
@@ -2730,9 +2621,6 @@ xlog_recover_commit_pass2(
 		return xlog_recover_bui_pass2(log, item, trans->r_lsn);
 	case XFS_LI_BUD:
 		return xlog_recover_bud_pass2(log, item);
-	case XFS_LI_DQUOT:
-		return xlog_recover_dquot_pass2(log, buffer_list, item,
-						trans->r_lsn);
 	case XFS_LI_ICREATE:
 		return xlog_recover_do_icreate_pass2(log, buffer_list, item);
 	case XFS_LI_QUOTAOFF:


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 08/28] xfs: refactor log recovery icreate item dispatch for pass2 commit functions
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (6 preceding siblings ...)
  2020-05-05  1:11 ` [PATCH 07/28] xfs: refactor log recovery dquot " Darrick J. Wong
@ 2020-05-05  1:11 ` Darrick J. Wong
  2020-05-05  6:10   ` Chandan Babu R
  2020-05-06 15:11   ` Christoph Hellwig
  2020-05-05  1:11 ` [PATCH 09/28] xfs: refactor log recovery EFI " Darrick J. Wong
                   ` (19 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:11 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the log icreate item pass2 commit code into the per-item source code
files and use the dispatch function to call it.  We do these one at a
time because there's a lot of code to move.  No functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_icreate_item.c |  132 +++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_log_recover.c  |  126 -------------------------------------------
 2 files changed, 132 insertions(+), 126 deletions(-)


diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index 366c1e722a29..287a9e5c7d75 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -6,13 +6,19 @@
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_shared.h"
+#include "xfs_format.h"
 #include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
 #include "xfs_icreate_item.h"
 #include "xfs_log.h"
 #include "xfs_log_priv.h"
 #include "xfs_log_recover.h"
+#include "xfs_ialloc.h"
+#include "xfs_trace.h"
 
 kmem_zone_t	*xfs_icreate_zone;		/* inode create item zone */
 
@@ -123,7 +129,133 @@ xlog_recover_icreate_reorder(
 	return XLOG_REORDER_BUFFER_LIST;
 }
 
+/*
+ * This routine is called when an inode create format structure is found in a
+ * committed transaction in the log.  Its purpose is to initialise the inodes
+ * being allocated on disk. This requires us to get inode cluster buffers that
+ * match the range to be initialised, stamped with inode templates and written
+ * by delayed write so that subsequent modifications will hit the cached buffer
+ * and only need writing out at the end of recovery.
+ */
+STATIC int
+xlog_recover_icreate_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	struct xfs_mount		*mp = log->l_mp;
+	struct xfs_icreate_log		*icl;
+	struct xfs_ino_geometry		*igeo = M_IGEO(mp);
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			agbno;
+	unsigned int			count;
+	unsigned int			isize;
+	xfs_agblock_t			length;
+	int				bb_per_cluster;
+	int				cancel_count;
+	int				nbufs;
+	int				i;
+
+	icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
+	if (icl->icl_type != XFS_LI_ICREATE) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
+		return -EINVAL;
+	}
+
+	if (icl->icl_size != 1) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
+		return -EINVAL;
+	}
+
+	agno = be32_to_cpu(icl->icl_ag);
+	if (agno >= mp->m_sb.sb_agcount) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
+		return -EINVAL;
+	}
+	agbno = be32_to_cpu(icl->icl_agbno);
+	if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
+		return -EINVAL;
+	}
+	isize = be32_to_cpu(icl->icl_isize);
+	if (isize != mp->m_sb.sb_inodesize) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
+		return -EINVAL;
+	}
+	count = be32_to_cpu(icl->icl_count);
+	if (!count) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
+		return -EINVAL;
+	}
+	length = be32_to_cpu(icl->icl_length);
+	if (!length || length >= mp->m_sb.sb_agblocks) {
+		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
+		return -EINVAL;
+	}
+
+	/*
+	 * The inode chunk is either full or sparse and we only support
+	 * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
+	 */
+	if (length != igeo->ialloc_blks &&
+	    length != igeo->ialloc_min_blks) {
+		xfs_warn(log->l_mp,
+			 "%s: unsupported chunk length", __FUNCTION__);
+		return -EINVAL;
+	}
+
+	/* verify inode count is consistent with extent length */
+	if ((count >> mp->m_sb.sb_inopblog) != length) {
+		xfs_warn(log->l_mp,
+			 "%s: inconsistent inode count and chunk length",
+			 __FUNCTION__);
+		return -EINVAL;
+	}
+
+	/*
+	 * The icreate transaction can cover multiple cluster buffers and these
+	 * buffers could have been freed and reused. Check the individual
+	 * buffers for cancellation so we don't overwrite anything written after
+	 * a cancellation.
+	 */
+	bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
+	nbufs = length / igeo->blocks_per_cluster;
+	for (i = 0, cancel_count = 0; i < nbufs; i++) {
+		xfs_daddr_t	daddr;
+
+		daddr = XFS_AGB_TO_DADDR(mp, agno,
+				agbno + i * igeo->blocks_per_cluster);
+		if (xlog_is_buffer_cancelled(log, daddr, bb_per_cluster))
+			cancel_count++;
+	}
+
+	/*
+	 * We currently only use icreate for a single allocation at a time. This
+	 * means we should expect either all or none of the buffers to be
+	 * cancelled. Be conservative and skip replay if at least one buffer is
+	 * cancelled, but warn the user that something is awry if the buffers
+	 * are not consistent.
+	 *
+	 * XXX: This must be refined to only skip cancelled clusters once we use
+	 * icreate for multiple chunk allocations.
+	 */
+	ASSERT(!cancel_count || cancel_count == nbufs);
+	if (cancel_count) {
+		if (cancel_count != nbufs)
+			xfs_warn(mp,
+	"WARNING: partial inode chunk cancellation, skipped icreate.");
+		trace_xfs_log_recover_icreate_cancel(log, icl);
+		return 0;
+	}
+
+	trace_xfs_log_recover_icreate_recover(log, icl);
+	return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno,
+				     length, be32_to_cpu(icl->icl_gen));
+}
+
 const struct xlog_recover_item_ops xlog_icreate_item_ops = {
 	.item_type		= XFS_LI_ICREATE,
 	.reorder		= xlog_recover_icreate_reorder,
+	.commit_pass2		= xlog_recover_icreate_commit_pass2,
 };
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index ea2a53b614c7..86bf2da28dcd 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2467,130 +2467,6 @@ xlog_recover_bud_pass2(
 	return 0;
 }
 
-/*
- * This routine is called when an inode create format structure is found in a
- * committed transaction in the log.  It's purpose is to initialise the inodes
- * being allocated on disk. This requires us to get inode cluster buffers that
- * match the range to be initialised, stamped with inode templates and written
- * by delayed write so that subsequent modifications will hit the cached buffer
- * and only need writing out at the end of recovery.
- */
-STATIC int
-xlog_recover_do_icreate_pass2(
-	struct xlog		*log,
-	struct list_head	*buffer_list,
-	struct xlog_recover_item *item)
-{
-	struct xfs_mount	*mp = log->l_mp;
-	struct xfs_icreate_log	*icl;
-	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
-	xfs_agnumber_t		agno;
-	xfs_agblock_t		agbno;
-	unsigned int		count;
-	unsigned int		isize;
-	xfs_agblock_t		length;
-	int			bb_per_cluster;
-	int			cancel_count;
-	int			nbufs;
-	int			i;
-
-	icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
-	if (icl->icl_type != XFS_LI_ICREATE) {
-		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
-		return -EINVAL;
-	}
-
-	if (icl->icl_size != 1) {
-		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
-		return -EINVAL;
-	}
-
-	agno = be32_to_cpu(icl->icl_ag);
-	if (agno >= mp->m_sb.sb_agcount) {
-		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
-		return -EINVAL;
-	}
-	agbno = be32_to_cpu(icl->icl_agbno);
-	if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
-		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
-		return -EINVAL;
-	}
-	isize = be32_to_cpu(icl->icl_isize);
-	if (isize != mp->m_sb.sb_inodesize) {
-		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
-		return -EINVAL;
-	}
-	count = be32_to_cpu(icl->icl_count);
-	if (!count) {
-		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
-		return -EINVAL;
-	}
-	length = be32_to_cpu(icl->icl_length);
-	if (!length || length >= mp->m_sb.sb_agblocks) {
-		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
-		return -EINVAL;
-	}
-
-	/*
-	 * The inode chunk is either full or sparse and we only support
-	 * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
-	 */
-	if (length != igeo->ialloc_blks &&
-	    length != igeo->ialloc_min_blks) {
-		xfs_warn(log->l_mp,
-			 "%s: unsupported chunk length", __FUNCTION__);
-		return -EINVAL;
-	}
-
-	/* verify inode count is consistent with extent length */
-	if ((count >> mp->m_sb.sb_inopblog) != length) {
-		xfs_warn(log->l_mp,
-			 "%s: inconsistent inode count and chunk length",
-			 __FUNCTION__);
-		return -EINVAL;
-	}
-
-	/*
-	 * The icreate transaction can cover multiple cluster buffers and these
-	 * buffers could have been freed and reused. Check the individual
-	 * buffers for cancellation so we don't overwrite anything written after
-	 * a cancellation.
-	 */
-	bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
-	nbufs = length / igeo->blocks_per_cluster;
-	for (i = 0, cancel_count = 0; i < nbufs; i++) {
-		xfs_daddr_t	daddr;
-
-		daddr = XFS_AGB_TO_DADDR(mp, agno,
-				agbno + i * igeo->blocks_per_cluster);
-		if (xlog_is_buffer_cancelled(log, daddr, bb_per_cluster))
-			cancel_count++;
-	}
-
-	/*
-	 * We currently only use icreate for a single allocation at a time. This
-	 * means we should expect either all or none of the buffers to be
-	 * cancelled. Be conservative and skip replay if at least one buffer is
-	 * cancelled, but warn the user that something is awry if the buffers
-	 * are not consistent.
-	 *
-	 * XXX: This must be refined to only skip cancelled clusters once we use
-	 * icreate for multiple chunk allocations.
-	 */
-	ASSERT(!cancel_count || cancel_count == nbufs);
-	if (cancel_count) {
-		if (cancel_count != nbufs)
-			xfs_warn(mp,
-	"WARNING: partial inode chunk cancellation, skipped icreate.");
-		trace_xfs_log_recover_icreate_cancel(log, icl);
-		return 0;
-	}
-
-	trace_xfs_log_recover_icreate_recover(log, icl);
-	return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno,
-				     length, be32_to_cpu(icl->icl_gen));
-}
-
 STATIC int
 xlog_recover_commit_pass2(
 	struct xlog			*log,
@@ -2621,8 +2497,6 @@ xlog_recover_commit_pass2(
 		return xlog_recover_bui_pass2(log, item, trans->r_lsn);
 	case XFS_LI_BUD:
 		return xlog_recover_bud_pass2(log, item);
-	case XFS_LI_ICREATE:
-		return xlog_recover_do_icreate_pass2(log, buffer_list, item);
 	case XFS_LI_QUOTAOFF:
 		/* nothing to do in pass2 */
 		return 0;


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 09/28] xfs: refactor log recovery EFI item dispatch for pass2 commit functions
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (7 preceding siblings ...)
  2020-05-05  1:11 ` [PATCH 08/28] xfs: refactor log recovery icreate " Darrick J. Wong
@ 2020-05-05  1:11 ` Darrick J. Wong
  2020-05-05  6:46   ` Chandan Babu R
  2020-05-06 15:12   ` Christoph Hellwig
  2020-05-05  1:11 ` [PATCH 10/28] xfs: refactor log recovery RUI " Darrick J. Wong
                   ` (18 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:11 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the extent free intent and intent-done pass2 commit code into the
per-item source code files and use dispatch functions to call them.  We
do these one at a time because there's a lot of code to move.  No
functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_extfree_item.c |  107 ++++++++++++++++++++++++++++++++++++++++++++-
 fs/xfs/xfs_extfree_item.h |    4 --
 fs/xfs/xfs_log_recover.c  |  100 ------------------------------------------
 3 files changed, 104 insertions(+), 107 deletions(-)


diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index b43bb087aef3..dca098660753 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -22,6 +22,7 @@
 #include "xfs_bmap.h"
 #include "xfs_trace.h"
 #include "xfs_error.h"
+#include "xfs_log_priv.h"
 #include "xfs_log_recover.h"
 
 kmem_zone_t	*xfs_efi_zone;
@@ -32,7 +33,7 @@ static inline struct xfs_efi_log_item *EFI_ITEM(struct xfs_log_item *lip)
 	return container_of(lip, struct xfs_efi_log_item, efi_item);
 }
 
-void
+STATIC void
 xfs_efi_item_free(
 	struct xfs_efi_log_item	*efip)
 {
@@ -151,7 +152,7 @@ static const struct xfs_item_ops xfs_efi_item_ops = {
 /*
  * Allocate and initialize an efi item with the given number of extents.
  */
-struct xfs_efi_log_item *
+STATIC struct xfs_efi_log_item *
 xfs_efi_init(
 	struct xfs_mount	*mp,
 	uint			nextents)
@@ -185,7 +186,7 @@ xfs_efi_init(
  * one of which will be the native format for this kernel.
  * It will handle the conversion of formats if necessary.
  */
-int
+STATIC int
 xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
 {
 	xfs_efi_log_format_t *src_efi_fmt = buf->i_addr;
@@ -646,10 +647,110 @@ xfs_efi_recover(
 	return error;
 }
 
+/*
+ * This routine is called to create an in-core extent free intent
+ * item from the efi format structure which was logged on disk.
+ * It allocates an in-core efi, copies the extents from the format
+ * structure into it, and adds the efi to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_extfree_intent_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	struct xfs_mount		*mp = log->l_mp;
+	struct xfs_efi_log_item		*efip;
+	struct xfs_efi_log_format	*efi_formatp;
+	int				error;
+
+	efi_formatp = item->ri_buf[0].i_addr;
+
+	efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
+	error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
+	if (error) {
+		xfs_efi_item_free(efip);
+		return error;
+	}
+	atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
+
+	spin_lock(&log->l_ailp->ail_lock);
+	/*
+	 * The EFI has two references. One for the EFD and one for EFI to ensure
+	 * it makes it into the AIL. Insert the EFI into the AIL directly and
+	 * drop the EFI reference. Note that xfs_trans_ail_update() drops the
+	 * AIL lock.
+	 */
+	xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
+	xfs_efi_release(efip);
+	return 0;
+}
+
 const struct xlog_recover_item_ops xlog_extfree_intent_item_ops = {
 	.item_type		= XFS_LI_EFI,
+	.commit_pass2		= xlog_recover_extfree_intent_commit_pass2,
 };
 
+/*
+ * This routine is called when an EFD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding EFI if it
+ * was still in the log. To do this it searches the AIL for the EFI with an id
+ * equal to that in the EFD format structure. If we find it we drop the EFD
+ * reference, which removes the EFI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_extfree_done_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	struct xfs_ail_cursor		cur;
+	struct xfs_efd_log_format	*efd_formatp;
+	struct xfs_efi_log_item		*efip = NULL;
+	struct xfs_log_item		*lip;
+	struct xfs_ail			*ailp = log->l_ailp;
+	uint64_t			efi_id;
+
+	efd_formatp = item->ri_buf[0].i_addr;
+	ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
+		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
+	       (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
+		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
+	efi_id = efd_formatp->efd_efi_id;
+
+	/*
+	 * Search for the EFI with the id in the EFD format structure in the
+	 * AIL.
+	 */
+	spin_lock(&ailp->ail_lock);
+	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+	while (lip != NULL) {
+		if (lip->li_type == XFS_LI_EFI) {
+			efip = (struct xfs_efi_log_item *)lip;
+			if (efip->efi_format.efi_id == efi_id) {
+				/*
+				 * Drop the EFD reference to the EFI. This
+				 * removes the EFI from the AIL and frees it.
+				 */
+				spin_unlock(&ailp->ail_lock);
+				xfs_efi_release(efip);
+				spin_lock(&ailp->ail_lock);
+				break;
+			}
+		}
+		lip = xfs_trans_ail_cursor_next(ailp, &cur);
+	}
+
+	xfs_trans_ail_cursor_done(&cur);
+	spin_unlock(&ailp->ail_lock);
+
+	return 0;
+}
+
 const struct xlog_recover_item_ops xlog_extfree_done_item_ops = {
 	.item_type		= XFS_LI_EFD,
+	.commit_pass2		= xlog_recover_extfree_done_commit_pass2,
 };
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index a2a736a77fa9..876e3d237f48 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -78,10 +78,6 @@ struct xfs_efd_log_item {
 extern struct kmem_zone	*xfs_efi_zone;
 extern struct kmem_zone	*xfs_efd_zone;
 
-struct xfs_efi_log_item	*xfs_efi_init(struct xfs_mount *, uint);
-int			xfs_efi_copy_format(xfs_log_iovec_t *buf,
-					    xfs_efi_log_format_t *dst_efi_fmt);
-void			xfs_efi_item_free(struct xfs_efi_log_item *);
 void			xfs_efi_release(struct xfs_efi_log_item *);
 
 int			xfs_efi_recover(struct xfs_mount *mp,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 86bf2da28dcd..d7c5f75cf992 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2034,102 +2034,6 @@ xlog_buf_readahead(
 		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
 }
 
-/*
- * This routine is called to create an in-core extent free intent
- * item from the efi format structure which was logged on disk.
- * It allocates an in-core efi, copies the extents from the format
- * structure into it, and adds the efi to the AIL with the given
- * LSN.
- */
-STATIC int
-xlog_recover_efi_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item,
-	xfs_lsn_t			lsn)
-{
-	int				error;
-	struct xfs_mount		*mp = log->l_mp;
-	struct xfs_efi_log_item		*efip;
-	struct xfs_efi_log_format	*efi_formatp;
-
-	efi_formatp = item->ri_buf[0].i_addr;
-
-	efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
-	error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
-	if (error) {
-		xfs_efi_item_free(efip);
-		return error;
-	}
-	atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
-
-	spin_lock(&log->l_ailp->ail_lock);
-	/*
-	 * The EFI has two references. One for the EFD and one for EFI to ensure
-	 * it makes it into the AIL. Insert the EFI into the AIL directly and
-	 * drop the EFI reference. Note that xfs_trans_ail_update() drops the
-	 * AIL lock.
-	 */
-	xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
-	xfs_efi_release(efip);
-	return 0;
-}
-
-
-/*
- * This routine is called when an EFD format structure is found in a committed
- * transaction in the log. Its purpose is to cancel the corresponding EFI if it
- * was still in the log. To do this it searches the AIL for the EFI with an id
- * equal to that in the EFD format structure. If we find it we drop the EFD
- * reference, which removes the EFI from the AIL and frees it.
- */
-STATIC int
-xlog_recover_efd_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	xfs_efd_log_format_t	*efd_formatp;
-	struct xfs_efi_log_item	*efip = NULL;
-	struct xfs_log_item	*lip;
-	uint64_t		efi_id;
-	struct xfs_ail_cursor	cur;
-	struct xfs_ail		*ailp = log->l_ailp;
-
-	efd_formatp = item->ri_buf[0].i_addr;
-	ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
-		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
-	       (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
-		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
-	efi_id = efd_formatp->efd_efi_id;
-
-	/*
-	 * Search for the EFI with the id in the EFD format structure in the
-	 * AIL.
-	 */
-	spin_lock(&ailp->ail_lock);
-	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
-	while (lip != NULL) {
-		if (lip->li_type == XFS_LI_EFI) {
-			efip = (struct xfs_efi_log_item *)lip;
-			if (efip->efi_format.efi_id == efi_id) {
-				/*
-				 * Drop the EFD reference to the EFI. This
-				 * removes the EFI from the AIL and frees it.
-				 */
-				spin_unlock(&ailp->ail_lock);
-				xfs_efi_release(efip);
-				spin_lock(&ailp->ail_lock);
-				break;
-			}
-		}
-		lip = xfs_trans_ail_cursor_next(ailp, &cur);
-	}
-
-	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->ail_lock);
-
-	return 0;
-}
-
 /*
  * This routine is called to create an in-core extent rmap update
  * item from the rui format structure which was logged on disk.
@@ -2481,10 +2385,6 @@ xlog_recover_commit_pass2(
 				trans->r_lsn);
 
 	switch (ITEM_TYPE(item)) {
-	case XFS_LI_EFI:
-		return xlog_recover_efi_pass2(log, item, trans->r_lsn);
-	case XFS_LI_EFD:
-		return xlog_recover_efd_pass2(log, item);
 	case XFS_LI_RUI:
 		return xlog_recover_rui_pass2(log, item, trans->r_lsn);
 	case XFS_LI_RUD:


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 10/28] xfs: refactor log recovery RUI item dispatch for pass2 commit functions
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (8 preceding siblings ...)
  2020-05-05  1:11 ` [PATCH 09/28] xfs: refactor log recovery EFI " Darrick J. Wong
@ 2020-05-05  1:11 ` Darrick J. Wong
  2020-05-05  7:02   ` Chandan Babu R
                     ` (2 more replies)
  2020-05-05  1:11 ` [PATCH 11/28] xfs: refactor log recovery CUI " Darrick J. Wong
                   ` (17 subsequent siblings)
  27 siblings, 3 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:11 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the rmap update intent and intent-done pass2 commit code into the
per-item source code files and use dispatch functions to call them.  We
do these one at a time because there's a lot of code to move.  No
functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log_recover.c |   97 -------------------------------------------
 fs/xfs/xfs_rmap_item.c   |  104 +++++++++++++++++++++++++++++++++++++++++++++-
 fs/xfs/xfs_rmap_item.h   |    4 --
 3 files changed, 101 insertions(+), 104 deletions(-)


diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index d7c5f75cf992..0c0ce7bfc30e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2034,99 +2034,6 @@ xlog_buf_readahead(
 		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
 }
 
-/*
- * This routine is called to create an in-core extent rmap update
- * item from the rui format structure which was logged on disk.
- * It allocates an in-core rui, copies the extents from the format
- * structure into it, and adds the rui to the AIL with the given
- * LSN.
- */
-STATIC int
-xlog_recover_rui_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item,
-	xfs_lsn_t			lsn)
-{
-	int				error;
-	struct xfs_mount		*mp = log->l_mp;
-	struct xfs_rui_log_item		*ruip;
-	struct xfs_rui_log_format	*rui_formatp;
-
-	rui_formatp = item->ri_buf[0].i_addr;
-
-	ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
-	error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
-	if (error) {
-		xfs_rui_item_free(ruip);
-		return error;
-	}
-	atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
-
-	spin_lock(&log->l_ailp->ail_lock);
-	/*
-	 * The RUI has two references. One for the RUD and one for RUI to ensure
-	 * it makes it into the AIL. Insert the RUI into the AIL directly and
-	 * drop the RUI reference. Note that xfs_trans_ail_update() drops the
-	 * AIL lock.
-	 */
-	xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn);
-	xfs_rui_release(ruip);
-	return 0;
-}
-
-
-/*
- * This routine is called when an RUD format structure is found in a committed
- * transaction in the log. Its purpose is to cancel the corresponding RUI if it
- * was still in the log. To do this it searches the AIL for the RUI with an id
- * equal to that in the RUD format structure. If we find it we drop the RUD
- * reference, which removes the RUI from the AIL and frees it.
- */
-STATIC int
-xlog_recover_rud_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	struct xfs_rud_log_format	*rud_formatp;
-	struct xfs_rui_log_item		*ruip = NULL;
-	struct xfs_log_item		*lip;
-	uint64_t			rui_id;
-	struct xfs_ail_cursor		cur;
-	struct xfs_ail			*ailp = log->l_ailp;
-
-	rud_formatp = item->ri_buf[0].i_addr;
-	ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
-	rui_id = rud_formatp->rud_rui_id;
-
-	/*
-	 * Search for the RUI with the id in the RUD format structure in the
-	 * AIL.
-	 */
-	spin_lock(&ailp->ail_lock);
-	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
-	while (lip != NULL) {
-		if (lip->li_type == XFS_LI_RUI) {
-			ruip = (struct xfs_rui_log_item *)lip;
-			if (ruip->rui_format.rui_id == rui_id) {
-				/*
-				 * Drop the RUD reference to the RUI. This
-				 * removes the RUI from the AIL and frees it.
-				 */
-				spin_unlock(&ailp->ail_lock);
-				xfs_rui_release(ruip);
-				spin_lock(&ailp->ail_lock);
-				break;
-			}
-		}
-		lip = xfs_trans_ail_cursor_next(ailp, &cur);
-	}
-
-	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->ail_lock);
-
-	return 0;
-}
-
 /*
  * Copy an CUI format buffer from the given buf, and into the destination
  * CUI format structure.  The CUI/CUD items were designed not to need any
@@ -2385,10 +2292,6 @@ xlog_recover_commit_pass2(
 				trans->r_lsn);
 
 	switch (ITEM_TYPE(item)) {
-	case XFS_LI_RUI:
-		return xlog_recover_rui_pass2(log, item, trans->r_lsn);
-	case XFS_LI_RUD:
-		return xlog_recover_rud_pass2(log, item);
 	case XFS_LI_CUI:
 		return xlog_recover_cui_pass2(log, item, trans->r_lsn);
 	case XFS_LI_CUD:
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 3eb538674cb9..c87f4e429c12 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -18,6 +18,7 @@
 #include "xfs_log.h"
 #include "xfs_rmap.h"
 #include "xfs_error.h"
+#include "xfs_log_priv.h"
 #include "xfs_log_recover.h"
 
 kmem_zone_t	*xfs_rui_zone;
@@ -28,7 +29,7 @@ static inline struct xfs_rui_log_item *RUI_ITEM(struct xfs_log_item *lip)
 	return container_of(lip, struct xfs_rui_log_item, rui_item);
 }
 
-void
+STATIC void
 xfs_rui_item_free(
 	struct xfs_rui_log_item	*ruip)
 {
@@ -133,7 +134,7 @@ static const struct xfs_item_ops xfs_rui_item_ops = {
 /*
  * Allocate and initialize an rui item with the given number of extents.
  */
-struct xfs_rui_log_item *
+STATIC struct xfs_rui_log_item *
 xfs_rui_init(
 	struct xfs_mount		*mp,
 	uint				nextents)
@@ -161,7 +162,7 @@ xfs_rui_init(
  * RUI format structure.  The RUI/RUD items were designed not to need any
  * special alignment handling.
  */
-int
+STATIC int
 xfs_rui_copy_format(
 	struct xfs_log_iovec		*buf,
 	struct xfs_rui_log_format	*dst_rui_fmt)
@@ -587,10 +588,107 @@ xfs_rui_recover(
 	return error;
 }
 
+/*
+ * This routine is called to create an in-core extent rmap update
+ * item from the rui format structure which was logged on disk.
+ * It allocates an in-core rui, copies the extents from the format
+ * structure into it, and adds the rui to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_rmap_intent_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	int				error;
+	struct xfs_mount		*mp = log->l_mp;
+	struct xfs_rui_log_item		*ruip;
+	struct xfs_rui_log_format	*rui_formatp;
+
+	rui_formatp = item->ri_buf[0].i_addr;
+
+	ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
+	error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
+	if (error) {
+		xfs_rui_item_free(ruip);
+		return error;
+	}
+	atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
+
+	spin_lock(&log->l_ailp->ail_lock);
+	/*
+	 * The RUI has two references. One for the RUD and one for RUI to ensure
+	 * it makes it into the AIL. Insert the RUI into the AIL directly and
+	 * drop the RUI reference. Note that xfs_trans_ail_update() drops the
+	 * AIL lock.
+	 */
+	xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn);
+	xfs_rui_release(ruip);
+	return 0;
+}
+
 const struct xlog_recover_item_ops xlog_rmap_intent_item_ops = {
 	.item_type		= XFS_LI_RUI,
+	.commit_pass2		= xlog_recover_rmap_intent_commit_pass2,
 };
 
+/*
+ * This routine is called when an RUD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding RUI if it
+ * was still in the log. To do this it searches the AIL for the RUI with an id
+ * equal to that in the RUD format structure. If we find it we drop the RUD
+ * reference, which removes the RUI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_rmap_done_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	struct xfs_rud_log_format	*rud_formatp;
+	struct xfs_rui_log_item		*ruip = NULL;
+	struct xfs_log_item		*lip;
+	uint64_t			rui_id;
+	struct xfs_ail_cursor		cur;
+	struct xfs_ail			*ailp = log->l_ailp;
+
+	rud_formatp = item->ri_buf[0].i_addr;
+	ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
+	rui_id = rud_formatp->rud_rui_id;
+
+	/*
+	 * Search for the RUI with the id in the RUD format structure in the
+	 * AIL.
+	 */
+	spin_lock(&ailp->ail_lock);
+	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+	while (lip != NULL) {
+		if (lip->li_type == XFS_LI_RUI) {
+			ruip = (struct xfs_rui_log_item *)lip;
+			if (ruip->rui_format.rui_id == rui_id) {
+				/*
+				 * Drop the RUD reference to the RUI. This
+				 * removes the RUI from the AIL and frees it.
+				 */
+				spin_unlock(&ailp->ail_lock);
+				xfs_rui_release(ruip);
+				spin_lock(&ailp->ail_lock);
+				break;
+			}
+		}
+		lip = xfs_trans_ail_cursor_next(ailp, &cur);
+	}
+
+	xfs_trans_ail_cursor_done(&cur);
+	spin_unlock(&ailp->ail_lock);
+
+	return 0;
+}
+
 const struct xlog_recover_item_ops xlog_rmap_done_item_ops = {
 	.item_type		= XFS_LI_RUD,
+	.commit_pass2		= xlog_recover_rmap_done_commit_pass2,
 };
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
index 8708e4a5aa5c..89bd192779f8 100644
--- a/fs/xfs/xfs_rmap_item.h
+++ b/fs/xfs/xfs_rmap_item.h
@@ -77,10 +77,6 @@ struct xfs_rud_log_item {
 extern struct kmem_zone	*xfs_rui_zone;
 extern struct kmem_zone	*xfs_rud_zone;
 
-struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint);
-int xfs_rui_copy_format(struct xfs_log_iovec *buf,
-		struct xfs_rui_log_format *dst_rui_fmt);
-void xfs_rui_item_free(struct xfs_rui_log_item *);
 void xfs_rui_release(struct xfs_rui_log_item *);
 int xfs_rui_recover(struct xfs_mount *mp, struct xfs_rui_log_item *ruip);
 


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 11/28] xfs: refactor log recovery CUI item dispatch for pass2 commit functions
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (9 preceding siblings ...)
  2020-05-05  1:11 ` [PATCH 10/28] xfs: refactor log recovery RUI " Darrick J. Wong
@ 2020-05-05  1:11 ` Darrick J. Wong
  2020-05-05  7:06   ` Chandan Babu R
  2020-05-06 15:13   ` Christoph Hellwig
  2020-05-05  1:11 ` [PATCH 12/28] xfs: refactor log recovery BUI " Darrick J. Wong
                   ` (16 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:11 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the refcount update intent and intent-done pass2 commit code into
the per-item source code files and use dispatch functions to call them.
We do these one at a time because there's a lot of code to move.  No
functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log_recover.c   |  124 ------------------------------------------
 fs/xfs/xfs_refcount_item.c |  129 +++++++++++++++++++++++++++++++++++++++++++-
 fs/xfs/xfs_refcount_item.h |    2 -
 3 files changed, 127 insertions(+), 128 deletions(-)


diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0c0ce7bfc30e..23008b7cf93c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2034,126 +2034,6 @@ xlog_buf_readahead(
 		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
 }
 
-/*
- * Copy an CUI format buffer from the given buf, and into the destination
- * CUI format structure.  The CUI/CUD items were designed not to need any
- * special alignment handling.
- */
-static int
-xfs_cui_copy_format(
-	struct xfs_log_iovec		*buf,
-	struct xfs_cui_log_format	*dst_cui_fmt)
-{
-	struct xfs_cui_log_format	*src_cui_fmt;
-	uint				len;
-
-	src_cui_fmt = buf->i_addr;
-	len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);
-
-	if (buf->i_len == len) {
-		memcpy(dst_cui_fmt, src_cui_fmt, len);
-		return 0;
-	}
-	XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
-	return -EFSCORRUPTED;
-}
-
-/*
- * This routine is called to create an in-core extent refcount update
- * item from the cui format structure which was logged on disk.
- * It allocates an in-core cui, copies the extents from the format
- * structure into it, and adds the cui to the AIL with the given
- * LSN.
- */
-STATIC int
-xlog_recover_cui_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item,
-	xfs_lsn_t			lsn)
-{
-	int				error;
-	struct xfs_mount		*mp = log->l_mp;
-	struct xfs_cui_log_item		*cuip;
-	struct xfs_cui_log_format	*cui_formatp;
-
-	cui_formatp = item->ri_buf[0].i_addr;
-
-	cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
-	error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
-	if (error) {
-		xfs_cui_item_free(cuip);
-		return error;
-	}
-	atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
-
-	spin_lock(&log->l_ailp->ail_lock);
-	/*
-	 * The CUI has two references. One for the CUD and one for CUI to ensure
-	 * it makes it into the AIL. Insert the CUI into the AIL directly and
-	 * drop the CUI reference. Note that xfs_trans_ail_update() drops the
-	 * AIL lock.
-	 */
-	xfs_trans_ail_update(log->l_ailp, &cuip->cui_item, lsn);
-	xfs_cui_release(cuip);
-	return 0;
-}
-
-
-/*
- * This routine is called when an CUD format structure is found in a committed
- * transaction in the log. Its purpose is to cancel the corresponding CUI if it
- * was still in the log. To do this it searches the AIL for the CUI with an id
- * equal to that in the CUD format structure. If we find it we drop the CUD
- * reference, which removes the CUI from the AIL and frees it.
- */
-STATIC int
-xlog_recover_cud_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	struct xfs_cud_log_format	*cud_formatp;
-	struct xfs_cui_log_item		*cuip = NULL;
-	struct xfs_log_item		*lip;
-	uint64_t			cui_id;
-	struct xfs_ail_cursor		cur;
-	struct xfs_ail			*ailp = log->l_ailp;
-
-	cud_formatp = item->ri_buf[0].i_addr;
-	if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
-		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
-		return -EFSCORRUPTED;
-	}
-	cui_id = cud_formatp->cud_cui_id;
-
-	/*
-	 * Search for the CUI with the id in the CUD format structure in the
-	 * AIL.
-	 */
-	spin_lock(&ailp->ail_lock);
-	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
-	while (lip != NULL) {
-		if (lip->li_type == XFS_LI_CUI) {
-			cuip = (struct xfs_cui_log_item *)lip;
-			if (cuip->cui_format.cui_id == cui_id) {
-				/*
-				 * Drop the CUD reference to the CUI. This
-				 * removes the CUI from the AIL and frees it.
-				 */
-				spin_unlock(&ailp->ail_lock);
-				xfs_cui_release(cuip);
-				spin_lock(&ailp->ail_lock);
-				break;
-			}
-		}
-		lip = xfs_trans_ail_cursor_next(ailp, &cur);
-	}
-
-	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->ail_lock);
-
-	return 0;
-}
-
 /*
  * Copy an BUI format buffer from the given buf, and into the destination
  * BUI format structure.  The BUI/BUD items were designed not to need any
@@ -2292,10 +2172,6 @@ xlog_recover_commit_pass2(
 				trans->r_lsn);
 
 	switch (ITEM_TYPE(item)) {
-	case XFS_LI_CUI:
-		return xlog_recover_cui_pass2(log, item, trans->r_lsn);
-	case XFS_LI_CUD:
-		return xlog_recover_cud_pass2(log, item);
 	case XFS_LI_BUI:
 		return xlog_recover_bui_pass2(log, item, trans->r_lsn);
 	case XFS_LI_BUD:
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 0e8e8bab4344..28b41f5dd6bc 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -18,6 +18,7 @@
 #include "xfs_log.h"
 #include "xfs_refcount.h"
 #include "xfs_error.h"
+#include "xfs_log_priv.h"
 #include "xfs_log_recover.h"
 
 kmem_zone_t	*xfs_cui_zone;
@@ -28,7 +29,7 @@ static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
 	return container_of(lip, struct xfs_cui_log_item, cui_item);
 }
 
-void
+STATIC void
 xfs_cui_item_free(
 	struct xfs_cui_log_item	*cuip)
 {
@@ -134,7 +135,7 @@ static const struct xfs_item_ops xfs_cui_item_ops = {
 /*
  * Allocate and initialize an cui item with the given number of extents.
  */
-struct xfs_cui_log_item *
+STATIC struct xfs_cui_log_item *
 xfs_cui_init(
 	struct xfs_mount		*mp,
 	uint				nextents)
@@ -572,10 +573,134 @@ xfs_cui_recover(
 	return error;
 }
 
+/*
+ * Copy a CUI format buffer from the given buf into the destination
+ * CUI format structure.  The CUI/CUD items were designed not to need any
+ * special alignment handling.
+ */
+static int
+xfs_cui_copy_format(
+	struct xfs_log_iovec		*buf,
+	struct xfs_cui_log_format	*dst_cui_fmt)
+{
+	struct xfs_cui_log_format	*src_cui_fmt;
+	uint				len;
+
+	src_cui_fmt = buf->i_addr;
+	len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);
+
+	if (buf->i_len == len) {
+		memcpy(dst_cui_fmt, src_cui_fmt, len);
+		return 0;
+	}
+	XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+	return -EFSCORRUPTED;
+}
+
+/*
+ * This routine is called to create an in-core extent refcount update
+ * item from the cui format structure which was logged on disk.
+ * It allocates an in-core cui, copies the extents from the format
+ * structure into it, and adds the cui to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_refcount_intent_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	int				error;
+	struct xfs_mount		*mp = log->l_mp;
+	struct xfs_cui_log_item		*cuip;
+	struct xfs_cui_log_format	*cui_formatp;
+
+	cui_formatp = item->ri_buf[0].i_addr;
+
+	cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
+	error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
+	if (error) {
+		xfs_cui_item_free(cuip);
+		return error;
+	}
+	atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
+
+	spin_lock(&log->l_ailp->ail_lock);
+	/*
+	 * The CUI has two references. One for the CUD and one for CUI to ensure
+	 * it makes it into the AIL. Insert the CUI into the AIL directly and
+	 * drop the CUI reference. Note that xfs_trans_ail_update() drops the
+	 * AIL lock.
+	 */
+	xfs_trans_ail_update(log->l_ailp, &cuip->cui_item, lsn);
+	xfs_cui_release(cuip);
+	return 0;
+}
+
 const struct xlog_recover_item_ops xlog_refcount_intent_item_ops = {
 	.item_type		= XFS_LI_CUI,
+	.commit_pass2		= xlog_recover_refcount_intent_commit_pass2,
 };
 
+/*
+ * This routine is called when a CUD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding CUI if it
+ * was still in the log. To do this it searches the AIL for the CUI with an id
+ * equal to that in the CUD format structure. If we find it we drop the CUD
+ * reference, which removes the CUI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_refcount_done_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	struct xfs_cud_log_format	*cud_formatp;
+	struct xfs_cui_log_item		*cuip = NULL;
+	struct xfs_log_item		*lip;
+	uint64_t			cui_id;
+	struct xfs_ail_cursor		cur;
+	struct xfs_ail			*ailp = log->l_ailp;
+
+	cud_formatp = item->ri_buf[0].i_addr;
+	if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+		return -EFSCORRUPTED;
+	}
+	cui_id = cud_formatp->cud_cui_id;
+
+	/*
+	 * Search for the CUI with the id in the CUD format structure in the
+	 * AIL.
+	 */
+	spin_lock(&ailp->ail_lock);
+	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+	while (lip != NULL) {
+		if (lip->li_type == XFS_LI_CUI) {
+			cuip = (struct xfs_cui_log_item *)lip;
+			if (cuip->cui_format.cui_id == cui_id) {
+				/*
+				 * Drop the CUD reference to the CUI. This
+				 * removes the CUI from the AIL and frees it.
+				 */
+				spin_unlock(&ailp->ail_lock);
+				xfs_cui_release(cuip);
+				spin_lock(&ailp->ail_lock);
+				break;
+			}
+		}
+		lip = xfs_trans_ail_cursor_next(ailp, &cur);
+	}
+
+	xfs_trans_ail_cursor_done(&cur);
+	spin_unlock(&ailp->ail_lock);
+
+	return 0;
+}
+
 const struct xlog_recover_item_ops xlog_refcount_done_item_ops = {
 	.item_type		= XFS_LI_CUD,
+	.commit_pass2		= xlog_recover_refcount_done_commit_pass2,
 };
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
index e47530f30489..ebe12779eaac 100644
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -77,8 +77,6 @@ struct xfs_cud_log_item {
 extern struct kmem_zone	*xfs_cui_zone;
 extern struct kmem_zone	*xfs_cud_zone;
 
-struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint);
-void xfs_cui_item_free(struct xfs_cui_log_item *);
 void xfs_cui_release(struct xfs_cui_log_item *);
 int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
 


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 12/28] xfs: refactor log recovery BUI item dispatch for pass2 commit functions
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (10 preceding siblings ...)
  2020-05-05  1:11 ` [PATCH 11/28] xfs: refactor log recovery CUI " Darrick J. Wong
@ 2020-05-05  1:11 ` Darrick J. Wong
  2020-05-05  7:14   ` Chandan Babu R
  2020-05-06 15:14   ` Christoph Hellwig
  2020-05-05  1:11 ` [PATCH 13/28] xfs: remove log recovery quotaoff " Darrick J. Wong
                   ` (15 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:11 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the bmap update intent and intent-done pass2 commit code into the
per-item source code files and use dispatch functions to call them.  We
do these one at a time because there's a lot of code to move.  No
functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_item.c   |  133 +++++++++++++++++++++++++++++++++++++++++++++-
 fs/xfs/xfs_bmap_item.h   |    2 -
 fs/xfs/xfs_log_recover.c |  128 --------------------------------------------
 3 files changed, 131 insertions(+), 132 deletions(-)


diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 42354403fec7..0fbebef69e26 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -22,6 +22,7 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_trans_space.h"
 #include "xfs_error.h"
+#include "xfs_log_priv.h"
 #include "xfs_log_recover.h"
 
 kmem_zone_t	*xfs_bui_zone;
@@ -32,7 +33,7 @@ static inline struct xfs_bui_log_item *BUI_ITEM(struct xfs_log_item *lip)
 	return container_of(lip, struct xfs_bui_log_item, bui_item);
 }
 
-void
+STATIC void
 xfs_bui_item_free(
 	struct xfs_bui_log_item	*buip)
 {
@@ -135,7 +136,7 @@ static const struct xfs_item_ops xfs_bui_item_ops = {
 /*
  * Allocate and initialize an bui item with the given number of extents.
  */
-struct xfs_bui_log_item *
+STATIC struct xfs_bui_log_item *
 xfs_bui_init(
 	struct xfs_mount		*mp)
 
@@ -559,10 +560,138 @@ xfs_bui_recover(
 	return error;
 }
 
+/*
+ * Copy a BUI format buffer from the given buf into the destination
+ * BUI format structure.  The BUI/BUD items were designed not to need any
+ * special alignment handling.
+ */
+static int
+xfs_bui_copy_format(
+	struct xfs_log_iovec		*buf,
+	struct xfs_bui_log_format	*dst_bui_fmt)
+{
+	struct xfs_bui_log_format	*src_bui_fmt;
+	uint				len;
+
+	src_bui_fmt = buf->i_addr;
+	len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents);
+
+	if (buf->i_len == len) {
+		memcpy(dst_bui_fmt, src_bui_fmt, len);
+		return 0;
+	}
+	XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+	return -EFSCORRUPTED;
+}
+
+/*
+ * This routine is called to create an in-core extent bmap update
+ * item from the bui format structure which was logged on disk.
+ * It allocates an in-core bui, copies the extents from the format
+ * structure into it, and adds the bui to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_bmap_intent_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	int				error;
+	struct xfs_mount		*mp = log->l_mp;
+	struct xfs_bui_log_item		*buip;
+	struct xfs_bui_log_format	*bui_formatp;
+
+	bui_formatp = item->ri_buf[0].i_addr;
+
+	if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+		return -EFSCORRUPTED;
+	}
+	buip = xfs_bui_init(mp);
+	error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format);
+	if (error) {
+		xfs_bui_item_free(buip);
+		return error;
+	}
+	atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
+
+	spin_lock(&log->l_ailp->ail_lock);
+	/*
+	 * The BUI has two references. One for the BUD and one for BUI to ensure
+	 * it makes it into the AIL. Insert the BUI into the AIL directly and
+	 * drop the BUI reference. Note that xfs_trans_ail_update() drops the
+	 * AIL lock.
+	 */
+	xfs_trans_ail_update(log->l_ailp, &buip->bui_item, lsn);
+	xfs_bui_release(buip);
+	return 0;
+}
+
 const struct xlog_recover_item_ops xlog_bmap_intent_item_ops = {
 	.item_type		= XFS_LI_BUI,
+	.commit_pass2		= xlog_recover_bmap_intent_commit_pass2,
 };
 
+/*
+ * This routine is called when a BUD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding BUI if it
+ * was still in the log. To do this it searches the AIL for the BUI with an id
+ * equal to that in the BUD format structure. If we find it we drop the BUD
+ * reference, which removes the BUI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_bmap_done_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	struct xfs_bud_log_format	*bud_formatp;
+	struct xfs_bui_log_item		*buip = NULL;
+	struct xfs_log_item		*lip;
+	uint64_t			bui_id;
+	struct xfs_ail_cursor		cur;
+	struct xfs_ail			*ailp = log->l_ailp;
+
+	bud_formatp = item->ri_buf[0].i_addr;
+	if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) {
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+		return -EFSCORRUPTED;
+	}
+	bui_id = bud_formatp->bud_bui_id;
+
+	/*
+	 * Search for the BUI with the id in the BUD format structure in the
+	 * AIL.
+	 */
+	spin_lock(&ailp->ail_lock);
+	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+	while (lip != NULL) {
+		if (lip->li_type == XFS_LI_BUI) {
+			buip = (struct xfs_bui_log_item *)lip;
+			if (buip->bui_format.bui_id == bui_id) {
+				/*
+				 * Drop the BUD reference to the BUI. This
+				 * removes the BUI from the AIL and frees it.
+				 */
+				spin_unlock(&ailp->ail_lock);
+				xfs_bui_release(buip);
+				spin_lock(&ailp->ail_lock);
+				break;
+			}
+		}
+		lip = xfs_trans_ail_cursor_next(ailp, &cur);
+	}
+
+	xfs_trans_ail_cursor_done(&cur);
+	spin_unlock(&ailp->ail_lock);
+
+	return 0;
+}
+
 const struct xlog_recover_item_ops xlog_bmap_done_item_ops = {
 	.item_type		= XFS_LI_BUD,
+	.commit_pass2		= xlog_recover_bmap_done_commit_pass2,
 };
diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
index ad479cc73de8..515b1d5d6ab7 100644
--- a/fs/xfs/xfs_bmap_item.h
+++ b/fs/xfs/xfs_bmap_item.h
@@ -74,8 +74,6 @@ struct xfs_bud_log_item {
 extern struct kmem_zone	*xfs_bui_zone;
 extern struct kmem_zone	*xfs_bud_zone;
 
-struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *);
-void xfs_bui_item_free(struct xfs_bui_log_item *);
 void xfs_bui_release(struct xfs_bui_log_item *);
 int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip);
 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 23008b7cf93c..a5158e9e0662 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2034,130 +2034,6 @@ xlog_buf_readahead(
 		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
 }
 
-/*
- * Copy an BUI format buffer from the given buf, and into the destination
- * BUI format structure.  The BUI/BUD items were designed not to need any
- * special alignment handling.
- */
-static int
-xfs_bui_copy_format(
-	struct xfs_log_iovec		*buf,
-	struct xfs_bui_log_format	*dst_bui_fmt)
-{
-	struct xfs_bui_log_format	*src_bui_fmt;
-	uint				len;
-
-	src_bui_fmt = buf->i_addr;
-	len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents);
-
-	if (buf->i_len == len) {
-		memcpy(dst_bui_fmt, src_bui_fmt, len);
-		return 0;
-	}
-	XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
-	return -EFSCORRUPTED;
-}
-
-/*
- * This routine is called to create an in-core extent bmap update
- * item from the bui format structure which was logged on disk.
- * It allocates an in-core bui, copies the extents from the format
- * structure into it, and adds the bui to the AIL with the given
- * LSN.
- */
-STATIC int
-xlog_recover_bui_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item,
-	xfs_lsn_t			lsn)
-{
-	int				error;
-	struct xfs_mount		*mp = log->l_mp;
-	struct xfs_bui_log_item		*buip;
-	struct xfs_bui_log_format	*bui_formatp;
-
-	bui_formatp = item->ri_buf[0].i_addr;
-
-	if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
-		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
-		return -EFSCORRUPTED;
-	}
-	buip = xfs_bui_init(mp);
-	error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format);
-	if (error) {
-		xfs_bui_item_free(buip);
-		return error;
-	}
-	atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
-
-	spin_lock(&log->l_ailp->ail_lock);
-	/*
-	 * The RUI has two references. One for the RUD and one for RUI to ensure
-	 * it makes it into the AIL. Insert the RUI into the AIL directly and
-	 * drop the RUI reference. Note that xfs_trans_ail_update() drops the
-	 * AIL lock.
-	 */
-	xfs_trans_ail_update(log->l_ailp, &buip->bui_item, lsn);
-	xfs_bui_release(buip);
-	return 0;
-}
-
-
-/*
- * This routine is called when an BUD format structure is found in a committed
- * transaction in the log. Its purpose is to cancel the corresponding BUI if it
- * was still in the log. To do this it searches the AIL for the BUI with an id
- * equal to that in the BUD format structure. If we find it we drop the BUD
- * reference, which removes the BUI from the AIL and frees it.
- */
-STATIC int
-xlog_recover_bud_pass2(
-	struct xlog			*log,
-	struct xlog_recover_item	*item)
-{
-	struct xfs_bud_log_format	*bud_formatp;
-	struct xfs_bui_log_item		*buip = NULL;
-	struct xfs_log_item		*lip;
-	uint64_t			bui_id;
-	struct xfs_ail_cursor		cur;
-	struct xfs_ail			*ailp = log->l_ailp;
-
-	bud_formatp = item->ri_buf[0].i_addr;
-	if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) {
-		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
-		return -EFSCORRUPTED;
-	}
-	bui_id = bud_formatp->bud_bui_id;
-
-	/*
-	 * Search for the BUI with the id in the BUD format structure in the
-	 * AIL.
-	 */
-	spin_lock(&ailp->ail_lock);
-	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
-	while (lip != NULL) {
-		if (lip->li_type == XFS_LI_BUI) {
-			buip = (struct xfs_bui_log_item *)lip;
-			if (buip->bui_format.bui_id == bui_id) {
-				/*
-				 * Drop the BUD reference to the BUI. This
-				 * removes the BUI from the AIL and frees it.
-				 */
-				spin_unlock(&ailp->ail_lock);
-				xfs_bui_release(buip);
-				spin_lock(&ailp->ail_lock);
-				break;
-			}
-		}
-		lip = xfs_trans_ail_cursor_next(ailp, &cur);
-	}
-
-	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->ail_lock);
-
-	return 0;
-}
-
 STATIC int
 xlog_recover_commit_pass2(
 	struct xlog			*log,
@@ -2172,10 +2048,6 @@ xlog_recover_commit_pass2(
 				trans->r_lsn);
 
 	switch (ITEM_TYPE(item)) {
-	case XFS_LI_BUI:
-		return xlog_recover_bui_pass2(log, item, trans->r_lsn);
-	case XFS_LI_BUD:
-		return xlog_recover_bud_pass2(log, item);
 	case XFS_LI_QUOTAOFF:
 		/* nothing to do in pass2 */
 		return 0;


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 13/28] xfs: remove log recovery quotaoff item dispatch for pass2 commit functions
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (11 preceding siblings ...)
  2020-05-05  1:11 ` [PATCH 12/28] xfs: refactor log recovery BUI " Darrick J. Wong
@ 2020-05-05  1:11 ` Darrick J. Wong
  2020-05-05  7:32   ` Chandan Babu R
  2020-05-06 15:16   ` Christoph Hellwig
  2020-05-05  1:12 ` [PATCH 14/28] xfs: refactor recovered EFI log item playback Darrick J. Wong
                   ` (14 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:11 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

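Quotaoff items don't actually do anything during pass 2 of log recovery,
so take advantage of the commit_pass2 pointer being optional and get rid
of the switch statement clause.  That leaves xlog_recover_commit_pass2
with nothing but the dispatch call, so fold it into
xlog_recover_items_pass2.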

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_dquot_item_recover.c |    1 +
 fs/xfs/xfs_log_recover.c        |   33 ++++++---------------------------
 2 files changed, 7 insertions(+), 27 deletions(-)


diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
index 07ff943972a3..a07c1c8344d8 100644
--- a/fs/xfs/xfs_dquot_item_recover.c
+++ b/fs/xfs/xfs_dquot_item_recover.c
@@ -197,4 +197,5 @@ xlog_recover_quotaoff_commit_pass1(
 const struct xlog_recover_item_ops xlog_quotaoff_item_ops = {
 	.item_type		= XFS_LI_QUOTAOFF,
 	.commit_pass1		= xlog_recover_quotaoff_commit_pass1,
+	.commit_pass2		= NULL, /* nothing to do in pass2 */
 };
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a5158e9e0662..929e2caeeb42 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2034,31 +2034,6 @@ xlog_buf_readahead(
 		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
 }
 
-STATIC int
-xlog_recover_commit_pass2(
-	struct xlog			*log,
-	struct xlog_recover		*trans,
-	struct list_head		*buffer_list,
-	struct xlog_recover_item	*item)
-{
-	trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
-
-	if (item->ri_ops && item->ri_ops->commit_pass2)
-		return item->ri_ops->commit_pass2(log, buffer_list, item,
-				trans->r_lsn);
-
-	switch (ITEM_TYPE(item)) {
-	case XFS_LI_QUOTAOFF:
-		/* nothing to do in pass2 */
-		return 0;
-	default:
-		xfs_warn(log->l_mp, "%s: invalid item type (%d)",
-			__func__, ITEM_TYPE(item));
-		ASSERT(0);
-		return -EFSCORRUPTED;
-	}
-}
-
 STATIC int
 xlog_recover_items_pass2(
 	struct xlog                     *log,
@@ -2070,8 +2045,12 @@ xlog_recover_items_pass2(
 	int				error = 0;
 
 	list_for_each_entry(item, item_list, ri_list) {
-		error = xlog_recover_commit_pass2(log, trans,
-					  buffer_list, item);
+		trace_xfs_log_recover_item_recover(log, trans, item,
+				XLOG_RECOVER_PASS2);
+
+		if (item->ri_ops->commit_pass2)
+			error = item->ri_ops->commit_pass2(log, buffer_list,
+					item, trans->r_lsn);
 		if (error)
 			return error;
 	}


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 14/28] xfs: refactor recovered EFI log item playback
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (12 preceding siblings ...)
  2020-05-05  1:11 ` [PATCH 13/28] xfs: remove log recovery quotaoff " Darrick J. Wong
@ 2020-05-05  1:12 ` Darrick J. Wong
  2020-05-05  9:03   ` Chandan Babu R
  2020-05-06 15:18   ` Christoph Hellwig
  2020-05-05  1:12 ` [PATCH 15/28] xfs: refactor recovered RUI " Darrick J. Wong
                   ` (13 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the code that processes the log items created from the recovered
log items into the per-item source code files and use dispatch functions
to call them.  No functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_extfree_item.c |   47 +++++++++++++++++++++++++++++++++++----------
 fs/xfs/xfs_extfree_item.h |    5 -----
 fs/xfs/xfs_log_recover.c  |   46 ++++----------------------------------------
 fs/xfs/xfs_trans.h        |    1 +
 4 files changed, 42 insertions(+), 57 deletions(-)


diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index dca098660753..3fc8a9864217 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -28,6 +28,8 @@
 kmem_zone_t	*xfs_efi_zone;
 kmem_zone_t	*xfs_efd_zone;
 
+static const struct xfs_item_ops xfs_efi_item_ops;
+
 static inline struct xfs_efi_log_item *EFI_ITEM(struct xfs_log_item *lip)
 {
 	return container_of(lip, struct xfs_efi_log_item, efi_item);
@@ -51,7 +53,7 @@ xfs_efi_item_free(
  * committed vs unpin operations in bulk insert operations. Hence the reference
  * count to ensure only the last caller frees the EFI.
  */
-void
+STATIC void
 xfs_efi_release(
 	struct xfs_efi_log_item	*efip)
 {
@@ -141,14 +143,6 @@ xfs_efi_item_release(
 	xfs_efi_release(EFI_ITEM(lip));
 }
 
-static const struct xfs_item_ops xfs_efi_item_ops = {
-	.iop_size	= xfs_efi_item_size,
-	.iop_format	= xfs_efi_item_format,
-	.iop_unpin	= xfs_efi_item_unpin,
-	.iop_release	= xfs_efi_item_release,
-};
-
-
 /*
  * Allocate and initialize an efi item with the given number of extents.
  */
@@ -586,7 +580,7 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
  * Process an extent free intent item that was recovered from
  * the log.  We need to free the extents that it describes.
  */
-int
+STATIC int
 xfs_efi_recover(
 	struct xfs_mount	*mp,
 	struct xfs_efi_log_item	*efip)
@@ -647,6 +641,39 @@ xfs_efi_recover(
 	return error;
 }
 
+/* Recover the EFI if necessary. */
+STATIC int
+xfs_efi_item_recover(
+	struct xfs_log_item		*lip,
+	struct xfs_trans		*tp)
+{
+	struct xfs_ail			*ailp = lip->li_ailp;
+	struct xfs_efi_log_item		*efip;
+	int				error;
+
+	/*
+	 * Skip EFIs that we've already processed.
+	 */
+	efip = container_of(lip, struct xfs_efi_log_item, efi_item);
+	if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
+		return 0;
+
+	spin_unlock(&ailp->ail_lock);
+	error = xfs_efi_recover(tp->t_mountp, efip);
+	spin_lock(&ailp->ail_lock);
+
+	return error;
+}
+
+static const struct xfs_item_ops xfs_efi_item_ops = {
+	.iop_size	= xfs_efi_item_size,
+	.iop_format	= xfs_efi_item_format,
+	.iop_unpin	= xfs_efi_item_unpin,
+	.iop_release	= xfs_efi_item_release,
+	.iop_recover	= xfs_efi_item_recover,
+};
+
+
 /*
  * This routine is called to create an in-core extent free intent
  * item from the efi format structure which was logged on disk.
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 876e3d237f48..4b2c2c5c5985 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -78,9 +78,4 @@ struct xfs_efd_log_item {
 extern struct kmem_zone	*xfs_efi_zone;
 extern struct kmem_zone	*xfs_efd_zone;
 
-void			xfs_efi_release(struct xfs_efi_log_item *);
-
-int			xfs_efi_recover(struct xfs_mount *mp,
-					struct xfs_efi_log_item *efip);
-
 #endif	/* __XFS_EXTFREE_ITEM_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 929e2caeeb42..f12e14719202 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2553,46 +2553,6 @@ xlog_recover_process_data(
 	return 0;
 }
 
-/* Recover the EFI if necessary. */
-STATIC int
-xlog_recover_process_efi(
-	struct xfs_mount		*mp,
-	struct xfs_ail			*ailp,
-	struct xfs_log_item		*lip)
-{
-	struct xfs_efi_log_item		*efip;
-	int				error;
-
-	/*
-	 * Skip EFIs that we've already processed.
-	 */
-	efip = container_of(lip, struct xfs_efi_log_item, efi_item);
-	if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
-		return 0;
-
-	spin_unlock(&ailp->ail_lock);
-	error = xfs_efi_recover(mp, efip);
-	spin_lock(&ailp->ail_lock);
-
-	return error;
-}
-
-/* Release the EFI since we're cancelling everything. */
-STATIC void
-xlog_recover_cancel_efi(
-	struct xfs_mount		*mp,
-	struct xfs_ail			*ailp,
-	struct xfs_log_item		*lip)
-{
-	struct xfs_efi_log_item		*efip;
-
-	efip = container_of(lip, struct xfs_efi_log_item, efi_item);
-
-	spin_unlock(&ailp->ail_lock);
-	xfs_efi_release(efip);
-	spin_lock(&ailp->ail_lock);
-}
-
 /* Recover the RUI if necessary. */
 STATIC int
 xlog_recover_process_rui(
@@ -2837,7 +2797,7 @@ xlog_recover_process_intents(
 		 */
 		switch (lip->li_type) {
 		case XFS_LI_EFI:
-			error = xlog_recover_process_efi(log->l_mp, ailp, lip);
+			error = lip->li_ops->iop_recover(lip, parent_tp);
 			break;
 		case XFS_LI_RUI:
 			error = xlog_recover_process_rui(log->l_mp, ailp, lip);
@@ -2893,7 +2853,9 @@ xlog_recover_cancel_intents(
 
 		switch (lip->li_type) {
 		case XFS_LI_EFI:
-			xlog_recover_cancel_efi(log->l_mp, ailp, lip);
+			spin_unlock(&ailp->ail_lock);
+			lip->li_ops->iop_release(lip);
+			spin_lock(&ailp->ail_lock);
 			break;
 		case XFS_LI_RUI:
 			xlog_recover_cancel_rui(log->l_mp, ailp, lip);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 752c7fef9de7..3f6a79108991 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -77,6 +77,7 @@ struct xfs_item_ops {
 	void (*iop_release)(struct xfs_log_item *);
 	xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
 	void (*iop_error)(struct xfs_log_item *, xfs_buf_t *);
+	int (*iop_recover)(struct xfs_log_item *lip, struct xfs_trans *tp);
 };
 
 /*


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 15/28] xfs: refactor recovered RUI log item playback
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (13 preceding siblings ...)
  2020-05-05  1:12 ` [PATCH 14/28] xfs: refactor recovered EFI log item playback Darrick J. Wong
@ 2020-05-05  1:12 ` Darrick J. Wong
  2020-05-05  9:10   ` Chandan Babu R
                     ` (2 more replies)
  2020-05-05  1:12 ` [PATCH 16/28] xfs: refactor recovered CUI " Darrick J. Wong
                   ` (12 subsequent siblings)
  27 siblings, 3 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the code that processes the log items created from the recovered
log items into the per-item source code files and use dispatch functions
to call them.  No functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log_recover.c |   48 ++--------------------------------------------
 fs/xfs/xfs_rmap_item.c   |   44 ++++++++++++++++++++++++++++++++++--------
 fs/xfs/xfs_rmap_item.h   |    3 ---
 3 files changed, 37 insertions(+), 58 deletions(-)


diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index f12e14719202..da66484acaa7 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2553,46 +2553,6 @@ xlog_recover_process_data(
 	return 0;
 }
 
-/* Recover the RUI if necessary. */
-STATIC int
-xlog_recover_process_rui(
-	struct xfs_mount		*mp,
-	struct xfs_ail			*ailp,
-	struct xfs_log_item		*lip)
-{
-	struct xfs_rui_log_item		*ruip;
-	int				error;
-
-	/*
-	 * Skip RUIs that we've already processed.
-	 */
-	ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
-	if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
-		return 0;
-
-	spin_unlock(&ailp->ail_lock);
-	error = xfs_rui_recover(mp, ruip);
-	spin_lock(&ailp->ail_lock);
-
-	return error;
-}
-
-/* Release the RUI since we're cancelling everything. */
-STATIC void
-xlog_recover_cancel_rui(
-	struct xfs_mount		*mp,
-	struct xfs_ail			*ailp,
-	struct xfs_log_item		*lip)
-{
-	struct xfs_rui_log_item		*ruip;
-
-	ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
-
-	spin_unlock(&ailp->ail_lock);
-	xfs_rui_release(ruip);
-	spin_lock(&ailp->ail_lock);
-}
-
 /* Recover the CUI if necessary. */
 STATIC int
 xlog_recover_process_cui(
@@ -2797,10 +2757,8 @@ xlog_recover_process_intents(
 		 */
 		switch (lip->li_type) {
 		case XFS_LI_EFI:
-			error = lip->li_ops->iop_recover(lip, parent_tp);
-			break;
 		case XFS_LI_RUI:
-			error = xlog_recover_process_rui(log->l_mp, ailp, lip);
+			error = lip->li_ops->iop_recover(lip, parent_tp);
 			break;
 		case XFS_LI_CUI:
 			error = xlog_recover_process_cui(parent_tp, ailp, lip);
@@ -2853,13 +2811,11 @@ xlog_recover_cancel_intents(
 
 		switch (lip->li_type) {
 		case XFS_LI_EFI:
+		case XFS_LI_RUI:
 			spin_unlock(&ailp->ail_lock);
 			lip->li_ops->iop_release(lip);
 			spin_lock(&ailp->ail_lock);
 			break;
-		case XFS_LI_RUI:
-			xlog_recover_cancel_rui(log->l_mp, ailp, lip);
-			break;
 		case XFS_LI_CUI:
 			xlog_recover_cancel_cui(log->l_mp, ailp, lip);
 			break;
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index c87f4e429c12..e763dd8ed0a6 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -24,6 +24,8 @@
 kmem_zone_t	*xfs_rui_zone;
 kmem_zone_t	*xfs_rud_zone;
 
+static const struct xfs_item_ops xfs_rui_item_ops;
+
 static inline struct xfs_rui_log_item *RUI_ITEM(struct xfs_log_item *lip)
 {
 	return container_of(lip, struct xfs_rui_log_item, rui_item);
@@ -46,7 +48,7 @@ xfs_rui_item_free(
  * committed vs unpin operations in bulk insert operations. Hence the reference
  * count to ensure only the last caller frees the RUI.
  */
-void
+STATIC void
 xfs_rui_release(
 	struct xfs_rui_log_item	*ruip)
 {
@@ -124,13 +126,6 @@ xfs_rui_item_release(
 	xfs_rui_release(RUI_ITEM(lip));
 }
 
-static const struct xfs_item_ops xfs_rui_item_ops = {
-	.iop_size	= xfs_rui_item_size,
-	.iop_format	= xfs_rui_item_format,
-	.iop_unpin	= xfs_rui_item_unpin,
-	.iop_release	= xfs_rui_item_release,
-};
-
 /*
  * Allocate and initialize an rui item with the given number of extents.
  */
@@ -468,7 +463,7 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
  * Process an rmap update intent item that was recovered from the log.
  * We need to update the rmapbt.
  */
-int
+STATIC int
 xfs_rui_recover(
 	struct xfs_mount		*mp,
 	struct xfs_rui_log_item		*ruip)
@@ -588,6 +583,37 @@ xfs_rui_recover(
 	return error;
 }
 
+/* Recover the RUI if necessary. */
+STATIC int
+xfs_rui_item_recover(
+	struct xfs_log_item		*lip,
+	struct xfs_trans		*tp)
+{
+	struct xfs_ail			*ailp = lip->li_ailp;
+	struct xfs_rui_log_item		*ruip = RUI_ITEM(lip);
+	int				error;
+
+	/*
+	 * Skip RUIs that we've already processed.
+	 */
+	if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
+		return 0;
+
+	spin_unlock(&ailp->ail_lock);
+	error = xfs_rui_recover(tp->t_mountp, ruip);
+	spin_lock(&ailp->ail_lock);
+
+	return error;
+}
+
+static const struct xfs_item_ops xfs_rui_item_ops = {
+	.iop_size	= xfs_rui_item_size,
+	.iop_format	= xfs_rui_item_format,
+	.iop_unpin	= xfs_rui_item_unpin,
+	.iop_release	= xfs_rui_item_release,
+	.iop_recover	= xfs_rui_item_recover,
+};
+
 /*
  * This routine is called to create an in-core extent rmap update
  * item from the rui format structure which was logged on disk.
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
index 89bd192779f8..48a77a6f5c94 100644
--- a/fs/xfs/xfs_rmap_item.h
+++ b/fs/xfs/xfs_rmap_item.h
@@ -77,7 +77,4 @@ struct xfs_rud_log_item {
 extern struct kmem_zone	*xfs_rui_zone;
 extern struct kmem_zone	*xfs_rud_zone;
 
-void xfs_rui_release(struct xfs_rui_log_item *);
-int xfs_rui_recover(struct xfs_mount *mp, struct xfs_rui_log_item *ruip);
-
 #endif	/* __XFS_RMAP_ITEM_H__ */


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 16/28] xfs: refactor recovered CUI log item playback
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (14 preceding siblings ...)
  2020-05-05  1:12 ` [PATCH 15/28] xfs: refactor recovered RUI " Darrick J. Wong
@ 2020-05-05  1:12 ` Darrick J. Wong
  2020-05-05  9:29   ` Chandan Babu R
  2020-05-06 15:19   ` Christoph Hellwig
  2020-05-05  1:12 ` [PATCH 17/28] xfs: refactor recovered BUI " Darrick J. Wong
                   ` (11 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the code that processes refcount update intent (CUI) log items
created from recovered log items into xfs_refcount_item.c, and call it
through the iop_recover and iop_release dispatch functions.  No
functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log_recover.c   |   48 ++------------------------------------------
 fs/xfs/xfs_refcount_item.c |   44 ++++++++++++++++++++++++++++++++--------
 fs/xfs/xfs_refcount_item.h |    3 ---
 3 files changed, 37 insertions(+), 58 deletions(-)


diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index da66484acaa7..ad5ac97ed0c7 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2553,46 +2553,6 @@ xlog_recover_process_data(
 	return 0;
 }
 
-/* Recover the CUI if necessary. */
-STATIC int
-xlog_recover_process_cui(
-	struct xfs_trans		*parent_tp,
-	struct xfs_ail			*ailp,
-	struct xfs_log_item		*lip)
-{
-	struct xfs_cui_log_item		*cuip;
-	int				error;
-
-	/*
-	 * Skip CUIs that we've already processed.
-	 */
-	cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
-	if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
-		return 0;
-
-	spin_unlock(&ailp->ail_lock);
-	error = xfs_cui_recover(parent_tp, cuip);
-	spin_lock(&ailp->ail_lock);
-
-	return error;
-}
-
-/* Release the CUI since we're cancelling everything. */
-STATIC void
-xlog_recover_cancel_cui(
-	struct xfs_mount		*mp,
-	struct xfs_ail			*ailp,
-	struct xfs_log_item		*lip)
-{
-	struct xfs_cui_log_item		*cuip;
-
-	cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
-
-	spin_unlock(&ailp->ail_lock);
-	xfs_cui_release(cuip);
-	spin_lock(&ailp->ail_lock);
-}
-
 /* Recover the BUI if necessary. */
 STATIC int
 xlog_recover_process_bui(
@@ -2758,10 +2718,8 @@ xlog_recover_process_intents(
 		switch (lip->li_type) {
 		case XFS_LI_EFI:
 		case XFS_LI_RUI:
-			error = lip->li_ops->iop_recover(lip, parent_tp);
-			break;
 		case XFS_LI_CUI:
-			error = xlog_recover_process_cui(parent_tp, ailp, lip);
+			error = lip->li_ops->iop_recover(lip, parent_tp);
 			break;
 		case XFS_LI_BUI:
 			error = xlog_recover_process_bui(parent_tp, ailp, lip);
@@ -2812,13 +2770,11 @@ xlog_recover_cancel_intents(
 		switch (lip->li_type) {
 		case XFS_LI_EFI:
 		case XFS_LI_RUI:
+		case XFS_LI_CUI:
 			spin_unlock(&ailp->ail_lock);
 			lip->li_ops->iop_release(lip);
 			spin_lock(&ailp->ail_lock);
 			break;
-		case XFS_LI_CUI:
-			xlog_recover_cancel_cui(log->l_mp, ailp, lip);
-			break;
 		case XFS_LI_BUI:
 			xlog_recover_cancel_bui(log->l_mp, ailp, lip);
 			break;
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 28b41f5dd6bc..5b72eebd8764 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -24,6 +24,8 @@
 kmem_zone_t	*xfs_cui_zone;
 kmem_zone_t	*xfs_cud_zone;
 
+static const struct xfs_item_ops xfs_cui_item_ops;
+
 static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
 {
 	return container_of(lip, struct xfs_cui_log_item, cui_item);
@@ -46,7 +48,7 @@ xfs_cui_item_free(
  * committed vs unpin operations in bulk insert operations. Hence the reference
  * count to ensure only the last caller frees the CUI.
  */
-void
+STATIC void
 xfs_cui_release(
 	struct xfs_cui_log_item	*cuip)
 {
@@ -125,13 +127,6 @@ xfs_cui_item_release(
 	xfs_cui_release(CUI_ITEM(lip));
 }
 
-static const struct xfs_item_ops xfs_cui_item_ops = {
-	.iop_size	= xfs_cui_item_size,
-	.iop_format	= xfs_cui_item_format,
-	.iop_unpin	= xfs_cui_item_unpin,
-	.iop_release	= xfs_cui_item_release,
-};
-
 /*
  * Allocate and initialize an cui item with the given number of extents.
  */
@@ -425,7 +420,7 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
  * Process a refcount update intent item that was recovered from the log.
  * We need to update the refcountbt.
  */
-int
+STATIC int
 xfs_cui_recover(
 	struct xfs_trans		*parent_tp,
 	struct xfs_cui_log_item		*cuip)
@@ -573,6 +568,37 @@ xfs_cui_recover(
 	return error;
 }
 
+/* Recover the CUI if necessary. */
+STATIC int
+xfs_cui_item_recover(
+	struct xfs_log_item		*lip,
+	struct xfs_trans		*tp)
+{
+	struct xfs_ail			*ailp = lip->li_ailp;
+	struct xfs_cui_log_item		*cuip = CUI_ITEM(lip);
+	int				error;
+
+	/*
+	 * Skip CUIs that we've already processed.
+	 */
+	if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
+		return 0;
+
+	spin_unlock(&ailp->ail_lock);
+	error = xfs_cui_recover(tp, cuip);
+	spin_lock(&ailp->ail_lock);
+
+	return error;
+}
+
+static const struct xfs_item_ops xfs_cui_item_ops = {
+	.iop_size	= xfs_cui_item_size,
+	.iop_format	= xfs_cui_item_format,
+	.iop_unpin	= xfs_cui_item_unpin,
+	.iop_release	= xfs_cui_item_release,
+	.iop_recover	= xfs_cui_item_recover,
+};
+
 /*
  * Copy an CUI format buffer from the given buf, and into the destination
  * CUI format structure.  The CUI/CUD items were designed not to need any
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
index ebe12779eaac..cfaa857673a6 100644
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -77,7 +77,4 @@ struct xfs_cud_log_item {
 extern struct kmem_zone	*xfs_cui_zone;
 extern struct kmem_zone	*xfs_cud_zone;
 
-void xfs_cui_release(struct xfs_cui_log_item *);
-int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
-
 #endif	/* __XFS_REFCOUNT_ITEM_H__ */


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 17/28] xfs: refactor recovered BUI log item playback
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (15 preceding siblings ...)
  2020-05-05  1:12 ` [PATCH 16/28] xfs: refactor recovered CUI " Darrick J. Wong
@ 2020-05-05  1:12 ` Darrick J. Wong
  2020-05-05  9:49   ` Chandan Babu R
  2020-05-06 15:21   ` Christoph Hellwig
  2020-05-05  1:12 ` [PATCH 18/28] xfs: refactor unlinked inode recovery Darrick J. Wong
                   ` (10 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

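Move the code that processes bmap update intent (BUI) log items created
from recovered log items into xfs_bmap_item.c, and call it through the
iop_recover and iop_release dispatch functions.  Now that every intent
type uses the dispatch functions, the per-type switch statements go
away.  No functional changes.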

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_item.c   |   44 ++++++++++++++++++----
 fs/xfs/xfs_bmap_item.h   |    3 --
 fs/xfs/xfs_log_recover.c |   91 ++++++----------------------------------------
 3 files changed, 47 insertions(+), 91 deletions(-)


diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 0fbebef69e26..f88ebf8634c4 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -28,6 +28,8 @@
 kmem_zone_t	*xfs_bui_zone;
 kmem_zone_t	*xfs_bud_zone;
 
+static const struct xfs_item_ops xfs_bui_item_ops;
+
 static inline struct xfs_bui_log_item *BUI_ITEM(struct xfs_log_item *lip)
 {
 	return container_of(lip, struct xfs_bui_log_item, bui_item);
@@ -47,7 +49,7 @@ xfs_bui_item_free(
  * committed vs unpin operations in bulk insert operations. Hence the reference
  * count to ensure only the last caller frees the BUI.
  */
-void
+STATIC void
 xfs_bui_release(
 	struct xfs_bui_log_item	*buip)
 {
@@ -126,13 +128,6 @@ xfs_bui_item_release(
 	xfs_bui_release(BUI_ITEM(lip));
 }
 
-static const struct xfs_item_ops xfs_bui_item_ops = {
-	.iop_size	= xfs_bui_item_size,
-	.iop_format	= xfs_bui_item_format,
-	.iop_unpin	= xfs_bui_item_unpin,
-	.iop_release	= xfs_bui_item_release,
-};
-
 /*
  * Allocate and initialize an bui item with the given number of extents.
  */
@@ -425,7 +420,7 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
  * Process a bmap update intent item that was recovered from the log.
  * We need to update some inode's bmbt.
  */
-int
+STATIC int
 xfs_bui_recover(
 	struct xfs_trans		*parent_tp,
 	struct xfs_bui_log_item		*buip)
@@ -560,6 +555,37 @@ xfs_bui_recover(
 	return error;
 }
 
+/* Recover the BUI if necessary. */
+STATIC int
+xfs_bui_item_recover(
+	struct xfs_log_item		*lip,
+	struct xfs_trans		*tp)
+{
+	struct xfs_ail			*ailp = lip->li_ailp;
+	struct xfs_bui_log_item		*buip = BUI_ITEM(lip);
+	int				error;
+
+	/*
+	 * Skip BUIs that we've already processed.
+	 */
+	if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags))
+		return 0;
+
+	spin_unlock(&ailp->ail_lock);
+	error = xfs_bui_recover(tp, buip);
+	spin_lock(&ailp->ail_lock);
+
+	return error;
+}
+
+static const struct xfs_item_ops xfs_bui_item_ops = {
+	.iop_size	= xfs_bui_item_size,
+	.iop_format	= xfs_bui_item_format,
+	.iop_unpin	= xfs_bui_item_unpin,
+	.iop_release	= xfs_bui_item_release,
+	.iop_recover	= xfs_bui_item_recover,
+};
+
 /*
  * Copy an BUI format buffer from the given buf, and into the destination
  * BUI format structure.  The BUI/BUD items were designed not to need any
diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
index 515b1d5d6ab7..44d06e62f8f9 100644
--- a/fs/xfs/xfs_bmap_item.h
+++ b/fs/xfs/xfs_bmap_item.h
@@ -74,7 +74,4 @@ struct xfs_bud_log_item {
 extern struct kmem_zone	*xfs_bui_zone;
 extern struct kmem_zone	*xfs_bud_zone;
 
-void xfs_bui_release(struct xfs_bui_log_item *);
-int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip);
-
 #endif	/* __XFS_BMAP_ITEM_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index ad5ac97ed0c7..20ee32c2652d 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2553,60 +2553,6 @@ xlog_recover_process_data(
 	return 0;
 }
 
-/* Recover the BUI if necessary. */
-STATIC int
-xlog_recover_process_bui(
-	struct xfs_trans		*parent_tp,
-	struct xfs_ail			*ailp,
-	struct xfs_log_item		*lip)
-{
-	struct xfs_bui_log_item		*buip;
-	int				error;
-
-	/*
-	 * Skip BUIs that we've already processed.
-	 */
-	buip = container_of(lip, struct xfs_bui_log_item, bui_item);
-	if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags))
-		return 0;
-
-	spin_unlock(&ailp->ail_lock);
-	error = xfs_bui_recover(parent_tp, buip);
-	spin_lock(&ailp->ail_lock);
-
-	return error;
-}
-
-/* Release the BUI since we're cancelling everything. */
-STATIC void
-xlog_recover_cancel_bui(
-	struct xfs_mount		*mp,
-	struct xfs_ail			*ailp,
-	struct xfs_log_item		*lip)
-{
-	struct xfs_bui_log_item		*buip;
-
-	buip = container_of(lip, struct xfs_bui_log_item, bui_item);
-
-	spin_unlock(&ailp->ail_lock);
-	xfs_bui_release(buip);
-	spin_lock(&ailp->ail_lock);
-}
-
-/* Is this log item a deferred action intent? */
-static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
-{
-	switch (lip->li_type) {
-	case XFS_LI_EFI:
-	case XFS_LI_RUI:
-	case XFS_LI_CUI:
-	case XFS_LI_BUI:
-		return true;
-	default:
-		return false;
-	}
-}
-
 /* Take all the collected deferred ops and finish them in order. */
 static int
 xlog_finish_defer_ops(
@@ -2641,6 +2587,12 @@ xlog_finish_defer_ops(
 	return xfs_trans_commit(tp);
 }
 
+/* Is this log item a deferred action intent? */
+static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
+{
+	return lip->li_ops->iop_recover != NULL;
+}
+
 /*
  * When this is called, all of the log intent items which did not have
  * corresponding log done items should be in the AIL.  What we do now
@@ -2711,20 +2663,11 @@ xlog_recover_process_intents(
 
 		/*
 		 * NOTE: If your intent processing routine can create more
-		 * deferred ops, you /must/ attach them to the dfops in this
-		 * routine or else those subsequent intents will get
+		 * deferred ops, you /must/ attach them to the transaction in
+		 * this routine or else those subsequent intents will get
 		 * replayed in the wrong order!
 		 */
-		switch (lip->li_type) {
-		case XFS_LI_EFI:
-		case XFS_LI_RUI:
-		case XFS_LI_CUI:
-			error = lip->li_ops->iop_recover(lip, parent_tp);
-			break;
-		case XFS_LI_BUI:
-			error = xlog_recover_process_bui(parent_tp, ailp, lip);
-			break;
-		}
+		error = lip->li_ops->iop_recover(lip, parent_tp);
 		if (error)
 			goto out;
 		lip = xfs_trans_ail_cursor_next(ailp, &cur);
@@ -2767,19 +2710,9 @@ xlog_recover_cancel_intents(
 			break;
 		}
 
-		switch (lip->li_type) {
-		case XFS_LI_EFI:
-		case XFS_LI_RUI:
-		case XFS_LI_CUI:
-			spin_unlock(&ailp->ail_lock);
-			lip->li_ops->iop_release(lip);
-			spin_lock(&ailp->ail_lock);
-			break;
-		case XFS_LI_BUI:
-			xlog_recover_cancel_bui(log->l_mp, ailp, lip);
-			break;
-		}
-
+		spin_unlock(&ailp->ail_lock);
+		lip->li_ops->iop_release(lip);
+		spin_lock(&ailp->ail_lock);
 		lip = xfs_trans_ail_cursor_next(ailp, &cur);
 	}
 


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 18/28] xfs: refactor unlinked inode recovery
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (16 preceding siblings ...)
  2020-05-05  1:12 ` [PATCH 17/28] xfs: refactor recovered BUI " Darrick J. Wong
@ 2020-05-05  1:12 ` Darrick J. Wong
  2020-05-05 13:05   ` Chandan Babu R
  2020-05-06 15:26   ` Christoph Hellwig
  2020-05-05  1:12 ` [PATCH 19/28] xfs: refactor xlog_recover_process_unlinked Darrick J. Wong
                   ` (9 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the code that processes unlinked inodes into a separate file in
preparation for centralizing the log recovery bits that have to walk
every AG.  No functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/Makefile                 |    3 -
 fs/xfs/libxfs/xfs_log_recover.h |    1 
 fs/xfs/xfs_log_recover.c        |  177 -----------------------------------
 fs/xfs/xfs_unlink_recover.c     |  198 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 202 insertions(+), 177 deletions(-)
 create mode 100644 fs/xfs/xfs_unlink_recover.c


diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 04611a1068b4..505c898d6cee 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -109,7 +109,8 @@ xfs-y				+= xfs_log.o \
 				   xfs_rmap_item.o \
 				   xfs_log_recover.o \
 				   xfs_trans_ail.o \
-				   xfs_trans_buf.o
+				   xfs_trans_buf.o \
+				   xfs_unlink_recover.o
 
 # optional features
 xfs-$(CONFIG_XFS_QUOTA)		+= xfs_dquot.o \
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index a45f6e9fa47b..33c14dd22b77 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -124,5 +124,6 @@ bool xlog_add_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
 bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
 bool xlog_put_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
 void xlog_recover_iodone(struct xfs_buf *bp);
+void xlog_recover_process_unlinked(struct xlog *log);
 
 #endif	/* __XFS_LOG_RECOVER_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 20ee32c2652d..362296b34490 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2720,181 +2720,6 @@ xlog_recover_cancel_intents(
 	spin_unlock(&ailp->ail_lock);
 }
 
-/*
- * This routine performs a transaction to null out a bad inode pointer
- * in an agi unlinked inode hash bucket.
- */
-STATIC void
-xlog_recover_clear_agi_bucket(
-	xfs_mount_t	*mp,
-	xfs_agnumber_t	agno,
-	int		bucket)
-{
-	xfs_trans_t	*tp;
-	xfs_agi_t	*agi;
-	xfs_buf_t	*agibp;
-	int		offset;
-	int		error;
-
-	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp);
-	if (error)
-		goto out_error;
-
-	error = xfs_read_agi(mp, tp, agno, &agibp);
-	if (error)
-		goto out_abort;
-
-	agi = agibp->b_addr;
-	agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
-	offset = offsetof(xfs_agi_t, agi_unlinked) +
-		 (sizeof(xfs_agino_t) * bucket);
-	xfs_trans_log_buf(tp, agibp, offset,
-			  (offset + sizeof(xfs_agino_t) - 1));
-
-	error = xfs_trans_commit(tp);
-	if (error)
-		goto out_error;
-	return;
-
-out_abort:
-	xfs_trans_cancel(tp);
-out_error:
-	xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
-	return;
-}
-
-STATIC xfs_agino_t
-xlog_recover_process_one_iunlink(
-	struct xfs_mount		*mp,
-	xfs_agnumber_t			agno,
-	xfs_agino_t			agino,
-	int				bucket)
-{
-	struct xfs_buf			*ibp;
-	struct xfs_dinode		*dip;
-	struct xfs_inode		*ip;
-	xfs_ino_t			ino;
-	int				error;
-
-	ino = XFS_AGINO_TO_INO(mp, agno, agino);
-	error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
-	if (error)
-		goto fail;
-
-	/*
-	 * Get the on disk inode to find the next inode in the bucket.
-	 */
-	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0);
-	if (error)
-		goto fail_iput;
-
-	xfs_iflags_clear(ip, XFS_IRECOVERY);
-	ASSERT(VFS_I(ip)->i_nlink == 0);
-	ASSERT(VFS_I(ip)->i_mode != 0);
-
-	/* setup for the next pass */
-	agino = be32_to_cpu(dip->di_next_unlinked);
-	xfs_buf_relse(ibp);
-
-	/*
-	 * Prevent any DMAPI event from being sent when the reference on
-	 * the inode is dropped.
-	 */
-	ip->i_d.di_dmevmask = 0;
-
-	xfs_irele(ip);
-	return agino;
-
- fail_iput:
-	xfs_irele(ip);
- fail:
-	/*
-	 * We can't read in the inode this bucket points to, or this inode
-	 * is messed up.  Just ditch this bucket of inodes.  We will lose
-	 * some inodes and space, but at least we won't hang.
-	 *
-	 * Call xlog_recover_clear_agi_bucket() to perform a transaction to
-	 * clear the inode pointer in the bucket.
-	 */
-	xlog_recover_clear_agi_bucket(mp, agno, bucket);
-	return NULLAGINO;
-}
-
-/*
- * Recover AGI unlinked lists
- *
- * This is called during recovery to process any inodes which we unlinked but
- * not freed when the system crashed.  These inodes will be on the lists in the
- * AGI blocks. What we do here is scan all the AGIs and fully truncate and free
- * any inodes found on the lists. Each inode is removed from the lists when it
- * has been fully truncated and is freed. The freeing of the inode and its
- * removal from the list must be atomic.
- *
- * If everything we touch in the agi processing loop is already in memory, this
- * loop can hold the cpu for a long time. It runs without lock contention,
- * memory allocation contention, the need wait for IO, etc, and so will run
- * until we either run out of inodes to process, run low on memory or we run out
- * of log space.
- *
- * This behaviour is bad for latency on single CPU and non-preemptible kernels,
- * and can prevent other filesytem work (such as CIL pushes) from running. This
- * can lead to deadlocks if the recovery process runs out of log reservation
- * space. Hence we need to yield the CPU when there is other kernel work
- * scheduled on this CPU to ensure other scheduled work can run without undue
- * latency.
- */
-STATIC void
-xlog_recover_process_iunlinks(
-	struct xlog	*log)
-{
-	xfs_mount_t	*mp;
-	xfs_agnumber_t	agno;
-	xfs_agi_t	*agi;
-	xfs_buf_t	*agibp;
-	xfs_agino_t	agino;
-	int		bucket;
-	int		error;
-
-	mp = log->l_mp;
-
-	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
-		/*
-		 * Find the agi for this ag.
-		 */
-		error = xfs_read_agi(mp, NULL, agno, &agibp);
-		if (error) {
-			/*
-			 * AGI is b0rked. Don't process it.
-			 *
-			 * We should probably mark the filesystem as corrupt
-			 * after we've recovered all the ag's we can....
-			 */
-			continue;
-		}
-		/*
-		 * Unlock the buffer so that it can be acquired in the normal
-		 * course of the transaction to truncate and free each inode.
-		 * Because we are not racing with anyone else here for the AGI
-		 * buffer, we don't even need to hold it locked to read the
-		 * initial unlinked bucket entries out of the buffer. We keep
-		 * buffer reference though, so that it stays pinned in memory
-		 * while we need the buffer.
-		 */
-		agi = agibp->b_addr;
-		xfs_buf_unlock(agibp);
-
-		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
-			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
-			while (agino != NULLAGINO) {
-				agino = xlog_recover_process_one_iunlink(mp,
-							agno, agino, bucket);
-				cond_resched();
-			}
-		}
-		xfs_buf_rele(agibp);
-	}
-}
-
 STATIC void
 xlog_unpack_data(
 	struct xlog_rec_header	*rhead,
@@ -3574,7 +3399,7 @@ xlog_recover_finish(
 		 */
 		xfs_log_force(log->l_mp, XFS_LOG_SYNC);
 
-		xlog_recover_process_iunlinks(log);
+		xlog_recover_process_unlinked(log);
 
 		xlog_recover_check_summary(log);
 
diff --git a/fs/xfs/xfs_unlink_recover.c b/fs/xfs/xfs_unlink_recover.c
new file mode 100644
index 000000000000..2a19d096e88d
--- /dev/null
+++ b/fs/xfs/xfs_unlink_recover.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_log.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
+#include "xfs_trans_priv.h"
+#include "xfs_ialloc.h"
+#include "xfs_icache.h"
+
+/*
+ * This routine performs a transaction to null out a bad inode pointer
+ * in an agi unlinked inode hash bucket.
+ */
+STATIC void
+xlog_recover_clear_agi_bucket(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	int			bucket)
+{
+	struct xfs_trans	*tp;
+	struct xfs_agi		*agi;
+	struct xfs_buf		*agibp;
+	int			offset;
+	int			error;
+
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp);
+	if (error)
+		goto out_error;
+
+	error = xfs_read_agi(mp, tp, agno, &agibp);
+	if (error)
+		goto out_abort;
+
+	agi = agibp->b_addr;
+	agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
+	offset = offsetof(xfs_agi_t, agi_unlinked) +
+		 (sizeof(xfs_agino_t) * bucket);
+	xfs_trans_log_buf(tp, agibp, offset,
+			  (offset + sizeof(xfs_agino_t) - 1));
+
+	error = xfs_trans_commit(tp);
+	if (error)
+		goto out_error;
+	return;
+
+out_abort:
+	xfs_trans_cancel(tp);
+out_error:
+	xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
+	return;
+}
+
+STATIC xfs_agino_t
+xlog_recover_process_one_iunlink(
+	struct xfs_mount		*mp,
+	xfs_agnumber_t			agno,
+	xfs_agino_t			agino,
+	int				bucket)
+{
+	struct xfs_buf			*ibp;
+	struct xfs_dinode		*dip;
+	struct xfs_inode		*ip;
+	xfs_ino_t			ino;
+	int				error;
+
+	ino = XFS_AGINO_TO_INO(mp, agno, agino);
+	error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
+	if (error)
+		goto fail;
+
+	/*
+	 * Get the on disk inode to find the next inode in the bucket.
+	 */
+	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0);
+	if (error)
+		goto fail_iput;
+
+	xfs_iflags_clear(ip, XFS_IRECOVERY);
+	ASSERT(VFS_I(ip)->i_nlink == 0);
+	ASSERT(VFS_I(ip)->i_mode != 0);
+
+	/* setup for the next pass */
+	agino = be32_to_cpu(dip->di_next_unlinked);
+	xfs_buf_relse(ibp);
+
+	/*
+	 * Prevent any DMAPI event from being sent when the reference on
+	 * the inode is dropped.
+	 */
+	ip->i_d.di_dmevmask = 0;
+
+	xfs_irele(ip);
+	return agino;
+
+ fail_iput:
+	xfs_irele(ip);
+ fail:
+	/*
+	 * We can't read in the inode this bucket points to, or this inode
+	 * is messed up.  Just ditch this bucket of inodes.  We will lose
+	 * some inodes and space, but at least we won't hang.
+	 *
+	 * Call xlog_recover_clear_agi_bucket() to perform a transaction to
+	 * clear the inode pointer in the bucket.
+	 */
+	xlog_recover_clear_agi_bucket(mp, agno, bucket);
+	return NULLAGINO;
+}
+
+/*
+ * Recover AGI unlinked lists
+ *
+ * This is called during recovery to process any inodes which we unlinked but
+ * not freed when the system crashed.  These inodes will be on the lists in the
+ * AGI blocks. What we do here is scan all the AGIs and fully truncate and free
+ * any inodes found on the lists. Each inode is removed from the lists when it
+ * has been fully truncated and is freed. The freeing of the inode and its
+ * removal from the list must be atomic.
+ *
+ * If everything we touch in the agi processing loop is already in memory, this
+ * loop can hold the cpu for a long time. It runs without lock contention,
+ * memory allocation contention, the need to wait for IO, etc, and so will run
+ * until we either run out of inodes to process, run low on memory or we run out
+ * of log space.
+ *
+ * This behaviour is bad for latency on single CPU and non-preemptible kernels,
+ * and can prevent other filesystem work (such as CIL pushes) from running. This
+ * can lead to deadlocks if the recovery process runs out of log reservation
+ * space. Hence we need to yield the CPU when there is other kernel work
+ * scheduled on this CPU to ensure other scheduled work can run without undue
+ * latency.
+ */
+void
+xlog_recover_process_unlinked(
+	struct xlog		*log)
+{
+	struct xfs_mount	*mp;
+	struct xfs_agi		*agi;
+	struct xfs_buf		*agibp;
+	xfs_agnumber_t		agno;
+	xfs_agino_t		agino;
+	int			bucket;
+	int			error;
+
+	mp = log->l_mp;
+
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+		/*
+		 * Find the agi for this ag.
+		 */
+		error = xfs_read_agi(mp, NULL, agno, &agibp);
+		if (error) {
+			/*
+			 * AGI is b0rked. Don't process it.
+			 *
+			 * We should probably mark the filesystem as corrupt
+			 * after we've recovered all the ag's we can....
+			 */
+			continue;
+		}
+		/*
+		 * Unlock the buffer so that it can be acquired in the normal
+		 * course of the transaction to truncate and free each inode.
+		 * Because we are not racing with anyone else here for the AGI
+		 * buffer, we don't even need to hold it locked to read the
+		 * initial unlinked bucket entries out of the buffer. We keep
+		 * buffer reference though, so that it stays pinned in memory
+		 * while we need the buffer.
+		 */
+		agi = agibp->b_addr;
+		xfs_buf_unlock(agibp);
+
+		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
+			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
+			while (agino != NULLAGINO) {
+				agino = xlog_recover_process_one_iunlink(mp,
+							agno, agino, bucket);
+				cond_resched();
+			}
+		}
+		xfs_buf_rele(agibp);
+	}
+}


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 19/28] xfs: refactor xlog_recover_process_unlinked
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (17 preceding siblings ...)
  2020-05-05  1:12 ` [PATCH 18/28] xfs: refactor unlinked inode recovery Darrick J. Wong
@ 2020-05-05  1:12 ` Darrick J. Wong
  2020-05-05 13:19   ` Chandan Babu R
  2020-05-06 15:27   ` Christoph Hellwig
  2020-05-05  1:12 ` [PATCH 20/28] xfs: report iunlink recovery failure upwards Darrick J. Wong
                   ` (8 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Hoist the unlinked inode processing logic out of the AG loop and into
its own function.  No functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_unlink_recover.c |   91 +++++++++++++++++++++++++------------------
 1 file changed, 52 insertions(+), 39 deletions(-)


diff --git a/fs/xfs/xfs_unlink_recover.c b/fs/xfs/xfs_unlink_recover.c
index 2a19d096e88d..413b34085640 100644
--- a/fs/xfs/xfs_unlink_recover.c
+++ b/fs/xfs/xfs_unlink_recover.c
@@ -145,54 +145,67 @@ xlog_recover_process_one_iunlink(
  * scheduled on this CPU to ensure other scheduled work can run without undue
  * latency.
  */
-void
-xlog_recover_process_unlinked(
-	struct xlog		*log)
+STATIC int
+xlog_recover_process_iunlinked(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno)
 {
-	struct xfs_mount	*mp;
 	struct xfs_agi		*agi;
 	struct xfs_buf		*agibp;
-	xfs_agnumber_t		agno;
 	xfs_agino_t		agino;
 	int			bucket;
 	int			error;
 
-	mp = log->l_mp;
-
-	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
-		/*
-		 * Find the agi for this ag.
-		 */
-		error = xfs_read_agi(mp, NULL, agno, &agibp);
-		if (error) {
-			/*
-			 * AGI is b0rked. Don't process it.
-			 *
-			 * We should probably mark the filesystem as corrupt
-			 * after we've recovered all the ag's we can....
-			 */
-			continue;
-		}
+	/*
+	 * Find the agi for this ag.
+	 */
+	error = xfs_read_agi(mp, NULL, agno, &agibp);
+	if (error) {
 		/*
-		 * Unlock the buffer so that it can be acquired in the normal
-		 * course of the transaction to truncate and free each inode.
-		 * Because we are not racing with anyone else here for the AGI
-		 * buffer, we don't even need to hold it locked to read the
-		 * initial unlinked bucket entries out of the buffer. We keep
-		 * buffer reference though, so that it stays pinned in memory
-		 * while we need the buffer.
+		 * AGI is b0rked. Don't process it.
+		 *
+		 * We should probably mark the filesystem as corrupt
+		 * after we've recovered all the ag's we can....
 		 */
-		agi = agibp->b_addr;
-		xfs_buf_unlock(agibp);
-
-		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
-			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
-			while (agino != NULLAGINO) {
-				agino = xlog_recover_process_one_iunlink(mp,
-							agno, agino, bucket);
-				cond_resched();
-			}
+		return error;
+	}
+
+	/*
+	 * Unlock the buffer so that it can be acquired in the normal
+	 * course of the transaction to truncate and free each inode.
+	 * Because we are not racing with anyone else here for the AGI
+	 * buffer, we don't even need to hold it locked to read the
+	 * initial unlinked bucket entries out of the buffer. We keep
+	 * buffer reference though, so that it stays pinned in memory
+	 * while we need the buffer.
+	 */
+	agi = agibp->b_addr;
+	xfs_buf_unlock(agibp);
+
+	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
+		agino = be32_to_cpu(agi->agi_unlinked[bucket]);
+		while (agino != NULLAGINO) {
+			agino = xlog_recover_process_one_iunlink(mp,
+						agno, agino, bucket);
+			cond_resched();
 		}
-		xfs_buf_rele(agibp);
+	}
+	xfs_buf_rele(agibp);
+
+	return 0;
+}
+
+void
+xlog_recover_process_unlinked(
+	struct xlog		*log)
+{
+	struct xfs_mount	*mp = log->l_mp;
+	xfs_agnumber_t		agno;
+	int			error;
+
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+		error = xlog_recover_process_iunlinked(mp, agno);
+		if (error)
+			break;
 	}
 }


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 20/28] xfs: report iunlink recovery failure upwards
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (18 preceding siblings ...)
  2020-05-05  1:12 ` [PATCH 19/28] xfs: refactor xlog_recover_process_unlinked Darrick J. Wong
@ 2020-05-05  1:12 ` Darrick J. Wong
  2020-05-05 13:43   ` Chandan Babu R
  2020-05-06 15:27   ` Christoph Hellwig
  2020-05-05  1:12 ` [PATCH 21/28] xfs: refactor releasing finished intents during log recovery Darrick J. Wong
                   ` (7 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

If we fail to recover unlinked inodes due to corruption or whatnot, we
should report this upwards and fail the mount instead of continuing on
like nothing's wrong.  Eventually the user will trip over the busted
AGI anyway.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_log_recover.h |    2 +-
 fs/xfs/xfs_log.c                |    4 +++-
 fs/xfs/xfs_log_recover.c        |    7 ++++++-
 fs/xfs/xfs_unlink_recover.c     |    4 +++-
 4 files changed, 13 insertions(+), 4 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 33c14dd22b77..d4d6d4f84fda 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -124,6 +124,6 @@ bool xlog_add_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
 bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
 bool xlog_put_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
 void xlog_recover_iodone(struct xfs_buf *bp);
-void xlog_recover_process_unlinked(struct xlog *log);
+int xlog_recover_process_unlinked(struct xlog *log);
 
 #endif	/* __XFS_LOG_RECOVER_H__ */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 00fda2e8e738..8203b9b0fd08 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -727,6 +727,8 @@ xfs_log_mount_finish(
 		xfs_log_work_queue(mp);
 	mp->m_super->s_flags &= ~SB_ACTIVE;
 	evict_inodes(mp->m_super);
+	if (error)
+		return error;
 
 	/*
 	 * Drain the buffer LRU after log recovery. This is required for v4
@@ -737,7 +739,7 @@ xfs_log_mount_finish(
 	 * Don't push in the error case because the AIL may have pending intents
 	 * that aren't removed until recovery is cancelled.
 	 */
-	if (!error && recovered) {
+	if (recovered) {
 		xfs_log_force(mp, XFS_LOG_SYNC);
 		xfs_ail_push_all_sync(mp->m_ail);
 	}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 362296b34490..0ccc09c004f1 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3399,7 +3399,12 @@ xlog_recover_finish(
 		 */
 		xfs_log_force(log->l_mp, XFS_LOG_SYNC);
 
-		xlog_recover_process_unlinked(log);
+		error = xlog_recover_process_unlinked(log);
+		if (error) {
+			xfs_alert(log->l_mp,
+					"Failed to recover unlinked metadata");
+			return error;
+		}
 
 		xlog_recover_check_summary(log);
 
diff --git a/fs/xfs/xfs_unlink_recover.c b/fs/xfs/xfs_unlink_recover.c
index 413b34085640..fe7fa3d623f2 100644
--- a/fs/xfs/xfs_unlink_recover.c
+++ b/fs/xfs/xfs_unlink_recover.c
@@ -195,7 +195,7 @@ xlog_recover_process_iunlinked(
 	return 0;
 }
 
-void
+int
 xlog_recover_process_unlinked(
 	struct xlog		*log)
 {
@@ -208,4 +208,6 @@ xlog_recover_process_unlinked(
 		if (error)
 			break;
 	}
+
+	return error;
 }


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 21/28] xfs: refactor releasing finished intents during log recovery
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (19 preceding siblings ...)
  2020-05-05  1:12 ` [PATCH 20/28] xfs: report iunlink recovery failure upwards Darrick J. Wong
@ 2020-05-05  1:12 ` Darrick J. Wong
  2020-05-06  4:06   ` Chandan Babu R
  2020-05-06 15:29   ` Christoph Hellwig
  2020-05-05  1:12 ` [PATCH 22/28] xfs: refactor adding recovered intent items to the log Darrick J. Wong
                   ` (6 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Replace the open-coded AIL item walking with a proper helper when we're
trying to release an intent item that has been finished.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_log_recover.h |    3 +++
 fs/xfs/xfs_bmap_item.c          |   42 +++++++++------------------------------
 fs/xfs/xfs_extfree_item.c       |   42 +++++++++------------------------------
 fs/xfs/xfs_log_recover.c        |   35 ++++++++++++++++++++++++++++++++-
 fs/xfs/xfs_refcount_item.c      |   42 +++++++++------------------------------
 fs/xfs/xfs_rmap_item.c          |   42 +++++++++------------------------------
 fs/xfs/xfs_trans.h              |    1 +
 7 files changed, 78 insertions(+), 129 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index d4d6d4f84fda..b875819a1c04 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -126,4 +126,7 @@ bool xlog_put_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
 void xlog_recover_iodone(struct xfs_buf *bp);
 int xlog_recover_process_unlinked(struct xlog *log);
 
+void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
+		uint64_t intent_id);
+
 #endif	/* __XFS_LOG_RECOVER_H__ */
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index f88ebf8634c4..96627ea800c8 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -578,12 +578,21 @@ xfs_bui_item_recover(
 	return error;
 }
 
+STATIC bool
+xfs_bui_item_match(
+	struct xfs_log_item	*lip,
+	uint64_t		intent_id)
+{
+	return BUI_ITEM(lip)->bui_format.bui_id == intent_id;
+}
+
 static const struct xfs_item_ops xfs_bui_item_ops = {
 	.iop_size	= xfs_bui_item_size,
 	.iop_format	= xfs_bui_item_format,
 	.iop_unpin	= xfs_bui_item_unpin,
 	.iop_release	= xfs_bui_item_release,
 	.iop_recover	= xfs_bui_item_recover,
+	.iop_match	= xfs_bui_item_match,
 };
 
 /*
@@ -675,45 +684,14 @@ xlog_recover_bmap_done_commit_pass2(
 	xfs_lsn_t			lsn)
 {
 	struct xfs_bud_log_format	*bud_formatp;
-	struct xfs_bui_log_item		*buip = NULL;
-	struct xfs_log_item		*lip;
-	uint64_t			bui_id;
-	struct xfs_ail_cursor		cur;
-	struct xfs_ail			*ailp = log->l_ailp;
 
 	bud_formatp = item->ri_buf[0].i_addr;
 	if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) {
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
 		return -EFSCORRUPTED;
 	}
-	bui_id = bud_formatp->bud_bui_id;
-
-	/*
-	 * Search for the BUI with the id in the BUD format structure in the
-	 * AIL.
-	 */
-	spin_lock(&ailp->ail_lock);
-	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
-	while (lip != NULL) {
-		if (lip->li_type == XFS_LI_BUI) {
-			buip = (struct xfs_bui_log_item *)lip;
-			if (buip->bui_format.bui_id == bui_id) {
-				/*
-				 * Drop the BUD reference to the BUI. This
-				 * removes the BUI from the AIL and frees it.
-				 */
-				spin_unlock(&ailp->ail_lock);
-				xfs_bui_release(buip);
-				spin_lock(&ailp->ail_lock);
-				break;
-			}
-		}
-		lip = xfs_trans_ail_cursor_next(ailp, &cur);
-	}
-
-	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->ail_lock);
 
+	xlog_recover_release_intent(log, XFS_LI_BUI, bud_formatp->bud_bui_id);
 	return 0;
 }
 
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 3fc8a9864217..4e1b10ab17a5 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -665,12 +665,21 @@ xfs_efi_item_recover(
 	return error;
 }
 
+STATIC bool
+xfs_efi_item_match(
+	struct xfs_log_item	*lip,
+	uint64_t		intent_id)
+{
+	return EFI_ITEM(lip)->efi_format.efi_id == intent_id;
+}
+
 static const struct xfs_item_ops xfs_efi_item_ops = {
 	.iop_size	= xfs_efi_item_size,
 	.iop_format	= xfs_efi_item_format,
 	.iop_unpin	= xfs_efi_item_unpin,
 	.iop_release	= xfs_efi_item_release,
 	.iop_recover	= xfs_efi_item_recover,
+	.iop_match	= xfs_efi_item_match,
 };
 
 
@@ -734,46 +743,15 @@ xlog_recover_extfree_done_commit_pass2(
 	struct xlog_recover_item	*item,
 	xfs_lsn_t			lsn)
 {
-	struct xfs_ail_cursor		cur;
 	struct xfs_efd_log_format	*efd_formatp;
-	struct xfs_efi_log_item		*efip = NULL;
-	struct xfs_log_item		*lip;
-	struct xfs_ail			*ailp = log->l_ailp;
-	uint64_t			efi_id;
 
 	efd_formatp = item->ri_buf[0].i_addr;
 	ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
 		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
 	       (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
 		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
-	efi_id = efd_formatp->efd_efi_id;
-
-	/*
-	 * Search for the EFI with the id in the EFD format structure in the
-	 * AIL.
-	 */
-	spin_lock(&ailp->ail_lock);
-	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
-	while (lip != NULL) {
-		if (lip->li_type == XFS_LI_EFI) {
-			efip = (struct xfs_efi_log_item *)lip;
-			if (efip->efi_format.efi_id == efi_id) {
-				/*
-				 * Drop the EFD reference to the EFI. This
-				 * removes the EFI from the AIL and frees it.
-				 */
-				spin_unlock(&ailp->ail_lock);
-				xfs_efi_release(efip);
-				spin_lock(&ailp->ail_lock);
-				break;
-			}
-		}
-		lip = xfs_trans_ail_cursor_next(ailp, &cur);
-	}
-
-	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->ail_lock);
 
+	xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id);
 	return 0;
 }
 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0ccc09c004f1..55477b9b9311 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1779,6 +1779,38 @@ xlog_clear_stale_blocks(
 	return 0;
 }
 
+/*
+ * Release the recovered intent item in the AIL that matches the given intent
+ * type and intent id.
+ */
+void
+xlog_recover_release_intent(
+	struct xlog		*log,
+	unsigned short		intent_type,
+	uint64_t		intent_id)
+{
+	struct xfs_ail_cursor	cur;
+	struct xfs_log_item	*lip;
+	struct xfs_ail		*ailp = log->l_ailp;
+
+	spin_lock(&ailp->ail_lock);
+	for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL;
+	     lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
+		if (lip->li_type != intent_type)
+			continue;
+		if (!lip->li_ops->iop_match(lip, intent_id))
+			continue;
+
+		spin_unlock(&ailp->ail_lock);
+		lip->li_ops->iop_release(lip);
+		spin_lock(&ailp->ail_lock);
+		break;
+	}
+
+	xfs_trans_ail_cursor_done(&cur);
+	spin_unlock(&ailp->ail_lock);
+}
+
 /******************************************************************************
  *
  *		Log recover routines
@@ -2590,7 +2622,8 @@ xlog_finish_defer_ops(
 /* Is this log item a deferred action intent? */
 static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
 {
-	return lip->li_ops->iop_recover != NULL;
+	return lip->li_ops->iop_recover != NULL &&
+	       lip->li_ops->iop_match != NULL;
 }
 
 /*
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 5b72eebd8764..27126b136b5a 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -591,12 +591,21 @@ xfs_cui_item_recover(
 	return error;
 }
 
+STATIC bool
+xfs_cui_item_match(
+	struct xfs_log_item	*lip,
+	uint64_t		intent_id)
+{
+	return CUI_ITEM(lip)->cui_format.cui_id == intent_id;
+}
+
 static const struct xfs_item_ops xfs_cui_item_ops = {
 	.iop_size	= xfs_cui_item_size,
 	.iop_format	= xfs_cui_item_format,
 	.iop_unpin	= xfs_cui_item_unpin,
 	.iop_release	= xfs_cui_item_release,
 	.iop_recover	= xfs_cui_item_recover,
+	.iop_match	= xfs_cui_item_match,
 };
 
 /*
@@ -684,45 +693,14 @@ xlog_recover_refcount_done_commit_pass2(
 	xfs_lsn_t			lsn)
 {
 	struct xfs_cud_log_format	*cud_formatp;
-	struct xfs_cui_log_item		*cuip = NULL;
-	struct xfs_log_item		*lip;
-	uint64_t			cui_id;
-	struct xfs_ail_cursor		cur;
-	struct xfs_ail			*ailp = log->l_ailp;
 
 	cud_formatp = item->ri_buf[0].i_addr;
 	if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
 		return -EFSCORRUPTED;
 	}
-	cui_id = cud_formatp->cud_cui_id;
-
-	/*
-	 * Search for the CUI with the id in the CUD format structure in the
-	 * AIL.
-	 */
-	spin_lock(&ailp->ail_lock);
-	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
-	while (lip != NULL) {
-		if (lip->li_type == XFS_LI_CUI) {
-			cuip = (struct xfs_cui_log_item *)lip;
-			if (cuip->cui_format.cui_id == cui_id) {
-				/*
-				 * Drop the CUD reference to the CUI. This
-				 * removes the CUI from the AIL and frees it.
-				 */
-				spin_unlock(&ailp->ail_lock);
-				xfs_cui_release(cuip);
-				spin_lock(&ailp->ail_lock);
-				break;
-			}
-		}
-		lip = xfs_trans_ail_cursor_next(ailp, &cur);
-	}
-
-	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->ail_lock);
 
+	xlog_recover_release_intent(log, XFS_LI_CUI, cud_formatp->cud_cui_id);
 	return 0;
 }
 
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index e763dd8ed0a6..3987f217415c 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -606,12 +606,21 @@ xfs_rui_item_recover(
 	return error;
 }
 
+STATIC bool
+xfs_rui_item_match(
+	struct xfs_log_item	*lip,
+	uint64_t		intent_id)
+{
+	return RUI_ITEM(lip)->rui_format.rui_id == intent_id;
+}
+
 static const struct xfs_item_ops xfs_rui_item_ops = {
 	.iop_size	= xfs_rui_item_size,
 	.iop_format	= xfs_rui_item_format,
 	.iop_unpin	= xfs_rui_item_unpin,
 	.iop_release	= xfs_rui_item_release,
 	.iop_recover	= xfs_rui_item_recover,
+	.iop_match	= xfs_rui_item_match,
 };
 
 /*
@@ -675,42 +684,11 @@ xlog_recover_rmap_done_commit_pass2(
 	xfs_lsn_t			lsn)
 {
 	struct xfs_rud_log_format	*rud_formatp;
-	struct xfs_rui_log_item		*ruip = NULL;
-	struct xfs_log_item		*lip;
-	uint64_t			rui_id;
-	struct xfs_ail_cursor		cur;
-	struct xfs_ail			*ailp = log->l_ailp;
 
 	rud_formatp = item->ri_buf[0].i_addr;
 	ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
-	rui_id = rud_formatp->rud_rui_id;
-
-	/*
-	 * Search for the RUI with the id in the RUD format structure in the
-	 * AIL.
-	 */
-	spin_lock(&ailp->ail_lock);
-	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
-	while (lip != NULL) {
-		if (lip->li_type == XFS_LI_RUI) {
-			ruip = (struct xfs_rui_log_item *)lip;
-			if (ruip->rui_format.rui_id == rui_id) {
-				/*
-				 * Drop the RUD reference to the RUI. This
-				 * removes the RUI from the AIL and frees it.
-				 */
-				spin_unlock(&ailp->ail_lock);
-				xfs_rui_release(ruip);
-				spin_lock(&ailp->ail_lock);
-				break;
-			}
-		}
-		lip = xfs_trans_ail_cursor_next(ailp, &cur);
-	}
-
-	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->ail_lock);
 
+	xlog_recover_release_intent(log, XFS_LI_RUI, rud_formatp->rud_rui_id);
 	return 0;
 }
 
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 3f6a79108991..3e8808bb07c5 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -78,6 +78,7 @@ struct xfs_item_ops {
 	xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
 	void (*iop_error)(struct xfs_log_item *, xfs_buf_t *);
 	int (*iop_recover)(struct xfs_log_item *lip, struct xfs_trans *tp);
+	bool (*iop_match)(struct xfs_log_item *item, uint64_t id);
 };
 
 /*


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 22/28] xfs: refactor adding recovered intent items to the log
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (20 preceding siblings ...)
  2020-05-05  1:12 ` [PATCH 21/28] xfs: refactor releasing finished intents during log recovery Darrick J. Wong
@ 2020-05-05  1:12 ` Darrick J. Wong
  2020-05-06 15:31   ` Christoph Hellwig
  2020-05-05  1:12 ` [PATCH 23/28] xfs: refactor intent item RECOVERED flag into the log item Darrick J. Wong
                   ` (5 subsequent siblings)
  27 siblings, 1 reply; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: Christoph Hellwig, linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

During recovery, every intent that we recover from the log has to be
added to the AIL.  Replace the open-coded addition with a helper.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_log_recover.h |    2 ++
 fs/xfs/xfs_bmap_item.c          |   10 +---------
 fs/xfs/xfs_extfree_item.c       |   10 +---------
 fs/xfs/xfs_log_recover.c        |   17 +++++++++++++++++
 fs/xfs/xfs_refcount_item.c      |   10 +---------
 fs/xfs/xfs_rmap_item.c          |   10 +---------
 6 files changed, 23 insertions(+), 36 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index b875819a1c04..d8c0eae87179 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -128,5 +128,7 @@ int xlog_recover_process_unlinked(struct xlog *log);
 
 void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
 		uint64_t intent_id);
+void xlog_recover_insert_ail(struct xlog *log, struct xfs_log_item *lip,
+		xfs_lsn_t lsn);
 
 #endif	/* __XFS_LOG_RECOVER_H__ */
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 96627ea800c8..090dc1c53c92 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -651,15 +651,7 @@ xlog_recover_bmap_intent_commit_pass2(
 		return error;
 	}
 	atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
-
-	spin_lock(&log->l_ailp->ail_lock);
-	/*
-	 * The RUI has two references. One for the RUD and one for RUI to ensure
-	 * it makes it into the AIL. Insert the RUI into the AIL directly and
-	 * drop the RUI reference. Note that xfs_trans_ail_update() drops the
-	 * AIL lock.
-	 */
-	xfs_trans_ail_update(log->l_ailp, &buip->bui_item, lsn);
+	xlog_recover_insert_ail(log, &buip->bui_item, lsn);
 	xfs_bui_release(buip);
 	return 0;
 }
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 4e1b10ab17a5..dc6ebb5fb8d3 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -711,15 +711,7 @@ xlog_recover_extfree_intent_commit_pass2(
 		return error;
 	}
 	atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
-
-	spin_lock(&log->l_ailp->ail_lock);
-	/*
-	 * The EFI has two references. One for the EFD and one for EFI to ensure
-	 * it makes it into the AIL. Insert the EFI into the AIL directly and
-	 * drop the EFI reference. Note that xfs_trans_ail_update() drops the
-	 * AIL lock.
-	 */
-	xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
+	xlog_recover_insert_ail(log, &efip->efi_item, lsn);
 	xfs_efi_release(efip);
 	return 0;
 }
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 55477b9b9311..a2c03d87c374 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1811,6 +1811,23 @@ xlog_recover_release_intent(
 	spin_unlock(&ailp->ail_lock);
 }
 
+/* Insert a recovered intent item into the AIL. */
+void
+xlog_recover_insert_ail(
+	struct xlog		*log,
+	struct xfs_log_item	*lip,
+	xfs_lsn_t		lsn)
+{
+	/*
+	 * The intent has two references. One for the done item and one for the
+	 * intent to ensure it makes it into the AIL. Insert the intent into
+	 * the AIL directly; the caller drops the intent reference. Note that
+	 * xfs_trans_ail_update() drops the AIL lock.
+	 */
+	spin_lock(&log->l_ailp->ail_lock);
+	xfs_trans_ail_update(log->l_ailp, lip, lsn);
+}
+
 /******************************************************************************
  *
  *		Log recover routines
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 27126b136b5a..fdc18576a023 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -660,15 +660,7 @@ xlog_recover_refcount_intent_commit_pass2(
 		return error;
 	}
 	atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
-
-	spin_lock(&log->l_ailp->ail_lock);
-	/*
-	 * The CUI has two references. One for the CUD and one for CUI to ensure
-	 * it makes it into the AIL. Insert the CUI into the AIL directly and
-	 * drop the CUI reference. Note that xfs_trans_ail_update() drops the
-	 * AIL lock.
-	 */
-	xfs_trans_ail_update(log->l_ailp, &cuip->cui_item, lsn);
+	xlog_recover_insert_ail(log, &cuip->cui_item, lsn);
 	xfs_cui_release(cuip);
 	return 0;
 }
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 3987f217415c..f9cd3ff18736 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -651,15 +651,7 @@ xlog_recover_rmap_intent_commit_pass2(
 		return error;
 	}
 	atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
-
-	spin_lock(&log->l_ailp->ail_lock);
-	/*
-	 * The RUI has two references. One for the RUD and one for RUI to ensure
-	 * it makes it into the AIL. Insert the RUI into the AIL directly and
-	 * drop the RUI reference. Note that xfs_trans_ail_update() drops the
-	 * AIL lock.
-	 */
-	xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn);
+	xlog_recover_insert_ail(log, &ruip->rui_item, lsn);
 	xfs_rui_release(ruip);
 	return 0;
 }


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 23/28] xfs: refactor intent item RECOVERED flag into the log item
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (21 preceding siblings ...)
  2020-05-05  1:12 ` [PATCH 22/28] xfs: refactor adding recovered intent items to the log Darrick J. Wong
@ 2020-05-05  1:12 ` Darrick J. Wong
  2020-05-06  4:45   ` Chandan Babu R
  2020-05-06 15:32   ` Christoph Hellwig
  2020-05-05  1:13 ` [PATCH 24/28] xfs: refactor intent item iop_recover calls Darrick J. Wong
                   ` (4 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Rename XFS_{EFI,BUI,RUI,CUI}_RECOVERED to XFS_LI_RECOVERED so that we
track recovery status in the log item, then get rid of the now unused
flags fields in each of those log item types.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_item.c     |   10 +++++-----
 fs/xfs/xfs_bmap_item.h     |    6 ------
 fs/xfs/xfs_extfree_item.c  |    8 ++++----
 fs/xfs/xfs_extfree_item.h  |    6 ------
 fs/xfs/xfs_refcount_item.c |    8 ++++----
 fs/xfs/xfs_refcount_item.h |    6 ------
 fs/xfs/xfs_rmap_item.c     |    8 ++++----
 fs/xfs/xfs_rmap_item.h     |    6 ------
 fs/xfs/xfs_trans.h         |    4 +++-
 9 files changed, 20 insertions(+), 42 deletions(-)


diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 090dc1c53c92..8dd157fc44fa 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -441,11 +441,11 @@ xfs_bui_recover(
 	struct xfs_bmbt_irec		irec;
 	struct xfs_mount		*mp = parent_tp->t_mountp;
 
-	ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
+	ASSERT(!test_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags));
 
 	/* Only one mapping operation per BUI... */
 	if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
-		set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
+		set_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags);
 		xfs_bui_release(buip);
 		return -EFSCORRUPTED;
 	}
@@ -479,7 +479,7 @@ xfs_bui_recover(
 		 * This will pull the BUI from the AIL and
 		 * free the memory associated with it.
 		 */
-		set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
+		set_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags);
 		xfs_bui_release(buip);
 		return -EFSCORRUPTED;
 	}
@@ -537,7 +537,7 @@ xfs_bui_recover(
 		xfs_bmap_unmap_extent(tp, ip, &irec);
 	}
 
-	set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
+	set_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags);
 	xfs_defer_move(parent_tp, tp);
 	error = xfs_trans_commit(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -568,7 +568,7 @@ xfs_bui_item_recover(
 	/*
 	 * Skip BUIs that we've already processed.
 	 */
-	if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags))
+	if (test_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags))
 		return 0;
 
 	spin_unlock(&ailp->ail_lock);
diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
index 44d06e62f8f9..b9be62f8bd52 100644
--- a/fs/xfs/xfs_bmap_item.h
+++ b/fs/xfs/xfs_bmap_item.h
@@ -32,11 +32,6 @@ struct kmem_zone;
  */
 #define	XFS_BUI_MAX_FAST_EXTENTS	1
 
-/*
- * Define BUI flag bits. Manipulated by set/clear/test_bit operators.
- */
-#define	XFS_BUI_RECOVERED		1
-
 /*
  * This is the "bmap update intent" log item.  It is used to log the fact that
  * some reverse mappings need to change.  It is used in conjunction with the
@@ -49,7 +44,6 @@ struct xfs_bui_log_item {
 	struct xfs_log_item		bui_item;
 	atomic_t			bui_refcount;
 	atomic_t			bui_next_extent;
-	unsigned long			bui_flags;	/* misc flags */
 	struct xfs_bui_log_format	bui_format;
 };
 
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index dc6ebb5fb8d3..635c99fdda85 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -592,7 +592,7 @@ xfs_efi_recover(
 	xfs_extent_t		*extp;
 	xfs_fsblock_t		startblock_fsb;
 
-	ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
+	ASSERT(!test_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags));
 
 	/*
 	 * First check the validity of the extents described by the
@@ -611,7 +611,7 @@ xfs_efi_recover(
 			 * This will pull the EFI from the AIL and
 			 * free the memory associated with it.
 			 */
-			set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
+			set_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags);
 			xfs_efi_release(efip);
 			return -EFSCORRUPTED;
 		}
@@ -632,7 +632,7 @@ xfs_efi_recover(
 
 	}
 
-	set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
+	set_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags);
 	error = xfs_trans_commit(tp);
 	return error;
 
@@ -655,7 +655,7 @@ xfs_efi_item_recover(
 	 * Skip EFIs that we've already processed.
 	 */
 	efip = container_of(lip, struct xfs_efi_log_item, efi_item);
-	if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
+	if (test_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags))
 		return 0;
 
 	spin_unlock(&ailp->ail_lock);
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 4b2c2c5c5985..cd2860c875bf 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -16,11 +16,6 @@ struct kmem_zone;
  */
 #define	XFS_EFI_MAX_FAST_EXTENTS	16
 
-/*
- * Define EFI flag bits. Manipulated by set/clear/test_bit operators.
- */
-#define	XFS_EFI_RECOVERED	1
-
 /*
  * This is the "extent free intention" log item.  It is used to log the fact
  * that some extents need to be free.  It is used in conjunction with the
@@ -54,7 +49,6 @@ struct xfs_efi_log_item {
 	struct xfs_log_item	efi_item;
 	atomic_t		efi_refcount;
 	atomic_t		efi_next_extent;
-	unsigned long		efi_flags;	/* misc flags */
 	xfs_efi_log_format_t	efi_format;
 };
 
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index fdc18576a023..4b242b3b33a3 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -441,7 +441,7 @@ xfs_cui_recover(
 	bool				requeue_only = false;
 	struct xfs_mount		*mp = parent_tp->t_mountp;
 
-	ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
+	ASSERT(!test_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags));
 
 	/*
 	 * First check the validity of the extents described by the
@@ -472,7 +472,7 @@ xfs_cui_recover(
 			 * This will pull the CUI from the AIL and
 			 * free the memory associated with it.
 			 */
-			set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
+			set_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags);
 			xfs_cui_release(cuip);
 			return -EFSCORRUPTED;
 		}
@@ -556,7 +556,7 @@ xfs_cui_recover(
 	}
 
 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
-	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
+	set_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags);
 	xfs_defer_move(parent_tp, tp);
 	error = xfs_trans_commit(tp);
 	return error;
@@ -581,7 +581,7 @@ xfs_cui_item_recover(
 	/*
 	 * Skip CUIs that we've already processed.
 	 */
-	if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
+	if (test_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags))
 		return 0;
 
 	spin_unlock(&ailp->ail_lock);
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
index cfaa857673a6..f4f2e836540b 100644
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -32,11 +32,6 @@ struct kmem_zone;
  */
 #define	XFS_CUI_MAX_FAST_EXTENTS	16
 
-/*
- * Define CUI flag bits. Manipulated by set/clear/test_bit operators.
- */
-#define	XFS_CUI_RECOVERED		1
-
 /*
  * This is the "refcount update intent" log item.  It is used to log
  * the fact that some reverse mappings need to change.  It is used in
@@ -51,7 +46,6 @@ struct xfs_cui_log_item {
 	struct xfs_log_item		cui_item;
 	atomic_t			cui_refcount;
 	atomic_t			cui_next_extent;
-	unsigned long			cui_flags;	/* misc flags */
 	struct xfs_cui_log_format	cui_format;
 };
 
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index f9cd3ff18736..625eaf954d74 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -480,7 +480,7 @@ xfs_rui_recover(
 	struct xfs_trans		*tp;
 	struct xfs_btree_cur		*rcur = NULL;
 
-	ASSERT(!test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags));
+	ASSERT(!test_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags));
 
 	/*
 	 * First check the validity of the extents described by the
@@ -515,7 +515,7 @@ xfs_rui_recover(
 			 * This will pull the RUI from the AIL and
 			 * free the memory associated with it.
 			 */
-			set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
+			set_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags);
 			xfs_rui_release(ruip);
 			return -EFSCORRUPTED;
 		}
@@ -573,7 +573,7 @@ xfs_rui_recover(
 	}
 
 	xfs_rmap_finish_one_cleanup(tp, rcur, error);
-	set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
+	set_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags);
 	error = xfs_trans_commit(tp);
 	return error;
 
@@ -596,7 +596,7 @@ xfs_rui_item_recover(
 	/*
 	 * Skip RUIs that we've already processed.
 	 */
-	if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
+	if (test_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags))
 		return 0;
 
 	spin_unlock(&ailp->ail_lock);
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
index 48a77a6f5c94..31e6cdfff71f 100644
--- a/fs/xfs/xfs_rmap_item.h
+++ b/fs/xfs/xfs_rmap_item.h
@@ -35,11 +35,6 @@ struct kmem_zone;
  */
 #define	XFS_RUI_MAX_FAST_EXTENTS	16
 
-/*
- * Define RUI flag bits. Manipulated by set/clear/test_bit operators.
- */
-#define	XFS_RUI_RECOVERED		1
-
 /*
  * This is the "rmap update intent" log item.  It is used to log the fact that
  * some reverse mappings need to change.  It is used in conjunction with the
@@ -52,7 +47,6 @@ struct xfs_rui_log_item {
 	struct xfs_log_item		rui_item;
 	atomic_t			rui_refcount;
 	atomic_t			rui_next_extent;
-	unsigned long			rui_flags;	/* misc flags */
 	struct xfs_rui_log_format	rui_format;
 };
 
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 3e8808bb07c5..8308bf6d7e40 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -59,12 +59,14 @@ struct xfs_log_item {
 #define	XFS_LI_ABORTED	1
 #define	XFS_LI_FAILED	2
 #define	XFS_LI_DIRTY	3	/* log item dirty in transaction */
+#define	XFS_LI_RECOVERED 4	/* log intent item has been recovered */
 
 #define XFS_LI_FLAGS \
 	{ (1 << XFS_LI_IN_AIL),		"IN_AIL" }, \
 	{ (1 << XFS_LI_ABORTED),	"ABORTED" }, \
 	{ (1 << XFS_LI_FAILED),		"FAILED" }, \
-	{ (1 << XFS_LI_DIRTY),		"DIRTY" }
+	{ (1 << XFS_LI_DIRTY),		"DIRTY" }, \
+	{ (1 << XFS_LI_RECOVERED),	"RECOVERED" }
 
 struct xfs_item_ops {
 	unsigned flags;


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 24/28] xfs: refactor intent item iop_recover calls
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (22 preceding siblings ...)
  2020-05-05  1:12 ` [PATCH 23/28] xfs: refactor intent item RECOVERED flag into the log item Darrick J. Wong
@ 2020-05-05  1:13 ` Darrick J. Wong
  2020-05-06  5:14   ` Chandan Babu R
  2020-05-06 15:34   ` Christoph Hellwig
  2020-05-05  1:13 ` [PATCH 25/28] xfs: hoist setting of XFS_LI_RECOVERED to caller Darrick J. Wong
                   ` (3 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:13 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Now that we've made the recovered item tests all the same, we can hoist
the test and the AIL locking code to the ->iop_recover caller and call
the recovery function directly.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_item.c     |   48 ++++++++++++--------------------------------
 fs/xfs/xfs_extfree_item.c  |   44 ++++++++++------------------------------
 fs/xfs/xfs_log_recover.c   |    8 ++++++-
 fs/xfs/xfs_refcount_item.c |   46 +++++++++++-------------------------------
 fs/xfs/xfs_rmap_item.c     |   45 +++++++++++------------------------------
 5 files changed, 54 insertions(+), 137 deletions(-)


diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 8dd157fc44fa..8f0dc6d550d1 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -421,25 +421,26 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
  * We need to update some inode's bmbt.
  */
 STATIC int
-xfs_bui_recover(
-	struct xfs_trans		*parent_tp,
-	struct xfs_bui_log_item		*buip)
+xfs_bui_item_recover(
+	struct xfs_log_item		*lip,
+	struct xfs_trans		*parent_tp)
 {
-	int				error = 0;
-	unsigned int			bui_type;
+	struct xfs_bmbt_irec		irec;
+	struct xfs_bui_log_item		*buip = BUI_ITEM(lip);
+	struct xfs_trans		*tp;
+	struct xfs_inode		*ip = NULL;
+	struct xfs_mount		*mp = parent_tp->t_mountp;
 	struct xfs_map_extent		*bmap;
+	struct xfs_bud_log_item		*budp;
 	xfs_fsblock_t			startblock_fsb;
 	xfs_fsblock_t			inode_fsb;
 	xfs_filblks_t			count;
-	bool				op_ok;
-	struct xfs_bud_log_item		*budp;
+	xfs_exntst_t			state;
 	enum xfs_bmap_intent_type	type;
+	bool				op_ok;
+	unsigned int			bui_type;
 	int				whichfork;
-	xfs_exntst_t			state;
-	struct xfs_trans		*tp;
-	struct xfs_inode		*ip = NULL;
-	struct xfs_bmbt_irec		irec;
-	struct xfs_mount		*mp = parent_tp->t_mountp;
+	int				error = 0;
 
 	ASSERT(!test_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags));
 
@@ -555,29 +556,6 @@ xfs_bui_recover(
 	return error;
 }
 
-/* Recover the BUI if necessary. */
-STATIC int
-xfs_bui_item_recover(
-	struct xfs_log_item		*lip,
-	struct xfs_trans		*tp)
-{
-	struct xfs_ail			*ailp = lip->li_ailp;
-	struct xfs_bui_log_item		*buip = BUI_ITEM(lip);
-	int				error;
-
-	/*
-	 * Skip BUIs that we've already processed.
-	 */
-	if (test_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags))
-		return 0;
-
-	spin_unlock(&ailp->ail_lock);
-	error = xfs_bui_recover(tp, buip);
-	spin_lock(&ailp->ail_lock);
-
-	return error;
-}
-
 STATIC bool
 xfs_bui_item_match(
 	struct xfs_log_item	*lip,
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 635c99fdda85..ec8a79fe6cab 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -581,16 +581,18 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
  * the log.  We need to free the extents that it describes.
  */
 STATIC int
-xfs_efi_recover(
-	struct xfs_mount	*mp,
-	struct xfs_efi_log_item	*efip)
+xfs_efi_item_recover(
+	struct xfs_log_item		*lip,
+	struct xfs_trans		*parent_tp)
 {
-	struct xfs_efd_log_item	*efdp;
-	struct xfs_trans	*tp;
-	int			i;
-	int			error = 0;
-	xfs_extent_t		*extp;
-	xfs_fsblock_t		startblock_fsb;
+	struct xfs_efi_log_item		*efip = EFI_ITEM(lip);
+	struct xfs_mount		*mp = parent_tp->t_mountp;
+	struct xfs_efd_log_item		*efdp;
+	struct xfs_trans		*tp;
+	struct xfs_extent		*extp;
+	xfs_fsblock_t			startblock_fsb;
+	int				i;
+	int				error = 0;
 
 	ASSERT(!test_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags));
 
@@ -641,30 +643,6 @@ xfs_efi_recover(
 	return error;
 }
 
-/* Recover the EFI if necessary. */
-STATIC int
-xfs_efi_item_recover(
-	struct xfs_log_item		*lip,
-	struct xfs_trans		*tp)
-{
-	struct xfs_ail			*ailp = lip->li_ailp;
-	struct xfs_efi_log_item		*efip;
-	int				error;
-
-	/*
-	 * Skip EFIs that we've already processed.
-	 */
-	efip = container_of(lip, struct xfs_efi_log_item, efi_item);
-	if (test_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags))
-		return 0;
-
-	spin_unlock(&ailp->ail_lock);
-	error = xfs_efi_recover(tp->t_mountp, efip);
-	spin_lock(&ailp->ail_lock);
-
-	return error;
-}
-
 STATIC bool
 xfs_efi_item_match(
 	struct xfs_log_item	*lip,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a2c03d87c374..8ff957da2845 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2667,7 +2667,7 @@ xlog_recover_process_intents(
 	struct xfs_ail_cursor	cur;
 	struct xfs_log_item	*lip;
 	struct xfs_ail		*ailp;
-	int			error;
+	int			error = 0;
 #if defined(DEBUG) || defined(XFS_WARN)
 	xfs_lsn_t		last_lsn;
 #endif
@@ -2717,7 +2717,11 @@ xlog_recover_process_intents(
 		 * this routine or else those subsequent intents will get
 		 * replayed in the wrong order!
 		 */
-		error = lip->li_ops->iop_recover(lip, parent_tp);
+		if (!test_bit(XFS_LI_RECOVERED, &lip->li_flags)) {
+			spin_unlock(&ailp->ail_lock);
+			error = lip->li_ops->iop_recover(lip, parent_tp);
+			spin_lock(&ailp->ail_lock);
+		}
 		if (error)
 			goto out;
 		lip = xfs_trans_ail_cursor_next(ailp, &cur);
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 4b242b3b33a3..fab821fce76b 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -421,25 +421,26 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
  * We need to update the refcountbt.
  */
 STATIC int
-xfs_cui_recover(
-	struct xfs_trans		*parent_tp,
-	struct xfs_cui_log_item		*cuip)
+xfs_cui_item_recover(
+	struct xfs_log_item		*lip,
+	struct xfs_trans		*parent_tp)
 {
-	int				i;
-	int				error = 0;
-	unsigned int			refc_type;
+	struct xfs_bmbt_irec		irec;
+	struct xfs_cui_log_item		*cuip = CUI_ITEM(lip);
 	struct xfs_phys_extent		*refc;
-	xfs_fsblock_t			startblock_fsb;
-	bool				op_ok;
 	struct xfs_cud_log_item		*cudp;
 	struct xfs_trans		*tp;
 	struct xfs_btree_cur		*rcur = NULL;
-	enum xfs_refcount_intent_type	type;
+	struct xfs_mount		*mp = parent_tp->t_mountp;
+	xfs_fsblock_t			startblock_fsb;
 	xfs_fsblock_t			new_fsb;
 	xfs_extlen_t			new_len;
-	struct xfs_bmbt_irec		irec;
+	unsigned int			refc_type;
+	bool				op_ok;
 	bool				requeue_only = false;
-	struct xfs_mount		*mp = parent_tp->t_mountp;
+	enum xfs_refcount_intent_type	type;
+	int				i;
+	int				error = 0;
 
 	ASSERT(!test_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags));
 
@@ -568,29 +569,6 @@ xfs_cui_recover(
 	return error;
 }
 
-/* Recover the CUI if necessary. */
-STATIC int
-xfs_cui_item_recover(
-	struct xfs_log_item		*lip,
-	struct xfs_trans		*tp)
-{
-	struct xfs_ail			*ailp = lip->li_ailp;
-	struct xfs_cui_log_item		*cuip = CUI_ITEM(lip);
-	int				error;
-
-	/*
-	 * Skip CUIs that we've already processed.
-	 */
-	if (test_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags))
-		return 0;
-
-	spin_unlock(&ailp->ail_lock);
-	error = xfs_cui_recover(tp, cuip);
-	spin_lock(&ailp->ail_lock);
-
-	return error;
-}
-
 STATIC bool
 xfs_cui_item_match(
 	struct xfs_log_item	*lip,
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 625eaf954d74..c9233a220551 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -464,21 +464,23 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
  * We need to update the rmapbt.
  */
 STATIC int
-xfs_rui_recover(
-	struct xfs_mount		*mp,
-	struct xfs_rui_log_item		*ruip)
+xfs_rui_item_recover(
+	struct xfs_log_item		*lip,
+	struct xfs_trans		*parent_tp)
 {
-	int				i;
-	int				error = 0;
+	struct xfs_rui_log_item		*ruip = RUI_ITEM(lip);
 	struct xfs_map_extent		*rmap;
-	xfs_fsblock_t			startblock_fsb;
-	bool				op_ok;
 	struct xfs_rud_log_item		*rudp;
-	enum xfs_rmap_intent_type	type;
-	int				whichfork;
-	xfs_exntst_t			state;
 	struct xfs_trans		*tp;
 	struct xfs_btree_cur		*rcur = NULL;
+	struct xfs_mount		*mp = parent_tp->t_mountp;
+	xfs_fsblock_t			startblock_fsb;
+	enum xfs_rmap_intent_type	type;
+	xfs_exntst_t			state;
+	bool				op_ok;
+	int				i;
+	int				whichfork;
+	int				error = 0;
 
 	ASSERT(!test_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags));
 
@@ -583,29 +585,6 @@ xfs_rui_recover(
 	return error;
 }
 
-/* Recover the RUI if necessary. */
-STATIC int
-xfs_rui_item_recover(
-	struct xfs_log_item		*lip,
-	struct xfs_trans		*tp)
-{
-	struct xfs_ail			*ailp = lip->li_ailp;
-	struct xfs_rui_log_item		*ruip = RUI_ITEM(lip);
-	int				error;
-
-	/*
-	 * Skip RUIs that we've already processed.
-	 */
-	if (test_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags))
-		return 0;
-
-	spin_unlock(&ailp->ail_lock);
-	error = xfs_rui_recover(tp->t_mountp, ruip);
-	spin_lock(&ailp->ail_lock);
-
-	return error;
-}
-
 STATIC bool
 xfs_rui_item_match(
 	struct xfs_log_item	*lip,


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 25/28] xfs: hoist setting of XFS_LI_RECOVERED to caller
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (23 preceding siblings ...)
  2020-05-05  1:13 ` [PATCH 24/28] xfs: refactor intent item iop_recover calls Darrick J. Wong
@ 2020-05-05  1:13 ` Darrick J. Wong
  2020-05-06  5:34   ` Chandan Babu R
  2020-05-06 15:35   ` Christoph Hellwig
  2020-05-05  1:13 ` [PATCH 26/28] xfs: move log recovery buffer cancellation code to xfs_buf_item_recover.c Darrick J. Wong
                   ` (2 subsequent siblings)
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:13 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

The only purpose of XFS_LI_RECOVERED is to prevent log recovery from
trying to replay recovered intents more than once.  Therefore, we can
move the bit setting up to the ->iop_recover caller.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_item.c     |    5 -----
 fs/xfs/xfs_extfree_item.c  |    4 ----
 fs/xfs/xfs_log_recover.c   |    2 +-
 fs/xfs/xfs_refcount_item.c |    4 ----
 fs/xfs/xfs_rmap_item.c     |    4 ----
 5 files changed, 1 insertion(+), 18 deletions(-)


diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 8f0dc6d550d1..0793c317defb 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -442,11 +442,8 @@ xfs_bui_item_recover(
 	int				whichfork;
 	int				error = 0;
 
-	ASSERT(!test_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags));
-
 	/* Only one mapping operation per BUI... */
 	if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
-		set_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags);
 		xfs_bui_release(buip);
 		return -EFSCORRUPTED;
 	}
@@ -480,7 +477,6 @@ xfs_bui_item_recover(
 		 * This will pull the BUI from the AIL and
 		 * free the memory associated with it.
 		 */
-		set_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags);
 		xfs_bui_release(buip);
 		return -EFSCORRUPTED;
 	}
@@ -538,7 +534,6 @@ xfs_bui_item_recover(
 		xfs_bmap_unmap_extent(tp, ip, &irec);
 	}
 
-	set_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags);
 	xfs_defer_move(parent_tp, tp);
 	error = xfs_trans_commit(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index ec8a79fe6cab..b92678bede24 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -594,8 +594,6 @@ xfs_efi_item_recover(
 	int				i;
 	int				error = 0;
 
-	ASSERT(!test_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags));
-
 	/*
 	 * First check the validity of the extents described by the
 	 * EFI.  If any are bad, then assume that all are bad and
@@ -613,7 +611,6 @@ xfs_efi_item_recover(
 			 * This will pull the EFI from the AIL and
 			 * free the memory associated with it.
 			 */
-			set_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags);
 			xfs_efi_release(efip);
 			return -EFSCORRUPTED;
 		}
@@ -634,7 +631,6 @@ xfs_efi_item_recover(
 
 	}
 
-	set_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags);
 	error = xfs_trans_commit(tp);
 	return error;
 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8ff957da2845..a49435db3be0 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2717,7 +2717,7 @@ xlog_recover_process_intents(
 		 * this routine or else those subsequent intents will get
 		 * replayed in the wrong order!
 		 */
-		if (!test_bit(XFS_LI_RECOVERED, &lip->li_flags)) {
+		if (!test_and_set_bit(XFS_LI_RECOVERED, &lip->li_flags)) {
 			spin_unlock(&ailp->ail_lock);
 			error = lip->li_ops->iop_recover(lip, parent_tp);
 			spin_lock(&ailp->ail_lock);
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index fab821fce76b..e6d355a09bb3 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -442,8 +442,6 @@ xfs_cui_item_recover(
 	int				i;
 	int				error = 0;
 
-	ASSERT(!test_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags));
-
 	/*
 	 * First check the validity of the extents described by the
 	 * CUI.  If any are bad, then assume that all are bad and
@@ -473,7 +471,6 @@ xfs_cui_item_recover(
 			 * This will pull the CUI from the AIL and
 			 * free the memory associated with it.
 			 */
-			set_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags);
 			xfs_cui_release(cuip);
 			return -EFSCORRUPTED;
 		}
@@ -557,7 +554,6 @@ xfs_cui_item_recover(
 	}
 
 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
-	set_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags);
 	xfs_defer_move(parent_tp, tp);
 	error = xfs_trans_commit(tp);
 	return error;
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index c9233a220551..4a5e2b1cf75a 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -482,8 +482,6 @@ xfs_rui_item_recover(
 	int				whichfork;
 	int				error = 0;
 
-	ASSERT(!test_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags));
-
 	/*
 	 * First check the validity of the extents described by the
 	 * RUI.  If any are bad, then assume that all are bad and
@@ -517,7 +515,6 @@ xfs_rui_item_recover(
 			 * This will pull the RUI from the AIL and
 			 * free the memory associated with it.
 			 */
-			set_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags);
 			xfs_rui_release(ruip);
 			return -EFSCORRUPTED;
 		}
@@ -575,7 +572,6 @@ xfs_rui_item_recover(
 	}
 
 	xfs_rmap_finish_one_cleanup(tp, rcur, error);
-	set_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags);
 	error = xfs_trans_commit(tp);
 	return error;
 


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 26/28] xfs: move log recovery buffer cancellation code to xfs_buf_item_recover.c
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (24 preceding siblings ...)
  2020-05-05  1:13 ` [PATCH 25/28] xfs: hoist setting of XFS_LI_RECOVERED to caller Darrick J. Wong
@ 2020-05-05  1:13 ` Darrick J. Wong
  2020-05-06  6:42   ` Chandan Babu R
  2020-05-06 15:35   ` Christoph Hellwig
  2020-05-05  1:13 ` [PATCH 27/28] xfs: remove unnecessary includes from xfs_log_recover.c Darrick J. Wong
  2020-05-05  1:13 ` [PATCH 28/28] xfs: use parallel processing to clear unlinked metadata Darrick J. Wong
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:13 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Move the helpers that handle incore buffer cancellation records to
xfs_buf_item_recover.c since they're not directly related to the main
log recovery machinery.  No functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_buf_item_recover.c |  104 +++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_log_recover.c      |  102 ----------------------------------------
 2 files changed, 104 insertions(+), 102 deletions(-)


diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 4ca6d47d6c95..99ec6ebbc7f4 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -23,6 +23,110 @@
 #include "xfs_dir2.h"
 #include "xfs_quota.h"
 
+/*
+ * This structure is used during recovery to record the buf log items which
+ * have been canceled and should not be replayed.
+ */
+struct xfs_buf_cancel {
+	xfs_daddr_t		bc_blkno;
+	uint			bc_len;
+	int			bc_refcount;
+	struct list_head	bc_list;
+};
+
+static struct xfs_buf_cancel *
+xlog_find_buffer_cancelled(
+	struct xlog		*log,
+	xfs_daddr_t		blkno,
+	uint			len)
+{
+	struct list_head	*bucket;
+	struct xfs_buf_cancel	*bcp;
+
+	if (!log->l_buf_cancel_table)
+		return NULL;
+
+	bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
+	list_for_each_entry(bcp, bucket, bc_list) {
+		if (bcp->bc_blkno == blkno && bcp->bc_len == len)
+			return bcp;
+	}
+
+	return NULL;
+}
+
+bool
+xlog_add_buffer_cancelled(
+	struct xlog		*log,
+	xfs_daddr_t		blkno,
+	uint			len)
+{
+	struct xfs_buf_cancel	*bcp;
+
+	/*
+	 * If we find an existing cancel record, this indicates that the buffer
+	 * was cancelled multiple times.  To ensure that during pass 2 we keep
+	 * the record in the table until we reach its last occurrence in the
+	 * log, a reference count is kept to tell how many times we expect to
+	 * see this record during the second pass.
+	 */
+	bcp = xlog_find_buffer_cancelled(log, blkno, len);
+	if (bcp) {
+		bcp->bc_refcount++;
+		return false;
+	}
+
+	bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), 0);
+	bcp->bc_blkno = blkno;
+	bcp->bc_len = len;
+	bcp->bc_refcount = 1;
+	list_add_tail(&bcp->bc_list, XLOG_BUF_CANCEL_BUCKET(log, blkno));
+	return true;
+}
+
+/*
+ * Check if there is an entry for blkno, len in the buffer cancel record table.
+ */
+bool
+xlog_is_buffer_cancelled(
+	struct xlog		*log,
+	xfs_daddr_t		blkno,
+	uint			len)
+{
+	return xlog_find_buffer_cancelled(log, blkno, len) != NULL;
+}
+
+/*
+ * Check if there is an entry for blkno, len in the buffer cancel record table,
+ * and decrement the reference count on it if there is one.
+ *
+ * Remove the cancel record once the refcount hits zero, so that if the same
+ * buffer is re-used again after its last cancellation we actually replay the
+ * changes made at that point.
+ */
+bool
+xlog_put_buffer_cancelled(
+	struct xlog		*log,
+	xfs_daddr_t		blkno,
+	uint			len)
+{
+	struct xfs_buf_cancel	*bcp;
+
+	bcp = xlog_find_buffer_cancelled(log, blkno, len);
+	if (!bcp) {
+		ASSERT(0);
+		return false;
+	}
+
+	if (--bcp->bc_refcount == 0) {
+		list_del(&bcp->bc_list);
+		kmem_free(bcp);
+	}
+	return true;
+}
+
+/* log buffer item recovery */
+
 STATIC enum xlog_recover_reorder
 xlog_recover_buf_reorder(
 	struct xlog_recover_item	*item)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a49435db3be0..0c8a1f4bf4ad 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -55,17 +55,6 @@ STATIC int
 xlog_do_recovery_pass(
         struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *);
 
-/*
- * This structure is used during recovery to record the buf log items which
- * have been canceled and should not be replayed.
- */
-struct xfs_buf_cancel {
-	xfs_daddr_t		bc_blkno;
-	uint			bc_len;
-	int			bc_refcount;
-	struct list_head	bc_list;
-};
-
 /*
  * Sector aligned buffer routines for buffer create/read/write/access
  */
@@ -1981,97 +1970,6 @@ xlog_recover_reorder_trans(
 	return error;
 }
 
-static struct xfs_buf_cancel *
-xlog_find_buffer_cancelled(
-	struct xlog		*log,
-	xfs_daddr_t		blkno,
-	uint			len)
-{
-	struct list_head	*bucket;
-	struct xfs_buf_cancel	*bcp;
-
-	if (!log->l_buf_cancel_table)
-		return NULL;
-
-	bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
-	list_for_each_entry(bcp, bucket, bc_list) {
-		if (bcp->bc_blkno == blkno && bcp->bc_len == len)
-			return bcp;
-	}
-
-	return NULL;
-}
-
-bool
-xlog_add_buffer_cancelled(
-	struct xlog		*log,
-	xfs_daddr_t		blkno,
-	uint			len)
-{
-	struct xfs_buf_cancel	*bcp;
-
-	/*
-	 * If we find an existing cancel record, this indicates that the buffer
-	 * was cancelled multiple times.  To ensure that during pass 2 we keep
-	 * the record in the table until we reach its last occurrence in the
-	 * log, a reference count is kept to tell how many times we expect to
-	 * see this record during the second pass.
-	 */
-	bcp = xlog_find_buffer_cancelled(log, blkno, len);
-	if (bcp) {
-		bcp->bc_refcount++;
-		return false;
-	}
-
-	bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), 0);
-	bcp->bc_blkno = blkno;
-	bcp->bc_len = len;
-	bcp->bc_refcount = 1;
-	list_add_tail(&bcp->bc_list, XLOG_BUF_CANCEL_BUCKET(log, blkno));
-	return true;
-}
-
-/*
- * Check if there is and entry for blkno, len in the buffer cancel record table.
- */
-bool
-xlog_is_buffer_cancelled(
-	struct xlog		*log,
-	xfs_daddr_t		blkno,
-	uint			len)
-{
-	return xlog_find_buffer_cancelled(log, blkno, len) != NULL;
-}
-
-/*
- * Check if there is and entry for blkno, len in the buffer cancel record table,
- * and decremented the reference count on it if there is one.
- *
- * Remove the cancel record once the refcount hits zero, so that if the same
- * buffer is re-used again after its last cancellation we actually replay the
- * changes made at that point.
- */
-bool
-xlog_put_buffer_cancelled(
-	struct xlog		*log,
-	xfs_daddr_t		blkno,
-	uint			len)
-{
-	struct xfs_buf_cancel	*bcp;
-
-	bcp = xlog_find_buffer_cancelled(log, blkno, len);
-	if (!bcp) {
-		ASSERT(0);
-		return false;
-	}
-
-	if (--bcp->bc_refcount == 0) {
-		list_del(&bcp->bc_list);
-		kmem_free(bcp);
-	}
-	return true;
-}
-
 void
 xlog_buf_readahead(
 	struct xlog		*log,


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 27/28] xfs: remove unnecessary includes from xfs_log_recover.c
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (25 preceding siblings ...)
  2020-05-05  1:13 ` [PATCH 26/28] xfs: move log recovery buffer cancellation code to xfs_buf_item_recover.c Darrick J. Wong
@ 2020-05-05  1:13 ` Darrick J. Wong
  2020-05-06  7:21   ` Chandan Babu R
  2020-05-05  1:13 ` [PATCH 28/28] xfs: use parallel processing to clear unlinked metadata Darrick J. Wong
  27 siblings, 1 reply; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:13 UTC (permalink / raw)
  To: darrick.wong; +Cc: Christoph Hellwig, linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Remove unnecessary includes from the log recovery code.

Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log_recover.c |    8 --------
 1 file changed, 8 deletions(-)


diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0c8a1f4bf4ad..a9cc546535e0 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -18,21 +18,13 @@
 #include "xfs_log.h"
 #include "xfs_log_priv.h"
 #include "xfs_log_recover.h"
-#include "xfs_inode_item.h"
-#include "xfs_extfree_item.h"
 #include "xfs_trans_priv.h"
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
-#include "xfs_quota.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
-#include "xfs_bmap_btree.h"
 #include "xfs_error.h"
-#include "xfs_dir2.h"
-#include "xfs_rmap_item.h"
 #include "xfs_buf_item.h"
-#include "xfs_refcount_item.h"
-#include "xfs_bmap_item.h"
 
 #define BLK_AVG(blk1, blk2)	((blk1+blk2) >> 1)
 


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 28/28] xfs: use parallel processing to clear unlinked metadata
  2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
                   ` (26 preceding siblings ...)
  2020-05-05  1:13 ` [PATCH 27/28] xfs: remove unnecessary includes from xfs_log_recover.c Darrick J. Wong
@ 2020-05-05  1:13 ` Darrick J. Wong
  2020-05-06  7:57   ` Chandan Babu R
  2020-05-06 15:36   ` Christoph Hellwig
  27 siblings, 2 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-05  1:13 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Run all the unlinked metadata clearing work in parallel so that we can
take advantage of higher-performance storage devices.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_unlink_recover.c |   42 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)


diff --git a/fs/xfs/xfs_unlink_recover.c b/fs/xfs/xfs_unlink_recover.c
index fe7fa3d623f2..92ea81969e02 100644
--- a/fs/xfs/xfs_unlink_recover.c
+++ b/fs/xfs/xfs_unlink_recover.c
@@ -21,6 +21,7 @@
 #include "xfs_trans_priv.h"
 #include "xfs_ialloc.h"
 #include "xfs_icache.h"
+#include "xfs_pwork.h"
 
 /*
  * This routine performs a transaction to null out a bad inode pointer
@@ -195,19 +196,54 @@ xlog_recover_process_iunlinked(
 	return 0;
 }
 
+struct xlog_recover_unlinked {
+	struct xfs_pwork	pwork;
+	xfs_agnumber_t		agno;
+};
+
+static int
+xlog_recover_process_unlinked_ag(
+	struct xfs_mount		*mp,
+	struct xfs_pwork		*pwork)
+{
+	struct xlog_recover_unlinked	*ru;
+	int				error = 0;
+
+	ru = container_of(pwork, struct xlog_recover_unlinked, pwork);
+	if (xfs_pwork_want_abort(pwork))
+		goto out;
+
+	error = xlog_recover_process_iunlinked(mp, ru->agno);
+out:
+	kmem_free(ru);
+	return error;
+}
+
 int
 xlog_recover_process_unlinked(
 	struct xlog		*log)
 {
 	struct xfs_mount	*mp = log->l_mp;
+	struct xfs_pwork_ctl	pctl;
+	struct xlog_recover_unlinked *ru;
+	unsigned int		nr_threads;
 	xfs_agnumber_t		agno;
 	int			error;
 
+	nr_threads = xfs_pwork_guess_datadev_parallelism(mp);
+	error = xfs_pwork_init(mp, &pctl, xlog_recover_process_unlinked_ag,
+			"xlog_recover", nr_threads);
+	if (error)
+		return error;
+
 	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
-		error = xlog_recover_process_iunlinked(mp, agno);
-		if (error)
+		if (xfs_pwork_ctl_want_abort(&pctl))
 			break;
+
+		ru = kmem_zalloc(sizeof(struct xlog_recover_unlinked), 0);
+		ru->agno = agno;
+		xfs_pwork_queue(&pctl, &ru->pwork);
 	}
 
-	return error;
+	return xfs_pwork_destroy(&pctl);
 }


^ permalink raw reply related	[flat|nested] 94+ messages in thread

* Re: [PATCH 01/28] xfs: convert xfs_log_recover_item_t to struct xfs_log_recover_item
  2020-05-05  1:10 ` [PATCH 01/28] xfs: convert xfs_log_recover_item_t to struct xfs_log_recover_item Darrick J. Wong
@ 2020-05-05  3:33   ` Chandan Babu R
  2020-05-06 14:59   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  3:33 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:40:39 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Remove the old typedefs.
>

Straightforward change.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_log_recover.h |    4 ++--
>  fs/xfs/xfs_log_recover.c        |   26 ++++++++++++++------------
>  2 files changed, 16 insertions(+), 14 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
> index 3bf671637a91..148e0cb5d379 100644
> --- a/fs/xfs/libxfs/xfs_log_recover.h
> +++ b/fs/xfs/libxfs/xfs_log_recover.h
> @@ -22,13 +22,13 @@
>  /*
>   * item headers are in ri_buf[0].  Additional buffers follow.
>   */
> -typedef struct xlog_recover_item {
> +struct xlog_recover_item {
>  	struct list_head	ri_list;
>  	int			ri_type;
>  	int			ri_cnt;	/* count of regions found */
>  	int			ri_total;	/* total regions */
>  	xfs_log_iovec_t		*ri_buf;	/* ptr to regions buffer */
> -} xlog_recover_item_t;
> +};
>  
>  struct xlog_recover {
>  	struct hlist_node	r_list;
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index d0e2dd81de53..c2c06f70fb8a 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -1841,7 +1841,7 @@ xlog_recover_reorder_trans(
>  	struct xlog_recover	*trans,
>  	int			pass)
>  {
> -	xlog_recover_item_t	*item, *n;
> +	struct xlog_recover_item *item, *n;
>  	int			error = 0;
>  	LIST_HEAD(sort_list);
>  	LIST_HEAD(cancel_list);
> @@ -2056,7 +2056,7 @@ xlog_recover_buffer_pass1(
>  STATIC int
>  xlog_recover_do_inode_buffer(
>  	struct xfs_mount	*mp,
> -	xlog_recover_item_t	*item,
> +	struct xlog_recover_item *item,
>  	struct xfs_buf		*bp,
>  	xfs_buf_log_format_t	*buf_f)
>  {
> @@ -2561,7 +2561,7 @@ xlog_recover_validate_buf_type(
>  STATIC void
>  xlog_recover_do_reg_buffer(
>  	struct xfs_mount	*mp,
> -	xlog_recover_item_t	*item,
> +	struct xlog_recover_item *item,
>  	struct xfs_buf		*bp,
>  	xfs_buf_log_format_t	*buf_f,
>  	xfs_lsn_t		current_lsn)
> @@ -3759,7 +3759,7 @@ STATIC int
>  xlog_recover_do_icreate_pass2(
>  	struct xlog		*log,
>  	struct list_head	*buffer_list,
> -	xlog_recover_item_t	*item)
> +	struct xlog_recover_item *item)
>  {
>  	struct xfs_mount	*mp = log->l_mp;
>  	struct xfs_icreate_log	*icl;
> @@ -4134,9 +4134,9 @@ STATIC void
>  xlog_recover_add_item(
>  	struct list_head	*head)
>  {
> -	xlog_recover_item_t	*item;
> +	struct xlog_recover_item *item;
>  
> -	item = kmem_zalloc(sizeof(xlog_recover_item_t), 0);
> +	item = kmem_zalloc(sizeof(struct xlog_recover_item), 0);
>  	INIT_LIST_HEAD(&item->ri_list);
>  	list_add_tail(&item->ri_list, head);
>  }
> @@ -4148,7 +4148,7 @@ xlog_recover_add_to_cont_trans(
>  	char			*dp,
>  	int			len)
>  {
> -	xlog_recover_item_t	*item;
> +	struct xlog_recover_item *item;
>  	char			*ptr, *old_ptr;
>  	int			old_len;
>  
> @@ -4171,7 +4171,8 @@ xlog_recover_add_to_cont_trans(
>  	}
>  
>  	/* take the tail entry */
> -	item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
> +	item = list_entry(trans->r_itemq.prev, struct xlog_recover_item,
> +			  ri_list);
>  
>  	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
>  	old_len = item->ri_buf[item->ri_cnt-1].i_len;
> @@ -4205,7 +4206,7 @@ xlog_recover_add_to_trans(
>  	int			len)
>  {
>  	struct xfs_inode_log_format	*in_f;			/* any will do */
> -	xlog_recover_item_t	*item;
> +	struct xlog_recover_item *item;
>  	char			*ptr;
>  
>  	if (!len)
> @@ -4241,13 +4242,14 @@ xlog_recover_add_to_trans(
>  	in_f = (struct xfs_inode_log_format *)ptr;
>  
>  	/* take the tail entry */
> -	item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
> +	item = list_entry(trans->r_itemq.prev, struct xlog_recover_item,
> +			  ri_list);
>  	if (item->ri_total != 0 &&
>  	     item->ri_total == item->ri_cnt) {
>  		/* tail item is in use, get a new one */
>  		xlog_recover_add_item(&trans->r_itemq);
>  		item = list_entry(trans->r_itemq.prev,
> -					xlog_recover_item_t, ri_list);
> +					struct xlog_recover_item, ri_list);
>  	}
>  
>  	if (item->ri_total == 0) {		/* first region to be added */
> @@ -4293,7 +4295,7 @@ STATIC void
>  xlog_recover_free_trans(
>  	struct xlog_recover	*trans)
>  {
> -	xlog_recover_item_t	*item, *n;
> +	struct xlog_recover_item *item, *n;
>  	int			i;
>  
>  	hlist_del_init(&trans->r_list);
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 02/28] xfs: refactor log recovery item sorting into a generic dispatch structure
  2020-05-05  1:10 ` [PATCH 02/28] xfs: refactor log recovery item sorting into a generic dispatch structure Darrick J. Wong
@ 2020-05-05  4:11   ` Chandan Babu R
  2020-05-06 15:03   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  4:11 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:40:45 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Create a generic dispatch structure to delegate recovery of different
> log item types into various code modules.  This will enable us to move
> code specific to a particular log item type out of xfs_log_recover.c and
> into the log item source.
> 
> The first operation we virtualize is the log item sorting.
>

The sorted list order is maintained exactly as it was before.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/Makefile                 |    3 +
>  fs/xfs/libxfs/xfs_log_recover.h |   45 ++++++++++++++++++-
>  fs/xfs/xfs_bmap_item.c          |    9 ++++
>  fs/xfs/xfs_buf_item_recover.c   |   38 ++++++++++++++++
>  fs/xfs/xfs_dquot_item_recover.c |   29 ++++++++++++
>  fs/xfs/xfs_extfree_item.c       |    9 ++++
>  fs/xfs/xfs_icreate_item.c       |   20 ++++++++
>  fs/xfs/xfs_inode_item_recover.c |   26 +++++++++++
>  fs/xfs/xfs_log_recover.c        |   93 +++++++++++++++++++++++----------------
>  fs/xfs/xfs_refcount_item.c      |    9 ++++
>  fs/xfs/xfs_rmap_item.c          |    9 ++++
>  11 files changed, 251 insertions(+), 39 deletions(-)
>  create mode 100644 fs/xfs/xfs_buf_item_recover.c
>  create mode 100644 fs/xfs/xfs_dquot_item_recover.c
>  create mode 100644 fs/xfs/xfs_inode_item_recover.c
> 
> 
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index ff94fb90a2ee..04611a1068b4 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -99,9 +99,12 @@ xfs-y				+= xfs_log.o \
>  				   xfs_log_cil.o \
>  				   xfs_bmap_item.o \
>  				   xfs_buf_item.o \
> +				   xfs_buf_item_recover.o \
> +				   xfs_dquot_item_recover.o \
>  				   xfs_extfree_item.o \
>  				   xfs_icreate_item.o \
>  				   xfs_inode_item.o \
> +				   xfs_inode_item_recover.o \
>  				   xfs_refcount_item.o \
>  				   xfs_rmap_item.o \
>  				   xfs_log_recover.o \
> diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
> index 148e0cb5d379..271b0741f1e1 100644
> --- a/fs/xfs/libxfs/xfs_log_recover.h
> +++ b/fs/xfs/libxfs/xfs_log_recover.h
> @@ -6,6 +6,47 @@
>  #ifndef	__XFS_LOG_RECOVER_H__
>  #define __XFS_LOG_RECOVER_H__
>  
> +/*
> + * Each log item type (XFS_LI_*) gets its own xlog_recover_item_ops to
> + * define how recovery should work for that type of log item.
> + */
> +struct xlog_recover_item;
> +
> +/* Sorting hat for log items as they're read in. */
> +enum xlog_recover_reorder {
> +	XLOG_REORDER_BUFFER_LIST,
> +	XLOG_REORDER_ITEM_LIST,
> +	XLOG_REORDER_INODE_BUFFER_LIST,
> +	XLOG_REORDER_CANCEL_LIST,
> +};
> +
> +struct xlog_recover_item_ops {
> +	uint16_t	item_type;	/* XFS_LI_* type code. */
> +
> +	/*
> +	 * Help sort recovered log items into the order required to replay them
> +	 * correctly.  Log item types that always use XLOG_REORDER_ITEM_LIST do
> +	 * not have to supply a function here.  See the comment preceding
> +	 * xlog_recover_reorder_trans for more details about what the return
> +	 * values mean.
> +	 */
> +	enum xlog_recover_reorder (*reorder)(struct xlog_recover_item *item);
> +};
> +
> +extern const struct xlog_recover_item_ops xlog_icreate_item_ops;
> +extern const struct xlog_recover_item_ops xlog_buf_item_ops;
> +extern const struct xlog_recover_item_ops xlog_inode_item_ops;
> +extern const struct xlog_recover_item_ops xlog_dquot_item_ops;
> +extern const struct xlog_recover_item_ops xlog_quotaoff_item_ops;
> +extern const struct xlog_recover_item_ops xlog_bmap_intent_item_ops;
> +extern const struct xlog_recover_item_ops xlog_bmap_done_item_ops;
> +extern const struct xlog_recover_item_ops xlog_extfree_intent_item_ops;
> +extern const struct xlog_recover_item_ops xlog_extfree_done_item_ops;
> +extern const struct xlog_recover_item_ops xlog_rmap_intent_item_ops;
> +extern const struct xlog_recover_item_ops xlog_rmap_done_item_ops;
> +extern const struct xlog_recover_item_ops xlog_refcount_intent_item_ops;
> +extern const struct xlog_recover_item_ops xlog_refcount_done_item_ops;
> +
>  /*
>   * Macros, structures, prototypes for internal log manager use.
>   */
> @@ -24,10 +65,10 @@
>   */
>  struct xlog_recover_item {
>  	struct list_head	ri_list;
> -	int			ri_type;
>  	int			ri_cnt;	/* count of regions found */
>  	int			ri_total;	/* total regions */
> -	xfs_log_iovec_t		*ri_buf;	/* ptr to regions buffer */
> +	struct xfs_log_iovec	*ri_buf;	/* ptr to regions buffer */
> +	const struct xlog_recover_item_ops *ri_ops;
>  };
>  
>  struct xlog_recover {
> diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
> index 7768fb2b7135..42354403fec7 100644
> --- a/fs/xfs/xfs_bmap_item.c
> +++ b/fs/xfs/xfs_bmap_item.c
> @@ -22,6 +22,7 @@
>  #include "xfs_bmap_btree.h"
>  #include "xfs_trans_space.h"
>  #include "xfs_error.h"
> +#include "xfs_log_recover.h"
>  
>  kmem_zone_t	*xfs_bui_zone;
>  kmem_zone_t	*xfs_bud_zone;
> @@ -557,3 +558,11 @@ xfs_bui_recover(
>  	}
>  	return error;
>  }
> +
> +const struct xlog_recover_item_ops xlog_bmap_intent_item_ops = {
> +	.item_type		= XFS_LI_BUI,
> +};
> +
> +const struct xlog_recover_item_ops xlog_bmap_done_item_ops = {
> +	.item_type		= XFS_LI_BUD,
> +};
> diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
> new file mode 100644
> index 000000000000..def19025512e
> --- /dev/null
> +++ b/fs/xfs/xfs_buf_item_recover.c
> @@ -0,0 +1,38 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2000-2006 Silicon Graphics, Inc.
> + * All Rights Reserved.
> + */
> +#include "xfs.h"
> +#include "xfs_fs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_bit.h"
> +#include "xfs_mount.h"
> +#include "xfs_trans.h"
> +#include "xfs_buf_item.h"
> +#include "xfs_trans_priv.h"
> +#include "xfs_trace.h"
> +#include "xfs_log.h"
> +#include "xfs_log_priv.h"
> +#include "xfs_log_recover.h"
> +
> +STATIC enum xlog_recover_reorder
> +xlog_recover_buf_reorder(
> +	struct xlog_recover_item	*item)
> +{
> +	struct xfs_buf_log_format	*buf_f = item->ri_buf[0].i_addr;
> +
> +	if (buf_f->blf_flags & XFS_BLF_CANCEL)
> +		return XLOG_REORDER_CANCEL_LIST;
> +	if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
> +		return XLOG_REORDER_INODE_BUFFER_LIST;
> +	return XLOG_REORDER_BUFFER_LIST;
> +}
> +
> +const struct xlog_recover_item_ops xlog_buf_item_ops = {
> +	.item_type		= XFS_LI_BUF,
> +	.reorder		= xlog_recover_buf_reorder,
> +};
> diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
> new file mode 100644
> index 000000000000..78fe644e9907
> --- /dev/null
> +++ b/fs/xfs/xfs_dquot_item_recover.c
> @@ -0,0 +1,29 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2000-2006 Silicon Graphics, Inc.
> + * All Rights Reserved.
> + */
> +#include "xfs.h"
> +#include "xfs_fs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_inode.h"
> +#include "xfs_quota.h"
> +#include "xfs_trans.h"
> +#include "xfs_buf_item.h"
> +#include "xfs_trans_priv.h"
> +#include "xfs_qm.h"
> +#include "xfs_log.h"
> +#include "xfs_log_priv.h"
> +#include "xfs_log_recover.h"
> +
> +const struct xlog_recover_item_ops xlog_dquot_item_ops = {
> +	.item_type		= XFS_LI_DQUOT,
> +};
> +
> +const struct xlog_recover_item_ops xlog_quotaoff_item_ops = {
> +	.item_type		= XFS_LI_QUOTAOFF,
> +};
> diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
> index c8cde4122a0f..b43bb087aef3 100644
> --- a/fs/xfs/xfs_extfree_item.c
> +++ b/fs/xfs/xfs_extfree_item.c
> @@ -22,6 +22,7 @@
>  #include "xfs_bmap.h"
>  #include "xfs_trace.h"
>  #include "xfs_error.h"
> +#include "xfs_log_recover.h"
>  
>  kmem_zone_t	*xfs_efi_zone;
>  kmem_zone_t	*xfs_efd_zone;
> @@ -644,3 +645,11 @@ xfs_efi_recover(
>  	xfs_trans_cancel(tp);
>  	return error;
>  }
> +
> +const struct xlog_recover_item_ops xlog_extfree_intent_item_ops = {
> +	.item_type		= XFS_LI_EFI,
> +};
> +
> +const struct xlog_recover_item_ops xlog_extfree_done_item_ops = {
> +	.item_type		= XFS_LI_EFD,
> +};
> diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
> index 490fee22b878..366c1e722a29 100644
> --- a/fs/xfs/xfs_icreate_item.c
> +++ b/fs/xfs/xfs_icreate_item.c
> @@ -11,6 +11,8 @@
>  #include "xfs_trans_priv.h"
>  #include "xfs_icreate_item.h"
>  #include "xfs_log.h"
> +#include "xfs_log_priv.h"
> +#include "xfs_log_recover.h"
>  
>  kmem_zone_t	*xfs_icreate_zone;		/* inode create item zone */
>  
> @@ -107,3 +109,21 @@ xfs_icreate_log(
>  	tp->t_flags |= XFS_TRANS_DIRTY;
>  	set_bit(XFS_LI_DIRTY, &icp->ic_item.li_flags);
>  }
> +
> +static enum xlog_recover_reorder
> +xlog_recover_icreate_reorder(
> +		struct xlog_recover_item *item)
> +{
> +	/*
> +	 * Inode allocation buffers must be replayed before subsequent inode
> +	 * items try to modify those buffers.  ICREATE items are the logical
> +	 * equivalent of logging a newly initialized inode buffer, so recover
> +	 * these at the same time that we recover logged buffers.
> +	 */
> +	return XLOG_REORDER_BUFFER_LIST;
> +}
> +
> +const struct xlog_recover_item_ops xlog_icreate_item_ops = {
> +	.item_type		= XFS_LI_ICREATE,
> +	.reorder		= xlog_recover_icreate_reorder,
> +};
> diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
> new file mode 100644
> index 000000000000..b19a151efb10
> --- /dev/null
> +++ b/fs/xfs/xfs_inode_item_recover.c
> @@ -0,0 +1,26 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2000-2006 Silicon Graphics, Inc.
> + * All Rights Reserved.
> + */
> +#include "xfs.h"
> +#include "xfs_fs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_inode.h"
> +#include "xfs_trans.h"
> +#include "xfs_inode_item.h"
> +#include "xfs_trace.h"
> +#include "xfs_trans_priv.h"
> +#include "xfs_buf_item.h"
> +#include "xfs_log.h"
> +#include "xfs_error.h"
> +#include "xfs_log_priv.h"
> +#include "xfs_log_recover.h"
> +
> +const struct xlog_recover_item_ops xlog_inode_item_ops = {
> +	.item_type		= XFS_LI_INODE,
> +};
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index c2c06f70fb8a..0ef0d81fd190 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -1785,6 +1785,34 @@ xlog_clear_stale_blocks(
>   *
>   ******************************************************************************
>   */
> +static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = {
> +	&xlog_buf_item_ops,
> +	&xlog_inode_item_ops,
> +	&xlog_dquot_item_ops,
> +	&xlog_quotaoff_item_ops,
> +	&xlog_icreate_item_ops,
> +	&xlog_extfree_intent_item_ops,
> +	&xlog_extfree_done_item_ops,
> +	&xlog_rmap_intent_item_ops,
> +	&xlog_rmap_done_item_ops,
> +	&xlog_refcount_intent_item_ops,
> +	&xlog_refcount_done_item_ops,
> +	&xlog_bmap_intent_item_ops,
> +	&xlog_bmap_done_item_ops,
> +};
> +
> +static const struct xlog_recover_item_ops *
> +xlog_find_item_ops(
> +	struct xlog_recover_item		*item)
> +{
> +	unsigned int				i;
> +
> +	for (i = 0; i < ARRAY_SIZE(xlog_recover_item_ops); i++)
> +		if (ITEM_TYPE(item) == xlog_recover_item_ops[i]->item_type)
> +			return xlog_recover_item_ops[i];
> +
> +	return NULL;
> +}
>  
>  /*
>   * Sort the log items in the transaction.
> @@ -1851,41 +1879,10 @@ xlog_recover_reorder_trans(
>  
>  	list_splice_init(&trans->r_itemq, &sort_list);
>  	list_for_each_entry_safe(item, n, &sort_list, ri_list) {
> -		xfs_buf_log_format_t	*buf_f = item->ri_buf[0].i_addr;
> +		enum xlog_recover_reorder	fate = XLOG_REORDER_ITEM_LIST;
>  
> -		switch (ITEM_TYPE(item)) {
> -		case XFS_LI_ICREATE:
> -			list_move_tail(&item->ri_list, &buffer_list);
> -			break;
> -		case XFS_LI_BUF:
> -			if (buf_f->blf_flags & XFS_BLF_CANCEL) {
> -				trace_xfs_log_recover_item_reorder_head(log,
> -							trans, item, pass);
> -				list_move(&item->ri_list, &cancel_list);
> -				break;
> -			}
> -			if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
> -				list_move(&item->ri_list, &inode_buffer_list);
> -				break;
> -			}
> -			list_move_tail(&item->ri_list, &buffer_list);
> -			break;
> -		case XFS_LI_INODE:
> -		case XFS_LI_DQUOT:
> -		case XFS_LI_QUOTAOFF:
> -		case XFS_LI_EFD:
> -		case XFS_LI_EFI:
> -		case XFS_LI_RUI:
> -		case XFS_LI_RUD:
> -		case XFS_LI_CUI:
> -		case XFS_LI_CUD:
> -		case XFS_LI_BUI:
> -		case XFS_LI_BUD:
> -			trace_xfs_log_recover_item_reorder_tail(log,
> -							trans, item, pass);
> -			list_move_tail(&item->ri_list, &item_list);
> -			break;
> -		default:
> +		item->ri_ops = xlog_find_item_ops(item);
> +		if (!item->ri_ops) {
>  			xfs_warn(log->l_mp,
>  				"%s: unrecognized type of log operation (%d)",
>  				__func__, ITEM_TYPE(item));
> @@ -1896,11 +1893,33 @@ xlog_recover_reorder_trans(
>  			 */
>  			if (!list_empty(&sort_list))
>  				list_splice_init(&sort_list, &trans->r_itemq);
> -			error = -EIO;
> -			goto out;
> +			error = -EFSCORRUPTED;
> +			break;
> +		}
> +
> +		if (item->ri_ops->reorder)
> +			fate = item->ri_ops->reorder(item);
> +
> +		switch (fate) {
> +		case XLOG_REORDER_BUFFER_LIST:
> +			list_move_tail(&item->ri_list, &buffer_list);
> +			break;
> +		case XLOG_REORDER_CANCEL_LIST:
> +			trace_xfs_log_recover_item_reorder_head(log,
> +					trans, item, pass);
> +			list_move(&item->ri_list, &cancel_list);
> +			break;
> +		case XLOG_REORDER_INODE_BUFFER_LIST:
> +			list_move(&item->ri_list, &inode_buffer_list);
> +			break;
> +		case XLOG_REORDER_ITEM_LIST:
> +			trace_xfs_log_recover_item_reorder_tail(log,
> +							trans, item, pass);
> +			list_move_tail(&item->ri_list, &item_list);
> +			break;
>  		}
>  	}
> -out:
> +
>  	ASSERT(list_empty(&sort_list));
>  	if (!list_empty(&buffer_list))
>  		list_splice(&buffer_list, &trans->r_itemq);
> diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
> index 0316eab2fc35..0e8e8bab4344 100644
> --- a/fs/xfs/xfs_refcount_item.c
> +++ b/fs/xfs/xfs_refcount_item.c
> @@ -18,6 +18,7 @@
>  #include "xfs_log.h"
>  #include "xfs_refcount.h"
>  #include "xfs_error.h"
> +#include "xfs_log_recover.h"
>  
>  kmem_zone_t	*xfs_cui_zone;
>  kmem_zone_t	*xfs_cud_zone;
> @@ -570,3 +571,11 @@ xfs_cui_recover(
>  	xfs_trans_cancel(tp);
>  	return error;
>  }
> +
> +const struct xlog_recover_item_ops xlog_refcount_intent_item_ops = {
> +	.item_type		= XFS_LI_CUI,
> +};
> +
> +const struct xlog_recover_item_ops xlog_refcount_done_item_ops = {
> +	.item_type		= XFS_LI_CUD,
> +};
> diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
> index e3bba2aec868..3eb538674cb9 100644
> --- a/fs/xfs/xfs_rmap_item.c
> +++ b/fs/xfs/xfs_rmap_item.c
> @@ -18,6 +18,7 @@
>  #include "xfs_log.h"
>  #include "xfs_rmap.h"
>  #include "xfs_error.h"
> +#include "xfs_log_recover.h"
>  
>  kmem_zone_t	*xfs_rui_zone;
>  kmem_zone_t	*xfs_rud_zone;
> @@ -585,3 +586,11 @@ xfs_rui_recover(
>  	xfs_trans_cancel(tp);
>  	return error;
>  }
> +
> +const struct xlog_recover_item_ops xlog_rmap_intent_item_ops = {
> +	.item_type		= XFS_LI_RUI,
> +};
> +
> +const struct xlog_recover_item_ops xlog_rmap_done_item_ops = {
> +	.item_type		= XFS_LI_RUD,
> +};
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 03/28] xfs: refactor log recovery item dispatch for pass2 readhead functions
  2020-05-05  1:10 ` [PATCH 03/28] xfs: refactor log recovery item dispatch for pass2 readhead functions Darrick J. Wong
@ 2020-05-05  4:32   ` Chandan Babu R
  2020-05-06 15:04   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  4:32 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:40:51 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the pass2 readahead code into the per-item source code files and use
> the dispatch function to call them.
>

Buf, Inode and Dquot items were read ahead before the patch modifications.
These are the only items on which readahead is started in this patch.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_log_recover.h |    6 ++
>  fs/xfs/xfs_buf_item_recover.c   |   11 +++++
>  fs/xfs/xfs_dquot_item_recover.c |   34 ++++++++++++++
>  fs/xfs/xfs_inode_item_recover.c |   19 ++++++++
>  fs/xfs/xfs_log_recover.c        |   95 +--------------------------------------
>  5 files changed, 73 insertions(+), 92 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
> index 271b0741f1e1..ff80871138bb 100644
> --- a/fs/xfs/libxfs/xfs_log_recover.h
> +++ b/fs/xfs/libxfs/xfs_log_recover.h
> @@ -31,6 +31,9 @@ struct xlog_recover_item_ops {
>  	 * values mean.
>  	 */
>  	enum xlog_recover_reorder (*reorder)(struct xlog_recover_item *item);
> +
> +	/* Start readahead for pass2, if provided. */
> +	void (*ra_pass2)(struct xlog *log, struct xlog_recover_item *item);
>  };
>  
>  extern const struct xlog_recover_item_ops xlog_icreate_item_ops;
> @@ -92,4 +95,7 @@ struct xlog_recover {
>  #define	XLOG_RECOVER_PASS1	1
>  #define	XLOG_RECOVER_PASS2	2
>  
> +void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len,
> +		const struct xfs_buf_ops *ops);
> +
>  #endif	/* __XFS_LOG_RECOVER_H__ */
> diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
> index def19025512e..a1327196b690 100644
> --- a/fs/xfs/xfs_buf_item_recover.c
> +++ b/fs/xfs/xfs_buf_item_recover.c
> @@ -32,7 +32,18 @@ xlog_recover_buf_reorder(
>  	return XLOG_REORDER_BUFFER_LIST;
>  }
>  
> +STATIC void
> +xlog_recover_buf_ra_pass2(
> +	struct xlog                     *log,
> +	struct xlog_recover_item        *item)
> +{
> +	struct xfs_buf_log_format	*buf_f = item->ri_buf[0].i_addr;
> +
> +	xlog_buf_readahead(log, buf_f->blf_blkno, buf_f->blf_len, NULL);
> +}
> +
>  const struct xlog_recover_item_ops xlog_buf_item_ops = {
>  	.item_type		= XFS_LI_BUF,
>  	.reorder		= xlog_recover_buf_reorder,
> +	.ra_pass2		= xlog_recover_buf_ra_pass2,
>  };
> diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
> index 78fe644e9907..215274173b70 100644
> --- a/fs/xfs/xfs_dquot_item_recover.c
> +++ b/fs/xfs/xfs_dquot_item_recover.c
> @@ -20,8 +20,42 @@
>  #include "xfs_log_priv.h"
>  #include "xfs_log_recover.h"
>  
> +STATIC void
> +xlog_recover_dquot_ra_pass2(
> +	struct xlog			*log,
> +	struct xlog_recover_item	*item)
> +{
> +	struct xfs_mount	*mp = log->l_mp;
> +	struct xfs_disk_dquot	*recddq;
> +	struct xfs_dq_logformat	*dq_f;
> +	uint			type;
> +
> +	if (mp->m_qflags == 0)
> +		return;
> +
> +	recddq = item->ri_buf[1].i_addr;
> +	if (recddq == NULL)
> +		return;
> +	if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot))
> +		return;
> +
> +	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
> +	ASSERT(type);
> +	if (log->l_quotaoffs_flag & type)
> +		return;
> +
> +	dq_f = item->ri_buf[0].i_addr;
> +	ASSERT(dq_f);
> +	ASSERT(dq_f->qlf_len == 1);
> +
> +	xlog_buf_readahead(log, dq_f->qlf_blkno,
> +			XFS_FSB_TO_BB(mp, dq_f->qlf_len),
> +			&xfs_dquot_buf_ra_ops);
> +}
> +
>  const struct xlog_recover_item_ops xlog_dquot_item_ops = {
>  	.item_type		= XFS_LI_DQUOT,
> +	.ra_pass2		= xlog_recover_dquot_ra_pass2,
>  };
>  
>  const struct xlog_recover_item_ops xlog_quotaoff_item_ops = {
> diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
> index b19a151efb10..a132cacd8d48 100644
> --- a/fs/xfs/xfs_inode_item_recover.c
> +++ b/fs/xfs/xfs_inode_item_recover.c
> @@ -21,6 +21,25 @@
>  #include "xfs_log_priv.h"
>  #include "xfs_log_recover.h"
>  
> +STATIC void
> +xlog_recover_inode_ra_pass2(
> +	struct xlog                     *log,
> +	struct xlog_recover_item        *item)
> +{
> +	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
> +		struct xfs_inode_log_format	*ilfp = item->ri_buf[0].i_addr;
> +
> +		xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
> +				   &xfs_inode_buf_ra_ops);
> +	} else {
> +		struct xfs_inode_log_format_32	*ilfp = item->ri_buf[0].i_addr;
> +
> +		xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
> +				   &xfs_inode_buf_ra_ops);
> +	}
> +}
> +
>  const struct xlog_recover_item_ops xlog_inode_item_ops = {
>  	.item_type		= XFS_LI_INODE,
> +	.ra_pass2		= xlog_recover_inode_ra_pass2,
>  };
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 0ef0d81fd190..ea566747d8e1 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2023,7 +2023,7 @@ xlog_put_buffer_cancelled(
>  	return true;
>  }
>  
> -static void
> +void
>  xlog_buf_readahead(
>  	struct xlog		*log,
>  	xfs_daddr_t		blkno,
> @@ -3890,96 +3890,6 @@ xlog_recover_do_icreate_pass2(
>  				     length, be32_to_cpu(icl->icl_gen));
>  }
>  
> -STATIC void
> -xlog_recover_buffer_ra_pass2(
> -	struct xlog                     *log,
> -	struct xlog_recover_item        *item)
> -{
> -	struct xfs_buf_log_format	*buf_f = item->ri_buf[0].i_addr;
> -
> -	xlog_buf_readahead(log, buf_f->blf_blkno, buf_f->blf_len, NULL);
> -}
> -
> -STATIC void
> -xlog_recover_inode_ra_pass2(
> -	struct xlog                     *log,
> -	struct xlog_recover_item        *item)
> -{
> -	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
> -		struct xfs_inode_log_format	*ilfp = item->ri_buf[0].i_addr;
> -
> -		xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
> -				   &xfs_inode_buf_ra_ops);
> -	} else {
> -		struct xfs_inode_log_format_32	*ilfp = item->ri_buf[0].i_addr;
> -
> -		xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
> -				   &xfs_inode_buf_ra_ops);
> -	}
> -}
> -
> -STATIC void
> -xlog_recover_dquot_ra_pass2(
> -	struct xlog			*log,
> -	struct xlog_recover_item	*item)
> -{
> -	struct xfs_mount	*mp = log->l_mp;
> -	struct xfs_disk_dquot	*recddq;
> -	struct xfs_dq_logformat	*dq_f;
> -	uint			type;
> -
> -	if (mp->m_qflags == 0)
> -		return;
> -
> -	recddq = item->ri_buf[1].i_addr;
> -	if (recddq == NULL)
> -		return;
> -	if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot))
> -		return;
> -
> -	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
> -	ASSERT(type);
> -	if (log->l_quotaoffs_flag & type)
> -		return;
> -
> -	dq_f = item->ri_buf[0].i_addr;
> -	ASSERT(dq_f);
> -	ASSERT(dq_f->qlf_len == 1);
> -
> -	xlog_buf_readahead(log, dq_f->qlf_blkno,
> -			XFS_FSB_TO_BB(mp, dq_f->qlf_len),
> -			&xfs_dquot_buf_ra_ops);
> -}
> -
> -STATIC void
> -xlog_recover_ra_pass2(
> -	struct xlog			*log,
> -	struct xlog_recover_item	*item)
> -{
> -	switch (ITEM_TYPE(item)) {
> -	case XFS_LI_BUF:
> -		xlog_recover_buffer_ra_pass2(log, item);
> -		break;
> -	case XFS_LI_INODE:
> -		xlog_recover_inode_ra_pass2(log, item);
> -		break;
> -	case XFS_LI_DQUOT:
> -		xlog_recover_dquot_ra_pass2(log, item);
> -		break;
> -	case XFS_LI_EFI:
> -	case XFS_LI_EFD:
> -	case XFS_LI_QUOTAOFF:
> -	case XFS_LI_RUI:
> -	case XFS_LI_RUD:
> -	case XFS_LI_CUI:
> -	case XFS_LI_CUD:
> -	case XFS_LI_BUI:
> -	case XFS_LI_BUD:
> -	default:
> -		break;
> -	}
> -}
> -
>  STATIC int
>  xlog_recover_commit_pass1(
>  	struct xlog			*log,
> @@ -4116,7 +4026,8 @@ xlog_recover_commit_trans(
>  			error = xlog_recover_commit_pass1(log, trans, item);
>  			break;
>  		case XLOG_RECOVER_PASS2:
> -			xlog_recover_ra_pass2(log, item);
> +			if (item->ri_ops->ra_pass2)
> +				item->ri_ops->ra_pass2(log, item);
>  			list_move_tail(&item->ri_list, &ra_list);
>  			items_queued++;
>  			if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) {
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 04/28] xfs: refactor log recovery item dispatch for pass1 commit functions
  2020-05-05  1:10 ` [PATCH 04/28] xfs: refactor log recovery item dispatch for pass1 commit functions Darrick J. Wong
@ 2020-05-05  4:40   ` Chandan Babu R
  2020-05-06 15:07   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  4:40 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:40:57 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the pass1 commit code into the per-item source code files and use
> the dispatch function to call them.
>

Buf and Quotaoff items need to be processed during pass1's commit phase. This
is correctly done in this patch.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_log_recover.h |    4 ++
>  fs/xfs/xfs_buf_item_recover.c   |   27 ++++++++++
>  fs/xfs/xfs_dquot_item_recover.c |   28 +++++++++++
>  fs/xfs/xfs_log_recover.c        |  101 +++++----------------------------------
>  4 files changed, 71 insertions(+), 89 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
> index ff80871138bb..384b70d58993 100644
> --- a/fs/xfs/libxfs/xfs_log_recover.h
> +++ b/fs/xfs/libxfs/xfs_log_recover.h
> @@ -34,6 +34,9 @@ struct xlog_recover_item_ops {
>  
>  	/* Start readahead for pass2, if provided. */
>  	void (*ra_pass2)(struct xlog *log, struct xlog_recover_item *item);
> +
> +	/* Do whatever work we need to do for pass1, if provided. */
> +	int (*commit_pass1)(struct xlog *log, struct xlog_recover_item *item);
>  };
>  
>  extern const struct xlog_recover_item_ops xlog_icreate_item_ops;
> @@ -97,5 +100,6 @@ struct xlog_recover {
>  
>  void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len,
>  		const struct xfs_buf_ops *ops);
> +bool xlog_add_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
>  
>  #endif	/* __XFS_LOG_RECOVER_H__ */
> diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
> index a1327196b690..802f2206516d 100644
> --- a/fs/xfs/xfs_buf_item_recover.c
> +++ b/fs/xfs/xfs_buf_item_recover.c
> @@ -42,8 +42,35 @@ xlog_recover_buf_ra_pass2(
>  	xlog_buf_readahead(log, buf_f->blf_blkno, buf_f->blf_len, NULL);
>  }
>  
> +/*
> + * Build up the table of buf cancel records so that we don't replay cancelled
> + * data in the second pass.
> + */
> +static int
> +xlog_recover_buf_commit_pass1(
> +	struct xlog			*log,
> +	struct xlog_recover_item	*item)
> +{
> +	struct xfs_buf_log_format	*bf = item->ri_buf[0].i_addr;
> +
> +	if (!xfs_buf_log_check_iovec(&item->ri_buf[0])) {
> +		xfs_err(log->l_mp, "bad buffer log item size (%d)",
> +				item->ri_buf[0].i_len);
> +		return -EFSCORRUPTED;
> +	}
> +
> +	if (!(bf->blf_flags & XFS_BLF_CANCEL))
> +		trace_xfs_log_recover_buf_not_cancel(log, bf);
> +	else if (xlog_add_buffer_cancelled(log, bf->blf_blkno, bf->blf_len))
> +		trace_xfs_log_recover_buf_cancel_add(log, bf);
> +	else
> +		trace_xfs_log_recover_buf_cancel_ref_inc(log, bf);
> +	return 0;
> +}
> +
>  const struct xlog_recover_item_ops xlog_buf_item_ops = {
>  	.item_type		= XFS_LI_BUF,
>  	.reorder		= xlog_recover_buf_reorder,
>  	.ra_pass2		= xlog_recover_buf_ra_pass2,
> +	.commit_pass1		= xlog_recover_buf_commit_pass1,
>  };
> diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
> index 215274173b70..ebc44c1bc2b1 100644
> --- a/fs/xfs/xfs_dquot_item_recover.c
> +++ b/fs/xfs/xfs_dquot_item_recover.c
> @@ -58,6 +58,34 @@ const struct xlog_recover_item_ops xlog_dquot_item_ops = {
>  	.ra_pass2		= xlog_recover_dquot_ra_pass2,
>  };
>  
> +/*
> + * Recover QUOTAOFF records. We simply make a note of them in the xlog
> + * structure so that we know not to do any dquot item or dquot buffer recovery
> + * of that type.
> + */
> +STATIC int
> +xlog_recover_quotaoff_commit_pass1(
> +	struct xlog			*log,
> +	struct xlog_recover_item	*item)
> +{
> +	struct xfs_qoff_logformat	*qoff_f = item->ri_buf[0].i_addr;
> +	ASSERT(qoff_f);
> +
> +	/*
> +	 * The logitem format's flag tells us if this was user quotaoff,
> +	 * group/project quotaoff or both.
> +	 */
> +	if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
> +		log->l_quotaoffs_flag |= XFS_DQ_USER;
> +	if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
> +		log->l_quotaoffs_flag |= XFS_DQ_PROJ;
> +	if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
> +		log->l_quotaoffs_flag |= XFS_DQ_GROUP;
> +
> +	return 0;
> +}
> +
>  const struct xlog_recover_item_ops xlog_quotaoff_item_ops = {
>  	.item_type		= XFS_LI_QUOTAOFF,
> +	.commit_pass1		= xlog_recover_quotaoff_commit_pass1,
>  };
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index ea566747d8e1..b3627ebf870e 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -1953,7 +1953,7 @@ xlog_find_buffer_cancelled(
>  	return NULL;
>  }
>  
> -static bool
> +bool
>  xlog_add_buffer_cancelled(
>  	struct xlog		*log,
>  	xfs_daddr_t		blkno,
> @@ -2034,32 +2034,6 @@ xlog_buf_readahead(
>  		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
>  }
>  
> -/*
> - * Build up the table of buf cancel records so that we don't replay cancelled
> - * data in the second pass.
> - */
> -static int
> -xlog_recover_buffer_pass1(
> -	struct xlog			*log,
> -	struct xlog_recover_item	*item)
> -{
> -	struct xfs_buf_log_format	*bf = item->ri_buf[0].i_addr;
> -
> -	if (!xfs_buf_log_check_iovec(&item->ri_buf[0])) {
> -		xfs_err(log->l_mp, "bad buffer log item size (%d)",
> -				item->ri_buf[0].i_len);
> -		return -EFSCORRUPTED;
> -	}
> -
> -	if (!(bf->blf_flags & XFS_BLF_CANCEL))
> -		trace_xfs_log_recover_buf_not_cancel(log, bf);
> -	else if (xlog_add_buffer_cancelled(log, bf->blf_blkno, bf->blf_len))
> -		trace_xfs_log_recover_buf_cancel_add(log, bf);
> -	else
> -		trace_xfs_log_recover_buf_cancel_ref_inc(log, bf);
> -	return 0;
> -}
> -
>  /*
>   * Perform recovery for a buffer full of inodes.  In these buffers, the only
>   * data which should be recovered is that which corresponds to the
> @@ -3197,33 +3171,6 @@ xlog_recover_inode_pass2(
>  	return error;
>  }
>  
> -/*
> - * Recover QUOTAOFF records. We simply make a note of it in the xlog
> - * structure, so that we know not to do any dquot item or dquot buffer recovery,
> - * of that type.
> - */
> -STATIC int
> -xlog_recover_quotaoff_pass1(
> -	struct xlog			*log,
> -	struct xlog_recover_item	*item)
> -{
> -	xfs_qoff_logformat_t	*qoff_f = item->ri_buf[0].i_addr;
> -	ASSERT(qoff_f);
> -
> -	/*
> -	 * The logitem format's flag tells us if this was user quotaoff,
> -	 * group/project quotaoff or both.
> -	 */
> -	if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
> -		log->l_quotaoffs_flag |= XFS_DQ_USER;
> -	if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
> -		log->l_quotaoffs_flag |= XFS_DQ_PROJ;
> -	if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
> -		log->l_quotaoffs_flag |= XFS_DQ_GROUP;
> -
> -	return 0;
> -}
> -
>  /*
>   * Recover a dquot record
>   */
> @@ -3890,40 +3837,6 @@ xlog_recover_do_icreate_pass2(
>  				     length, be32_to_cpu(icl->icl_gen));
>  }
>  
> -STATIC int
> -xlog_recover_commit_pass1(
> -	struct xlog			*log,
> -	struct xlog_recover		*trans,
> -	struct xlog_recover_item	*item)
> -{
> -	trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1);
> -
> -	switch (ITEM_TYPE(item)) {
> -	case XFS_LI_BUF:
> -		return xlog_recover_buffer_pass1(log, item);
> -	case XFS_LI_QUOTAOFF:
> -		return xlog_recover_quotaoff_pass1(log, item);
> -	case XFS_LI_INODE:
> -	case XFS_LI_EFI:
> -	case XFS_LI_EFD:
> -	case XFS_LI_DQUOT:
> -	case XFS_LI_ICREATE:
> -	case XFS_LI_RUI:
> -	case XFS_LI_RUD:
> -	case XFS_LI_CUI:
> -	case XFS_LI_CUD:
> -	case XFS_LI_BUI:
> -	case XFS_LI_BUD:
> -		/* nothing to do in pass 1 */
> -		return 0;
> -	default:
> -		xfs_warn(log->l_mp, "%s: invalid item type (%d)",
> -			__func__, ITEM_TYPE(item));
> -		ASSERT(0);
> -		return -EFSCORRUPTED;
> -	}
> -}
> -
>  STATIC int
>  xlog_recover_commit_pass2(
>  	struct xlog			*log,
> @@ -4021,9 +3934,19 @@ xlog_recover_commit_trans(
>  		return error;
>  
>  	list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) {
> +		trace_xfs_log_recover_item_recover(log, trans, item, pass);
> +
> +		if (!item->ri_ops) {
> +			xfs_warn(log->l_mp, "%s: invalid item type (%d)",
> +				__func__, ITEM_TYPE(item));
> +			ASSERT(0);
> +			return -EFSCORRUPTED;
> +		}
> +
>  		switch (pass) {
>  		case XLOG_RECOVER_PASS1:
> -			error = xlog_recover_commit_pass1(log, trans, item);
> +			if (item->ri_ops->commit_pass1)
> +				error = item->ri_ops->commit_pass1(log, item);
>  			break;
>  		case XLOG_RECOVER_PASS2:
>  			if (item->ri_ops->ra_pass2)
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 05/28] xfs: refactor log recovery buffer item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 05/28] xfs: refactor log recovery buffer item dispatch for pass2 " Darrick J. Wong
@ 2020-05-05  5:03   ` Chandan Babu R
  2020-05-06 15:09   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  5:03 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:41:03 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the log buffer item pass2 commit code into the per-item source code
> files and use the dispatch function to call it.  We do these one at a
> time because there's a lot of code to move.  No functional changes.
>

Buffer item pass2 processing is functionally consistent with what was done
before this patch is applied.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_log_recover.h |   23 +
>  fs/xfs/xfs_buf_item_recover.c   |  790 +++++++++++++++++++++++++++++++++++++++
>  fs/xfs/xfs_log_recover.c        |  798 ---------------------------------------
>  3 files changed, 820 insertions(+), 791 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
> index 384b70d58993..a45f6e9fa47b 100644
> --- a/fs/xfs/libxfs/xfs_log_recover.h
> +++ b/fs/xfs/libxfs/xfs_log_recover.h
> @@ -37,6 +37,26 @@ struct xlog_recover_item_ops {
>  
>  	/* Do whatever work we need to do for pass1, if provided. */
>  	int (*commit_pass1)(struct xlog *log, struct xlog_recover_item *item);
> +
> +	/*
> +	 * This function should do whatever work is needed for pass2 of log
> +	 * recovery, if provided.
> +	 *
> +	 * If the recovered item is an intent item, this function should parse
> +	 * the recovered item to construct an in-core log intent item and
> +	 * insert it into the AIL.  The in-core log intent item should have 1
> +	 * refcount so that the item is freed either (a) when we commit the
> +	 * recovered log item for the intent-done item; (b) replay the work and
> +	 * log a new intent-done item; or (c) recovery fails and we have to
> +	 * abort.
> +	 *
> +	 * If the recovered item is an intent-done item, this function should
> +	 * parse the recovered item to find the id of the corresponding intent
> +	 * log item.  Next, it should find the in-core log intent item in the
> +	 * AIL and release it.
> +	 */
> +	int (*commit_pass2)(struct xlog *log, struct list_head *buffer_list,
> +			    struct xlog_recover_item *item, xfs_lsn_t lsn);
>  };
>  
>  extern const struct xlog_recover_item_ops xlog_icreate_item_ops;
> @@ -101,5 +121,8 @@ struct xlog_recover {
>  void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len,
>  		const struct xfs_buf_ops *ops);
>  bool xlog_add_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
> +bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
> +bool xlog_put_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
> +void xlog_recover_iodone(struct xfs_buf *bp);
>  
>  #endif	/* __XFS_LOG_RECOVER_H__ */
> diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
> index 802f2206516d..4ca6d47d6c95 100644
> --- a/fs/xfs/xfs_buf_item_recover.c
> +++ b/fs/xfs/xfs_buf_item_recover.c
> @@ -18,6 +18,10 @@
>  #include "xfs_log.h"
>  #include "xfs_log_priv.h"
>  #include "xfs_log_recover.h"
> +#include "xfs_error.h"
> +#include "xfs_inode.h"
> +#include "xfs_dir2.h"
> +#include "xfs_quota.h"
>  
>  STATIC enum xlog_recover_reorder
>  xlog_recover_buf_reorder(
> @@ -68,9 +72,795 @@ xlog_recover_buf_commit_pass1(
>  	return 0;
>  }
>  
> +/*
> + * Validate the recovered buffer is of the correct type and attach the
> + * appropriate buffer operations to it for writeback. Magic numbers are in a
> + * few places:
> + *	the first 16 bits of the buffer (inode buffer, dquot buffer),
> + *	the first 32 bits of the buffer (most blocks),
> + *	inside a struct xfs_da_blkinfo at the start of the buffer.
> + */
> +static void
> +xlog_recover_validate_buf_type(
> +	struct xfs_mount		*mp,
> +	struct xfs_buf			*bp,
> +	struct xfs_buf_log_format	*buf_f,
> +	xfs_lsn_t			current_lsn)
> +{
> +	struct xfs_da_blkinfo		*info = bp->b_addr;
> +	uint32_t			magic32;
> +	uint16_t			magic16;
> +	uint16_t			magicda;
> +	char				*warnmsg = NULL;
> +
> +	/*
> +	 * We can only do post recovery validation on items on CRC enabled
> +	 * filesystems as we need to know when the buffer was written to be able
> +	 * to determine if we should have replayed the item. If we replay old
> +	 * metadata over a newer buffer, then it will enter a temporarily
> +	 * inconsistent state resulting in verification failures. Hence for now
> +	 * just avoid the verification stage for non-crc filesystems
> +	 */
> +	if (!xfs_sb_version_hascrc(&mp->m_sb))
> +		return;
> +
> +	magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
> +	magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
> +	magicda = be16_to_cpu(info->magic);
> +	switch (xfs_blft_from_flags(buf_f)) {
> +	case XFS_BLFT_BTREE_BUF:
> +		switch (magic32) {
> +		case XFS_ABTB_CRC_MAGIC:
> +		case XFS_ABTB_MAGIC:
> +			bp->b_ops = &xfs_bnobt_buf_ops;
> +			break;
> +		case XFS_ABTC_CRC_MAGIC:
> +		case XFS_ABTC_MAGIC:
> +			bp->b_ops = &xfs_cntbt_buf_ops;
> +			break;
> +		case XFS_IBT_CRC_MAGIC:
> +		case XFS_IBT_MAGIC:
> +			bp->b_ops = &xfs_inobt_buf_ops;
> +			break;
> +		case XFS_FIBT_CRC_MAGIC:
> +		case XFS_FIBT_MAGIC:
> +			bp->b_ops = &xfs_finobt_buf_ops;
> +			break;
> +		case XFS_BMAP_CRC_MAGIC:
> +		case XFS_BMAP_MAGIC:
> +			bp->b_ops = &xfs_bmbt_buf_ops;
> +			break;
> +		case XFS_RMAP_CRC_MAGIC:
> +			bp->b_ops = &xfs_rmapbt_buf_ops;
> +			break;
> +		case XFS_REFC_CRC_MAGIC:
> +			bp->b_ops = &xfs_refcountbt_buf_ops;
> +			break;
> +		default:
> +			warnmsg = "Bad btree block magic!";
> +			break;
> +		}
> +		break;
> +	case XFS_BLFT_AGF_BUF:
> +		if (magic32 != XFS_AGF_MAGIC) {
> +			warnmsg = "Bad AGF block magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_agf_buf_ops;
> +		break;
> +	case XFS_BLFT_AGFL_BUF:
> +		if (magic32 != XFS_AGFL_MAGIC) {
> +			warnmsg = "Bad AGFL block magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_agfl_buf_ops;
> +		break;
> +	case XFS_BLFT_AGI_BUF:
> +		if (magic32 != XFS_AGI_MAGIC) {
> +			warnmsg = "Bad AGI block magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_agi_buf_ops;
> +		break;
> +	case XFS_BLFT_UDQUOT_BUF:
> +	case XFS_BLFT_PDQUOT_BUF:
> +	case XFS_BLFT_GDQUOT_BUF:
> +#ifdef CONFIG_XFS_QUOTA
> +		if (magic16 != XFS_DQUOT_MAGIC) {
> +			warnmsg = "Bad DQUOT block magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_dquot_buf_ops;
> +#else
> +		xfs_alert(mp,
> +	"Trying to recover dquots without QUOTA support built in!");
> +		ASSERT(0);
> +#endif
> +		break;
> +	case XFS_BLFT_DINO_BUF:
> +		if (magic16 != XFS_DINODE_MAGIC) {
> +			warnmsg = "Bad INODE block magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_inode_buf_ops;
> +		break;
> +	case XFS_BLFT_SYMLINK_BUF:
> +		if (magic32 != XFS_SYMLINK_MAGIC) {
> +			warnmsg = "Bad symlink block magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_symlink_buf_ops;
> +		break;
> +	case XFS_BLFT_DIR_BLOCK_BUF:
> +		if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
> +		    magic32 != XFS_DIR3_BLOCK_MAGIC) {
> +			warnmsg = "Bad dir block magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_dir3_block_buf_ops;
> +		break;
> +	case XFS_BLFT_DIR_DATA_BUF:
> +		if (magic32 != XFS_DIR2_DATA_MAGIC &&
> +		    magic32 != XFS_DIR3_DATA_MAGIC) {
> +			warnmsg = "Bad dir data magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_dir3_data_buf_ops;
> +		break;
> +	case XFS_BLFT_DIR_FREE_BUF:
> +		if (magic32 != XFS_DIR2_FREE_MAGIC &&
> +		    magic32 != XFS_DIR3_FREE_MAGIC) {
> +			warnmsg = "Bad dir3 free magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_dir3_free_buf_ops;
> +		break;
> +	case XFS_BLFT_DIR_LEAF1_BUF:
> +		if (magicda != XFS_DIR2_LEAF1_MAGIC &&
> +		    magicda != XFS_DIR3_LEAF1_MAGIC) {
> +			warnmsg = "Bad dir leaf1 magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
> +		break;
> +	case XFS_BLFT_DIR_LEAFN_BUF:
> +		if (magicda != XFS_DIR2_LEAFN_MAGIC &&
> +		    magicda != XFS_DIR3_LEAFN_MAGIC) {
> +			warnmsg = "Bad dir leafn magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_dir3_leafn_buf_ops;
> +		break;
> +	case XFS_BLFT_DA_NODE_BUF:
> +		if (magicda != XFS_DA_NODE_MAGIC &&
> +		    magicda != XFS_DA3_NODE_MAGIC) {
> +			warnmsg = "Bad da node magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_da3_node_buf_ops;
> +		break;
> +	case XFS_BLFT_ATTR_LEAF_BUF:
> +		if (magicda != XFS_ATTR_LEAF_MAGIC &&
> +		    magicda != XFS_ATTR3_LEAF_MAGIC) {
> +			warnmsg = "Bad attr leaf magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_attr3_leaf_buf_ops;
> +		break;
> +	case XFS_BLFT_ATTR_RMT_BUF:
> +		if (magic32 != XFS_ATTR3_RMT_MAGIC) {
> +			warnmsg = "Bad attr remote magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_attr3_rmt_buf_ops;
> +		break;
> +	case XFS_BLFT_SB_BUF:
> +		if (magic32 != XFS_SB_MAGIC) {
> +			warnmsg = "Bad SB block magic!";
> +			break;
> +		}
> +		bp->b_ops = &xfs_sb_buf_ops;
> +		break;
> +#ifdef CONFIG_XFS_RT
> +	case XFS_BLFT_RTBITMAP_BUF:
> +	case XFS_BLFT_RTSUMMARY_BUF:
> +		/* no magic numbers for verification of RT buffers */
> +		bp->b_ops = &xfs_rtbuf_ops;
> +		break;
> +#endif /* CONFIG_XFS_RT */
> +	default:
> +		xfs_warn(mp, "Unknown buffer type %d!",
> +			 xfs_blft_from_flags(buf_f));
> +		break;
> +	}
> +
> +	/*
> +	 * Nothing else to do in the case of a NULL current LSN as this means
> +	 * the buffer is more recent than the change in the log and will be
> +	 * skipped.
> +	 */
> +	if (current_lsn == NULLCOMMITLSN)
> +		return;
> +
> +	if (warnmsg) {
> +		xfs_warn(mp, warnmsg);
> +		ASSERT(0);
> +	}
> +
> +	/*
> +	 * We must update the metadata LSN of the buffer as it is written out to
> +	 * ensure that older transactions never replay over this one and corrupt
> +	 * the buffer. This can occur if log recovery is interrupted at some
> +	 * point after the current transaction completes, at which point a
> +	 * subsequent mount starts recovery from the beginning.
> +	 *
> +	 * Write verifiers update the metadata LSN from log items attached to
> +	 * the buffer. Therefore, initialize a bli purely to carry the LSN to
> +	 * the verifier. We'll clean it up in our ->iodone() callback.
> +	 */
> +	if (bp->b_ops) {
> +		struct xfs_buf_log_item	*bip;
> +
> +		ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
> +		bp->b_iodone = xlog_recover_iodone;
> +		xfs_buf_item_init(bp, mp);
> +		bip = bp->b_log_item;
> +		bip->bli_item.li_lsn = current_lsn;
> +	}
> +}
> +
> +/*
> + * Perform a 'normal' buffer recovery.  Each logged region of the
> + * buffer should be copied over the corresponding region in the
> + * given buffer.  The bitmap in the buf log format structure indicates
> + * where to place the logged data.
> + */
> +STATIC void
> +xlog_recover_do_reg_buffer(
> +	struct xfs_mount		*mp,
> +	struct xlog_recover_item	*item,
> +	struct xfs_buf			*bp,
> +	struct xfs_buf_log_format	*buf_f,
> +	xfs_lsn_t			current_lsn)
> +{
> +	int			i;
> +	int			bit;
> +	int			nbits;
> +	xfs_failaddr_t		fa;
> +	const size_t		size_disk_dquot = sizeof(struct xfs_disk_dquot);
> +
> +	trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
> +
> +	bit = 0;
> +	i = 1;  /* 0 is the buf format structure */
> +	while (1) {
> +		bit = xfs_next_bit(buf_f->blf_data_map,
> +				   buf_f->blf_map_size, bit);
> +		if (bit == -1)
> +			break;
> +		nbits = xfs_contig_bits(buf_f->blf_data_map,
> +					buf_f->blf_map_size, bit);
> +		ASSERT(nbits > 0);
> +		ASSERT(item->ri_buf[i].i_addr != NULL);
> +		ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
> +		ASSERT(BBTOB(bp->b_length) >=
> +		       ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
> +
> +		/*
> +		 * The dirty regions logged in the buffer, even though
> +		 * contiguous, may span multiple chunks. This is because the
> +		 * dirty region may span a physical page boundary in a buffer
> +		 * and hence be split into two separate vectors for writing into
> +		 * the log. Hence we need to trim nbits back to the length of
> +		 * the current region being copied out of the log.
> +		 */
> +		if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
> +			nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
> +
> +		/*
> +		 * Do a sanity check if this is a dquot buffer. Just checking
> +		 * the first dquot in the buffer should do. XXXThis is
> +		 * probably a good thing to do for other buf types also.
> +		 */
> +		fa = NULL;
> +		if (buf_f->blf_flags &
> +		   (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
> +			if (item->ri_buf[i].i_addr == NULL) {
> +				xfs_alert(mp,
> +					"XFS: NULL dquot in %s.", __func__);
> +				goto next;
> +			}
> +			if (item->ri_buf[i].i_len < size_disk_dquot) {
> +				xfs_alert(mp,
> +					"XFS: dquot too small (%d) in %s.",
> +					item->ri_buf[i].i_len, __func__);
> +				goto next;
> +			}
> +			fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr,
> +					       -1, 0);
> +			if (fa) {
> +				xfs_alert(mp,
> +	"dquot corrupt at %pS trying to replay into block 0x%llx",
> +					fa, bp->b_bn);
> +				goto next;
> +			}
> +		}
> +
> +		memcpy(xfs_buf_offset(bp,
> +			(uint)bit << XFS_BLF_SHIFT),	/* dest */
> +			item->ri_buf[i].i_addr,		/* source */
> +			nbits<<XFS_BLF_SHIFT);		/* length */
> + next:
> +		i++;
> +		bit += nbits;
> +	}
> +
> +	/* Shouldn't be any more regions */
> +	ASSERT(i == item->ri_total);
> +
> +	xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn);
> +}
> +
> +/*
> + * Perform a dquot buffer recovery.
> + * Simple algorithm: if we have found a QUOTAOFF log item of the same type
> + * (ie. USR or GRP), then just toss this buffer away; don't recover it.
> + * Else, treat it as a regular buffer and do recovery.
> + *
> + * Return false if the buffer was tossed and true if we recovered the buffer to
> + * indicate to the caller if the buffer needs writing.
> + */
> +STATIC bool
> +xlog_recover_do_dquot_buffer(
> +	struct xfs_mount		*mp,
> +	struct xlog			*log,
> +	struct xlog_recover_item	*item,
> +	struct xfs_buf			*bp,
> +	struct xfs_buf_log_format	*buf_f)
> +{
> +	uint			type;
> +
> +	trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
> +
> +	/*
> +	 * Filesystems are required to send in quota flags at mount time.
> +	 */
> +	if (!mp->m_qflags)
> +		return false;
> +
> +	type = 0;
> +	if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
> +		type |= XFS_DQ_USER;
> +	if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF)
> +		type |= XFS_DQ_PROJ;
> +	if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF)
> +		type |= XFS_DQ_GROUP;
> +	/*
> +	 * This type of quotas was turned off, so ignore this buffer
> +	 */
> +	if (log->l_quotaoffs_flag & type)
> +		return false;
> +
> +	xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN);
> +	return true;
> +}
> +
> +/*
> + * Perform recovery for a buffer full of inodes.  In these buffers, the only
> + * data which should be recovered is that which corresponds to the
> + * di_next_unlinked pointers in the on disk inode structures.  The rest of the
> + * data for the inodes is always logged through the inodes themselves rather
> + * than the inode buffer and is recovered in xlog_recover_inode_pass2().
> + *
> + * The only time when buffers full of inodes are fully recovered is when the
> + * buffer is full of newly allocated inodes.  In this case the buffer will
> + * not be marked as an inode buffer and so will be sent to
> + * xlog_recover_do_reg_buffer() below during recovery.
> + */
> +STATIC int
> +xlog_recover_do_inode_buffer(
> +	struct xfs_mount		*mp,
> +	struct xlog_recover_item	*item,
> +	struct xfs_buf			*bp,
> +	struct xfs_buf_log_format	*buf_f)
> +{
> +	int				i;
> +	int				item_index = 0;
> +	int				bit = 0;
> +	int				nbits = 0;
> +	int				reg_buf_offset = 0;
> +	int				reg_buf_bytes = 0;
> +	int				next_unlinked_offset;
> +	int				inodes_per_buf;
> +	xfs_agino_t			*logged_nextp;
> +	xfs_agino_t			*buffer_nextp;
> +
> +	trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
> +
> +	/*
> +	 * Post recovery validation only works properly on CRC enabled
> +	 * filesystems.
> +	 */
> +	if (xfs_sb_version_hascrc(&mp->m_sb))
> +		bp->b_ops = &xfs_inode_buf_ops;
> +
> +	inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog;
> +	for (i = 0; i < inodes_per_buf; i++) {
> +		next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
> +			offsetof(xfs_dinode_t, di_next_unlinked);
> +
> +		while (next_unlinked_offset >=
> +		       (reg_buf_offset + reg_buf_bytes)) {
> +			/*
> +			 * The next di_next_unlinked field is beyond
> +			 * the current logged region.  Find the next
> +			 * logged region that contains or is beyond
> +			 * the current di_next_unlinked field.
> +			 */
> +			bit += nbits;
> +			bit = xfs_next_bit(buf_f->blf_data_map,
> +					   buf_f->blf_map_size, bit);
> +
> +			/*
> +			 * If there are no more logged regions in the
> +			 * buffer, then we're done.
> +			 */
> +			if (bit == -1)
> +				return 0;
> +
> +			nbits = xfs_contig_bits(buf_f->blf_data_map,
> +						buf_f->blf_map_size, bit);
> +			ASSERT(nbits > 0);
> +			reg_buf_offset = bit << XFS_BLF_SHIFT;
> +			reg_buf_bytes = nbits << XFS_BLF_SHIFT;
> +			item_index++;
> +		}
> +
> +		/*
> +		 * If the current logged region starts after the current
> +		 * di_next_unlinked field, then move on to the next
> +		 * di_next_unlinked field.
> +		 */
> +		if (next_unlinked_offset < reg_buf_offset)
> +			continue;
> +
> +		ASSERT(item->ri_buf[item_index].i_addr != NULL);
> +		ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
> +		ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
> +
> +		/*
> +		 * The current logged region contains a copy of the
> +		 * current di_next_unlinked field.  Extract its value
> +		 * and copy it to the buffer copy.
> +		 */
> +		logged_nextp = item->ri_buf[item_index].i_addr +
> +				next_unlinked_offset - reg_buf_offset;
> +		if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) {
> +			xfs_alert(mp,
> +		"Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). "
> +		"Trying to replay bad (0) inode di_next_unlinked field.",
> +				item, bp);
> +			return -EFSCORRUPTED;
> +		}
> +
> +		buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);
> +		*buffer_nextp = *logged_nextp;
> +
> +		/*
> +		 * If necessary, recalculate the CRC in the on-disk inode. We
> +		 * have to leave the inode in a consistent state for whoever
> +		 * reads it next....
> +		 */
> +		xfs_dinode_calc_crc(mp,
> +				xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
> +
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * V5 filesystems know the age of the buffer on disk being recovered. We can
> + * have newer objects on disk than we are replaying, and so for these cases we
> + * don't want to replay the current change as that will make the buffer contents
> + * temporarily invalid on disk.
> + *
> + * The magic number might not match the buffer type we are going to recover
> + * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags.  Hence
> + * extract the LSN of the existing object in the buffer based on its current
> + * magic number.  If we don't recognise the magic number in the buffer, then
> + * return a LSN of -1 so that the caller knows it was an unrecognised block and
> + * so can recover the buffer.
> + *
> + * Note: we cannot rely solely on magic number matches to determine that the
> + * buffer has a valid LSN - we also need to verify that it belongs to this
> + * filesystem, so we need to extract the object's LSN and compare it to that
> + * which we read from the superblock. If the UUIDs don't match, then we've got a
> + * stale metadata block from an old filesystem instance that we need to recover
> + * over the top of.
> + */
> +static xfs_lsn_t
> +xlog_recover_get_buf_lsn(
> +	struct xfs_mount	*mp,
> +	struct xfs_buf		*bp)
> +{
> +	uint32_t		magic32;
> +	uint16_t		magic16;
> +	uint16_t		magicda;
> +	void			*blk = bp->b_addr;
> +	uuid_t			*uuid;
> +	xfs_lsn_t		lsn = -1;
> +
> +	/* v4 filesystems always recover immediately */
> +	if (!xfs_sb_version_hascrc(&mp->m_sb))
> +		goto recover_immediately;
> +
> +	magic32 = be32_to_cpu(*(__be32 *)blk);
> +	switch (magic32) {
> +	case XFS_ABTB_CRC_MAGIC:
> +	case XFS_ABTC_CRC_MAGIC:
> +	case XFS_ABTB_MAGIC:
> +	case XFS_ABTC_MAGIC:
> +	case XFS_RMAP_CRC_MAGIC:
> +	case XFS_REFC_CRC_MAGIC:
> +	case XFS_IBT_CRC_MAGIC:
> +	case XFS_IBT_MAGIC: {
> +		struct xfs_btree_block *btb = blk;
> +
> +		lsn = be64_to_cpu(btb->bb_u.s.bb_lsn);
> +		uuid = &btb->bb_u.s.bb_uuid;
> +		break;
> +	}
> +	case XFS_BMAP_CRC_MAGIC:
> +	case XFS_BMAP_MAGIC: {
> +		struct xfs_btree_block *btb = blk;
> +
> +		lsn = be64_to_cpu(btb->bb_u.l.bb_lsn);
> +		uuid = &btb->bb_u.l.bb_uuid;
> +		break;
> +	}
> +	case XFS_AGF_MAGIC:
> +		lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
> +		uuid = &((struct xfs_agf *)blk)->agf_uuid;
> +		break;
> +	case XFS_AGFL_MAGIC:
> +		lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
> +		uuid = &((struct xfs_agfl *)blk)->agfl_uuid;
> +		break;
> +	case XFS_AGI_MAGIC:
> +		lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
> +		uuid = &((struct xfs_agi *)blk)->agi_uuid;
> +		break;
> +	case XFS_SYMLINK_MAGIC:
> +		lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
> +		uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid;
> +		break;
> +	case XFS_DIR3_BLOCK_MAGIC:
> +	case XFS_DIR3_DATA_MAGIC:
> +	case XFS_DIR3_FREE_MAGIC:
> +		lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
> +		uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
> +		break;
> +	case XFS_ATTR3_RMT_MAGIC:
> +		/*
> +		 * Remote attr blocks are written synchronously, rather than
> +		 * being logged. That means they do not contain a valid LSN
> +		 * (i.e. transactionally ordered) in them, and hence any time we
> +		 * see a buffer to replay over the top of a remote attribute
> +		 * block we should simply do so.
> +		 */
> +		goto recover_immediately;
> +	case XFS_SB_MAGIC:
> +		/*
> +		 * superblock uuids are magic. We may or may not have a
> +		 * sb_meta_uuid on disk, but it will be set in the in-core
> +		 * superblock. We set the uuid pointer for verification
> +		 * according to the superblock feature mask to ensure we check
> +		 * the relevant UUID in the superblock.
> +		 */
> +		lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
> +		if (xfs_sb_version_hasmetauuid(&mp->m_sb))
> +			uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
> +		else
> +			uuid = &((struct xfs_dsb *)blk)->sb_uuid;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (lsn != (xfs_lsn_t)-1) {
> +		if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
> +			goto recover_immediately;
> +		return lsn;
> +	}
> +
> +	magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
> +	switch (magicda) {
> +	case XFS_DIR3_LEAF1_MAGIC:
> +	case XFS_DIR3_LEAFN_MAGIC:
> +	case XFS_DA3_NODE_MAGIC:
> +		lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
> +		uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (lsn != (xfs_lsn_t)-1) {
> +		if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
> +			goto recover_immediately;
> +		return lsn;
> +	}
> +
> +	/*
> +	 * We do individual object checks on dquot and inode buffers as they
> +	 * have their own individual LSN records. Also, we could have a stale
> +	 * buffer here, so we have to at least recognise these buffer types.
> +	 *
> +	 * A noted complexity here is inode unlinked list processing - it logs
> +	 * the inode directly in the buffer, but we don't know which inodes have
> +	 * been modified, and there is no global buffer LSN. Hence we need to
> +	 * recover all inode buffer types immediately. This problem will be
> +	 * fixed by logical logging of the unlinked list modifications.
> +	 */
> +	magic16 = be16_to_cpu(*(__be16 *)blk);
> +	switch (magic16) {
> +	case XFS_DQUOT_MAGIC:
> +	case XFS_DINODE_MAGIC:
> +		goto recover_immediately;
> +	default:
> +		break;
> +	}
> +
> +	/* unknown buffer contents, recover immediately */
> +
> +recover_immediately:
> +	return (xfs_lsn_t)-1;
> +
> +}
> +
> +/*
> + * This routine replays a modification made to a buffer at runtime.
> + * There are actually two types of buffer, regular and inode, which
> + * are handled differently.  Inode buffers are handled differently
> + * in that we only recover a specific set of data from them, namely
> + * the inode di_next_unlinked fields.  This is because all other inode
> + * data is actually logged via inode records and any data we replay
> + * here which overlaps that may be stale.
> + *
> + * When meta-data buffers are freed at run time we log a buffer item
> + * with the XFS_BLF_CANCEL bit set to indicate that previous copies
> + * of the buffer in the log should not be replayed at recovery time.
> + * This is so that if the blocks covered by the buffer are reused for
> + * file data before we crash we don't end up replaying old, freed
> + * meta-data into a user's file.
> + *
> + * To handle the cancellation of buffer log items, we make two passes
> + * over the log during recovery.  During the first we build a table of
> + * those buffers which have been cancelled, and during the second we
> + * only replay those buffers which do not have corresponding cancel
> +	 * records in the table.  See xlog_recover_buf_commit_pass[1,2] above
> + * for more details on the implementation of the table of cancel records.
> + */
> +STATIC int
> +xlog_recover_buf_commit_pass2(
> +	struct xlog			*log,
> +	struct list_head		*buffer_list,
> +	struct xlog_recover_item	*item,
> +	xfs_lsn_t			current_lsn)
> +{
> +	struct xfs_buf_log_format	*buf_f = item->ri_buf[0].i_addr;
> +	struct xfs_mount		*mp = log->l_mp;
> +	struct xfs_buf			*bp;
> +	int				error;
> +	uint				buf_flags;
> +	xfs_lsn_t			lsn;
> +
> +	/*
> +	 * In this pass we only want to recover all the buffers which have
> +	 * not been cancelled and are not cancellation buffers themselves.
> +	 */
> +	if (buf_f->blf_flags & XFS_BLF_CANCEL) {
> +		if (xlog_put_buffer_cancelled(log, buf_f->blf_blkno,
> +				buf_f->blf_len))
> +			goto cancelled;
> +	} else {
> +
> +		if (xlog_is_buffer_cancelled(log, buf_f->blf_blkno,
> +				buf_f->blf_len))
> +			goto cancelled;
> +	}
> +
> +	trace_xfs_log_recover_buf_recover(log, buf_f);
> +
> +	buf_flags = 0;
> +	if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
> +		buf_flags |= XBF_UNMAPPED;
> +
> +	error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
> +			  buf_flags, &bp, NULL);
> +	if (error)
> +		return error;
> +
> +	/*
> +	 * Recover the buffer only if we get an LSN from it and it's less than
> +	 * the lsn of the transaction we are replaying.
> +	 *
> +	 * Note that we have to be extremely careful of readahead here.
> +	 * Readahead does not attach verifiers to the buffers so if we don't
> +	 * actually do any replay after readahead because the LSN we found
> +	 * in the buffer is more recent than the current transaction then we
> +	 * need to attach the verifier directly. Failure to do so means that
> +	 * future recovery actions (e.g. EFI and unlinked list recovery) can
> +	 * operate on the buffers and they won't get the verifier attached. This
> +	 * can lead to blocks on disk having the correct content but a stale
> +	 * CRC.
> +	 *
> +	 * It is safe to assume these clean buffers are currently up to date.
> +	 * If the buffer is dirtied by a later transaction being replayed, then
> +	 * the verifier will be reset to match whatever recover turns that
> +	 * buffer into.
> +	 */
> +	lsn = xlog_recover_get_buf_lsn(mp, bp);
> +	if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
> +		trace_xfs_log_recover_buf_skip(log, buf_f);
> +		xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
> +		goto out_release;
> +	}
> +
> +	if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
> +		error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
> +		if (error)
> +			goto out_release;
> +	} else if (buf_f->blf_flags &
> +		  (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
> +		bool	dirty;
> +
> +		dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
> +		if (!dirty)
> +			goto out_release;
> +	} else {
> +		xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
> +	}
> +
> +	/*
> +	 * Perform delayed write on the buffer.  Asynchronous writes will be
> +	 * slower when taking into account all the buffers to be flushed.
> +	 *
> +	 * Also make sure that only inode buffers with good sizes stay in
> +	 * the buffer cache.  The kernel moves inodes in buffers of 1 block
> +	 * or inode_cluster_size bytes, whichever is bigger.  The inode
> +	 * buffers in the log can be a different size if the log was generated
> +	 * by an older kernel using unclustered inode buffers or a newer kernel
> +	 * running with a different inode cluster size.  Regardless, if the
> +	 * inode buffer size isn't max(blocksize, inode_cluster_size)
> +	 * for *our* value of inode_cluster_size, then we need to keep
> +	 * the buffer out of the buffer cache so that the buffer won't
> +	 * overlap with future reads of those inodes.
> +	 */
> +	if (XFS_DINODE_MAGIC ==
> +	    be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
> +	    (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
> +		xfs_buf_stale(bp);
> +		error = xfs_bwrite(bp);
> +	} else {
> +		ASSERT(bp->b_mount == mp);
> +		bp->b_iodone = xlog_recover_iodone;
> +		xfs_buf_delwri_queue(bp, buffer_list);
> +	}
> +
> +out_release:
> +	xfs_buf_relse(bp);
> +	return error;
> +cancelled:
> +	trace_xfs_log_recover_buf_cancel(log, buf_f);
> +	return 0;
> +}
> +
>  const struct xlog_recover_item_ops xlog_buf_item_ops = {
>  	.item_type		= XFS_LI_BUF,
>  	.reorder		= xlog_recover_buf_reorder,
>  	.ra_pass2		= xlog_recover_buf_ra_pass2,
>  	.commit_pass1		= xlog_recover_buf_commit_pass1,
> +	.commit_pass2		= xlog_recover_buf_commit_pass2,
>  };
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index b3627ebf870e..d65dc3895a62 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -284,7 +284,7 @@ xlog_header_check_mount(
>  	return 0;
>  }
>  
> -STATIC void
> +void
>  xlog_recover_iodone(
>  	struct xfs_buf	*bp)
>  {
> @@ -1985,7 +1985,7 @@ xlog_add_buffer_cancelled(
>  /*
>   * Check if there is and entry for blkno, len in the buffer cancel record table.
>   */
> -static bool
> +bool
>  xlog_is_buffer_cancelled(
>  	struct xlog		*log,
>  	xfs_daddr_t		blkno,
> @@ -2002,7 +2002,7 @@ xlog_is_buffer_cancelled(
>   * buffer is re-used again after its last cancellation we actually replay the
>   * changes made at that point.
>   */
> -static bool
> +bool
>  xlog_put_buffer_cancelled(
>  	struct xlog		*log,
>  	xfs_daddr_t		blkno,
> @@ -2034,791 +2034,6 @@ xlog_buf_readahead(
>  		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
>  }
>  
> -/*
> - * Perform recovery for a buffer full of inodes.  In these buffers, the only
> - * data which should be recovered is that which corresponds to the
> - * di_next_unlinked pointers in the on disk inode structures.  The rest of the
> - * data for the inodes is always logged through the inodes themselves rather
> - * than the inode buffer and is recovered in xlog_recover_inode_pass2().
> - *
> - * The only time when buffers full of inodes are fully recovered is when the
> - * buffer is full of newly allocated inodes.  In this case the buffer will
> - * not be marked as an inode buffer and so will be sent to
> - * xlog_recover_do_reg_buffer() below during recovery.
> - */
> -STATIC int
> -xlog_recover_do_inode_buffer(
> -	struct xfs_mount	*mp,
> -	struct xlog_recover_item *item,
> -	struct xfs_buf		*bp,
> -	xfs_buf_log_format_t	*buf_f)
> -{
> -	int			i;
> -	int			item_index = 0;
> -	int			bit = 0;
> -	int			nbits = 0;
> -	int			reg_buf_offset = 0;
> -	int			reg_buf_bytes = 0;
> -	int			next_unlinked_offset;
> -	int			inodes_per_buf;
> -	xfs_agino_t		*logged_nextp;
> -	xfs_agino_t		*buffer_nextp;
> -
> -	trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
> -
> -	/*
> -	 * Post recovery validation only works properly on CRC enabled
> -	 * filesystems.
> -	 */
> -	if (xfs_sb_version_hascrc(&mp->m_sb))
> -		bp->b_ops = &xfs_inode_buf_ops;
> -
> -	inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog;
> -	for (i = 0; i < inodes_per_buf; i++) {
> -		next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
> -			offsetof(xfs_dinode_t, di_next_unlinked);
> -
> -		while (next_unlinked_offset >=
> -		       (reg_buf_offset + reg_buf_bytes)) {
> -			/*
> -			 * The next di_next_unlinked field is beyond
> -			 * the current logged region.  Find the next
> -			 * logged region that contains or is beyond
> -			 * the current di_next_unlinked field.
> -			 */
> -			bit += nbits;
> -			bit = xfs_next_bit(buf_f->blf_data_map,
> -					   buf_f->blf_map_size, bit);
> -
> -			/*
> -			 * If there are no more logged regions in the
> -			 * buffer, then we're done.
> -			 */
> -			if (bit == -1)
> -				return 0;
> -
> -			nbits = xfs_contig_bits(buf_f->blf_data_map,
> -						buf_f->blf_map_size, bit);
> -			ASSERT(nbits > 0);
> -			reg_buf_offset = bit << XFS_BLF_SHIFT;
> -			reg_buf_bytes = nbits << XFS_BLF_SHIFT;
> -			item_index++;
> -		}
> -
> -		/*
> -		 * If the current logged region starts after the current
> -		 * di_next_unlinked field, then move on to the next
> -		 * di_next_unlinked field.
> -		 */
> -		if (next_unlinked_offset < reg_buf_offset)
> -			continue;
> -
> -		ASSERT(item->ri_buf[item_index].i_addr != NULL);
> -		ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
> -		ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
> -
> -		/*
> -		 * The current logged region contains a copy of the
> -		 * current di_next_unlinked field.  Extract its value
> -		 * and copy it to the buffer copy.
> -		 */
> -		logged_nextp = item->ri_buf[item_index].i_addr +
> -				next_unlinked_offset - reg_buf_offset;
> -		if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) {
> -			xfs_alert(mp,
> -		"Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). "
> -		"Trying to replay bad (0) inode di_next_unlinked field.",
> -				item, bp);
> -			return -EFSCORRUPTED;
> -		}
> -
> -		buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);
> -		*buffer_nextp = *logged_nextp;
> -
> -		/*
> -		 * If necessary, recalculate the CRC in the on-disk inode. We
> -		 * have to leave the inode in a consistent state for whoever
> -		 * reads it next....
> -		 */
> -		xfs_dinode_calc_crc(mp,
> -				xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
> -
> -	}
> -
> -	return 0;
> -}
> -
> -/*
> - * V5 filesystems know the age of the buffer on disk being recovered. We can
> - * have newer objects on disk than we are replaying, and so for these cases we
> - * don't want to replay the current change as that will make the buffer contents
> - * temporarily invalid on disk.
> - *
> - * The magic number might not match the buffer type we are going to recover
> - * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags.  Hence
> - * extract the LSN of the existing object in the buffer based on it's current
> - * magic number.  If we don't recognise the magic number in the buffer, then
> - * return a LSN of -1 so that the caller knows it was an unrecognised block and
> - * so can recover the buffer.
> - *
> - * Note: we cannot rely solely on magic number matches to determine that the
> - * buffer has a valid LSN - we also need to verify that it belongs to this
> - * filesystem, so we need to extract the object's LSN and compare it to that
> - * which we read from the superblock. If the UUIDs don't match, then we've got a
> - * stale metadata block from an old filesystem instance that we need to recover
> - * over the top of.
> - */
> -static xfs_lsn_t
> -xlog_recover_get_buf_lsn(
> -	struct xfs_mount	*mp,
> -	struct xfs_buf		*bp)
> -{
> -	uint32_t		magic32;
> -	uint16_t		magic16;
> -	uint16_t		magicda;
> -	void			*blk = bp->b_addr;
> -	uuid_t			*uuid;
> -	xfs_lsn_t		lsn = -1;
> -
> -	/* v4 filesystems always recover immediately */
> -	if (!xfs_sb_version_hascrc(&mp->m_sb))
> -		goto recover_immediately;
> -
> -	magic32 = be32_to_cpu(*(__be32 *)blk);
> -	switch (magic32) {
> -	case XFS_ABTB_CRC_MAGIC:
> -	case XFS_ABTC_CRC_MAGIC:
> -	case XFS_ABTB_MAGIC:
> -	case XFS_ABTC_MAGIC:
> -	case XFS_RMAP_CRC_MAGIC:
> -	case XFS_REFC_CRC_MAGIC:
> -	case XFS_IBT_CRC_MAGIC:
> -	case XFS_IBT_MAGIC: {
> -		struct xfs_btree_block *btb = blk;
> -
> -		lsn = be64_to_cpu(btb->bb_u.s.bb_lsn);
> -		uuid = &btb->bb_u.s.bb_uuid;
> -		break;
> -	}
> -	case XFS_BMAP_CRC_MAGIC:
> -	case XFS_BMAP_MAGIC: {
> -		struct xfs_btree_block *btb = blk;
> -
> -		lsn = be64_to_cpu(btb->bb_u.l.bb_lsn);
> -		uuid = &btb->bb_u.l.bb_uuid;
> -		break;
> -	}
> -	case XFS_AGF_MAGIC:
> -		lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
> -		uuid = &((struct xfs_agf *)blk)->agf_uuid;
> -		break;
> -	case XFS_AGFL_MAGIC:
> -		lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
> -		uuid = &((struct xfs_agfl *)blk)->agfl_uuid;
> -		break;
> -	case XFS_AGI_MAGIC:
> -		lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
> -		uuid = &((struct xfs_agi *)blk)->agi_uuid;
> -		break;
> -	case XFS_SYMLINK_MAGIC:
> -		lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
> -		uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid;
> -		break;
> -	case XFS_DIR3_BLOCK_MAGIC:
> -	case XFS_DIR3_DATA_MAGIC:
> -	case XFS_DIR3_FREE_MAGIC:
> -		lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
> -		uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
> -		break;
> -	case XFS_ATTR3_RMT_MAGIC:
> -		/*
> -		 * Remote attr blocks are written synchronously, rather than
> -		 * being logged. That means they do not contain a valid LSN
> -		 * (i.e. transactionally ordered) in them, and hence any time we
> -		 * see a buffer to replay over the top of a remote attribute
> -		 * block we should simply do so.
> -		 */
> -		goto recover_immediately;
> -	case XFS_SB_MAGIC:
> -		/*
> -		 * superblock uuids are magic. We may or may not have a
> -		 * sb_meta_uuid on disk, but it will be set in the in-core
> -		 * superblock. We set the uuid pointer for verification
> -		 * according to the superblock feature mask to ensure we check
> -		 * the relevant UUID in the superblock.
> -		 */
> -		lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
> -		if (xfs_sb_version_hasmetauuid(&mp->m_sb))
> -			uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
> -		else
> -			uuid = &((struct xfs_dsb *)blk)->sb_uuid;
> -		break;
> -	default:
> -		break;
> -	}
> -
> -	if (lsn != (xfs_lsn_t)-1) {
> -		if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
> -			goto recover_immediately;
> -		return lsn;
> -	}
> -
> -	magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
> -	switch (magicda) {
> -	case XFS_DIR3_LEAF1_MAGIC:
> -	case XFS_DIR3_LEAFN_MAGIC:
> -	case XFS_DA3_NODE_MAGIC:
> -		lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
> -		uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
> -		break;
> -	default:
> -		break;
> -	}
> -
> -	if (lsn != (xfs_lsn_t)-1) {
> -		if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
> -			goto recover_immediately;
> -		return lsn;
> -	}
> -
> -	/*
> -	 * We do individual object checks on dquot and inode buffers as they
> -	 * have their own individual LSN records. Also, we could have a stale
> -	 * buffer here, so we have to at least recognise these buffer types.
> -	 *
> -	 * A notd complexity here is inode unlinked list processing - it logs
> -	 * the inode directly in the buffer, but we don't know which inodes have
> -	 * been modified, and there is no global buffer LSN. Hence we need to
> -	 * recover all inode buffer types immediately. This problem will be
> -	 * fixed by logical logging of the unlinked list modifications.
> -	 */
> -	magic16 = be16_to_cpu(*(__be16 *)blk);
> -	switch (magic16) {
> -	case XFS_DQUOT_MAGIC:
> -	case XFS_DINODE_MAGIC:
> -		goto recover_immediately;
> -	default:
> -		break;
> -	}
> -
> -	/* unknown buffer contents, recover immediately */
> -
> -recover_immediately:
> -	return (xfs_lsn_t)-1;
> -
> -}
> -
> -/*
> - * Validate the recovered buffer is of the correct type and attach the
> - * appropriate buffer operations to them for writeback. Magic numbers are in a
> - * few places:
> - *	the first 16 bits of the buffer (inode buffer, dquot buffer),
> - *	the first 32 bits of the buffer (most blocks),
> - *	inside a struct xfs_da_blkinfo at the start of the buffer.
> - */
> -static void
> -xlog_recover_validate_buf_type(
> -	struct xfs_mount	*mp,
> -	struct xfs_buf		*bp,
> -	xfs_buf_log_format_t	*buf_f,
> -	xfs_lsn_t		current_lsn)
> -{
> -	struct xfs_da_blkinfo	*info = bp->b_addr;
> -	uint32_t		magic32;
> -	uint16_t		magic16;
> -	uint16_t		magicda;
> -	char			*warnmsg = NULL;
> -
> -	/*
> -	 * We can only do post recovery validation on items on CRC enabled
> -	 * fielsystems as we need to know when the buffer was written to be able
> -	 * to determine if we should have replayed the item. If we replay old
> -	 * metadata over a newer buffer, then it will enter a temporarily
> -	 * inconsistent state resulting in verification failures. Hence for now
> -	 * just avoid the verification stage for non-crc filesystems
> -	 */
> -	if (!xfs_sb_version_hascrc(&mp->m_sb))
> -		return;
> -
> -	magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
> -	magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
> -	magicda = be16_to_cpu(info->magic);
> -	switch (xfs_blft_from_flags(buf_f)) {
> -	case XFS_BLFT_BTREE_BUF:
> -		switch (magic32) {
> -		case XFS_ABTB_CRC_MAGIC:
> -		case XFS_ABTB_MAGIC:
> -			bp->b_ops = &xfs_bnobt_buf_ops;
> -			break;
> -		case XFS_ABTC_CRC_MAGIC:
> -		case XFS_ABTC_MAGIC:
> -			bp->b_ops = &xfs_cntbt_buf_ops;
> -			break;
> -		case XFS_IBT_CRC_MAGIC:
> -		case XFS_IBT_MAGIC:
> -			bp->b_ops = &xfs_inobt_buf_ops;
> -			break;
> -		case XFS_FIBT_CRC_MAGIC:
> -		case XFS_FIBT_MAGIC:
> -			bp->b_ops = &xfs_finobt_buf_ops;
> -			break;
> -		case XFS_BMAP_CRC_MAGIC:
> -		case XFS_BMAP_MAGIC:
> -			bp->b_ops = &xfs_bmbt_buf_ops;
> -			break;
> -		case XFS_RMAP_CRC_MAGIC:
> -			bp->b_ops = &xfs_rmapbt_buf_ops;
> -			break;
> -		case XFS_REFC_CRC_MAGIC:
> -			bp->b_ops = &xfs_refcountbt_buf_ops;
> -			break;
> -		default:
> -			warnmsg = "Bad btree block magic!";
> -			break;
> -		}
> -		break;
> -	case XFS_BLFT_AGF_BUF:
> -		if (magic32 != XFS_AGF_MAGIC) {
> -			warnmsg = "Bad AGF block magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_agf_buf_ops;
> -		break;
> -	case XFS_BLFT_AGFL_BUF:
> -		if (magic32 != XFS_AGFL_MAGIC) {
> -			warnmsg = "Bad AGFL block magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_agfl_buf_ops;
> -		break;
> -	case XFS_BLFT_AGI_BUF:
> -		if (magic32 != XFS_AGI_MAGIC) {
> -			warnmsg = "Bad AGI block magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_agi_buf_ops;
> -		break;
> -	case XFS_BLFT_UDQUOT_BUF:
> -	case XFS_BLFT_PDQUOT_BUF:
> -	case XFS_BLFT_GDQUOT_BUF:
> -#ifdef CONFIG_XFS_QUOTA
> -		if (magic16 != XFS_DQUOT_MAGIC) {
> -			warnmsg = "Bad DQUOT block magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_dquot_buf_ops;
> -#else
> -		xfs_alert(mp,
> -	"Trying to recover dquots without QUOTA support built in!");
> -		ASSERT(0);
> -#endif
> -		break;
> -	case XFS_BLFT_DINO_BUF:
> -		if (magic16 != XFS_DINODE_MAGIC) {
> -			warnmsg = "Bad INODE block magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_inode_buf_ops;
> -		break;
> -	case XFS_BLFT_SYMLINK_BUF:
> -		if (magic32 != XFS_SYMLINK_MAGIC) {
> -			warnmsg = "Bad symlink block magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_symlink_buf_ops;
> -		break;
> -	case XFS_BLFT_DIR_BLOCK_BUF:
> -		if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
> -		    magic32 != XFS_DIR3_BLOCK_MAGIC) {
> -			warnmsg = "Bad dir block magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_dir3_block_buf_ops;
> -		break;
> -	case XFS_BLFT_DIR_DATA_BUF:
> -		if (magic32 != XFS_DIR2_DATA_MAGIC &&
> -		    magic32 != XFS_DIR3_DATA_MAGIC) {
> -			warnmsg = "Bad dir data magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_dir3_data_buf_ops;
> -		break;
> -	case XFS_BLFT_DIR_FREE_BUF:
> -		if (magic32 != XFS_DIR2_FREE_MAGIC &&
> -		    magic32 != XFS_DIR3_FREE_MAGIC) {
> -			warnmsg = "Bad dir3 free magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_dir3_free_buf_ops;
> -		break;
> -	case XFS_BLFT_DIR_LEAF1_BUF:
> -		if (magicda != XFS_DIR2_LEAF1_MAGIC &&
> -		    magicda != XFS_DIR3_LEAF1_MAGIC) {
> -			warnmsg = "Bad dir leaf1 magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
> -		break;
> -	case XFS_BLFT_DIR_LEAFN_BUF:
> -		if (magicda != XFS_DIR2_LEAFN_MAGIC &&
> -		    magicda != XFS_DIR3_LEAFN_MAGIC) {
> -			warnmsg = "Bad dir leafn magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_dir3_leafn_buf_ops;
> -		break;
> -	case XFS_BLFT_DA_NODE_BUF:
> -		if (magicda != XFS_DA_NODE_MAGIC &&
> -		    magicda != XFS_DA3_NODE_MAGIC) {
> -			warnmsg = "Bad da node magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_da3_node_buf_ops;
> -		break;
> -	case XFS_BLFT_ATTR_LEAF_BUF:
> -		if (magicda != XFS_ATTR_LEAF_MAGIC &&
> -		    magicda != XFS_ATTR3_LEAF_MAGIC) {
> -			warnmsg = "Bad attr leaf magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_attr3_leaf_buf_ops;
> -		break;
> -	case XFS_BLFT_ATTR_RMT_BUF:
> -		if (magic32 != XFS_ATTR3_RMT_MAGIC) {
> -			warnmsg = "Bad attr remote magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_attr3_rmt_buf_ops;
> -		break;
> -	case XFS_BLFT_SB_BUF:
> -		if (magic32 != XFS_SB_MAGIC) {
> -			warnmsg = "Bad SB block magic!";
> -			break;
> -		}
> -		bp->b_ops = &xfs_sb_buf_ops;
> -		break;
> -#ifdef CONFIG_XFS_RT
> -	case XFS_BLFT_RTBITMAP_BUF:
> -	case XFS_BLFT_RTSUMMARY_BUF:
> -		/* no magic numbers for verification of RT buffers */
> -		bp->b_ops = &xfs_rtbuf_ops;
> -		break;
> -#endif /* CONFIG_XFS_RT */
> -	default:
> -		xfs_warn(mp, "Unknown buffer type %d!",
> -			 xfs_blft_from_flags(buf_f));
> -		break;
> -	}
> -
> -	/*
> -	 * Nothing else to do in the case of a NULL current LSN as this means
> -	 * the buffer is more recent than the change in the log and will be
> -	 * skipped.
> -	 */
> -	if (current_lsn == NULLCOMMITLSN)
> -		return;
> -
> -	if (warnmsg) {
> -		xfs_warn(mp, warnmsg);
> -		ASSERT(0);
> -	}
> -
> -	/*
> -	 * We must update the metadata LSN of the buffer as it is written out to
> -	 * ensure that older transactions never replay over this one and corrupt
> -	 * the buffer. This can occur if log recovery is interrupted at some
> -	 * point after the current transaction completes, at which point a
> -	 * subsequent mount starts recovery from the beginning.
> -	 *
> -	 * Write verifiers update the metadata LSN from log items attached to
> -	 * the buffer. Therefore, initialize a bli purely to carry the LSN to
> -	 * the verifier. We'll clean it up in our ->iodone() callback.
> -	 */
> -	if (bp->b_ops) {
> -		struct xfs_buf_log_item	*bip;
> -
> -		ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
> -		bp->b_iodone = xlog_recover_iodone;
> -		xfs_buf_item_init(bp, mp);
> -		bip = bp->b_log_item;
> -		bip->bli_item.li_lsn = current_lsn;
> -	}
> -}
> -
> -/*
> - * Perform a 'normal' buffer recovery.  Each logged region of the
> - * buffer should be copied over the corresponding region in the
> - * given buffer.  The bitmap in the buf log format structure indicates
> - * where to place the logged data.
> - */
> -STATIC void
> -xlog_recover_do_reg_buffer(
> -	struct xfs_mount	*mp,
> -	struct xlog_recover_item *item,
> -	struct xfs_buf		*bp,
> -	xfs_buf_log_format_t	*buf_f,
> -	xfs_lsn_t		current_lsn)
> -{
> -	int			i;
> -	int			bit;
> -	int			nbits;
> -	xfs_failaddr_t		fa;
> -	const size_t		size_disk_dquot = sizeof(struct xfs_disk_dquot);
> -
> -	trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
> -
> -	bit = 0;
> -	i = 1;  /* 0 is the buf format structure */
> -	while (1) {
> -		bit = xfs_next_bit(buf_f->blf_data_map,
> -				   buf_f->blf_map_size, bit);
> -		if (bit == -1)
> -			break;
> -		nbits = xfs_contig_bits(buf_f->blf_data_map,
> -					buf_f->blf_map_size, bit);
> -		ASSERT(nbits > 0);
> -		ASSERT(item->ri_buf[i].i_addr != NULL);
> -		ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
> -		ASSERT(BBTOB(bp->b_length) >=
> -		       ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
> -
> -		/*
> -		 * The dirty regions logged in the buffer, even though
> -		 * contiguous, may span multiple chunks. This is because the
> -		 * dirty region may span a physical page boundary in a buffer
> -		 * and hence be split into two separate vectors for writing into
> -		 * the log. Hence we need to trim nbits back to the length of
> -		 * the current region being copied out of the log.
> -		 */
> -		if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
> -			nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
> -
> -		/*
> -		 * Do a sanity check if this is a dquot buffer. Just checking
> -		 * the first dquot in the buffer should do. XXXThis is
> -		 * probably a good thing to do for other buf types also.
> -		 */
> -		fa = NULL;
> -		if (buf_f->blf_flags &
> -		   (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
> -			if (item->ri_buf[i].i_addr == NULL) {
> -				xfs_alert(mp,
> -					"XFS: NULL dquot in %s.", __func__);
> -				goto next;
> -			}
> -			if (item->ri_buf[i].i_len < size_disk_dquot) {
> -				xfs_alert(mp,
> -					"XFS: dquot too small (%d) in %s.",
> -					item->ri_buf[i].i_len, __func__);
> -				goto next;
> -			}
> -			fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr,
> -					       -1, 0);
> -			if (fa) {
> -				xfs_alert(mp,
> -	"dquot corrupt at %pS trying to replay into block 0x%llx",
> -					fa, bp->b_bn);
> -				goto next;
> -			}
> -		}
> -
> -		memcpy(xfs_buf_offset(bp,
> -			(uint)bit << XFS_BLF_SHIFT),	/* dest */
> -			item->ri_buf[i].i_addr,		/* source */
> -			nbits<<XFS_BLF_SHIFT);		/* length */
> - next:
> -		i++;
> -		bit += nbits;
> -	}
> -
> -	/* Shouldn't be any more regions */
> -	ASSERT(i == item->ri_total);
> -
> -	xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn);
> -}
> -
> -/*
> - * Perform a dquot buffer recovery.
> - * Simple algorithm: if we have found a QUOTAOFF log item of the same type
> - * (ie. USR or GRP), then just toss this buffer away; don't recover it.
> - * Else, treat it as a regular buffer and do recovery.
> - *
> - * Return false if the buffer was tossed and true if we recovered the buffer to
> - * indicate to the caller if the buffer needs writing.
> - */
> -STATIC bool
> -xlog_recover_do_dquot_buffer(
> -	struct xfs_mount		*mp,
> -	struct xlog			*log,
> -	struct xlog_recover_item	*item,
> -	struct xfs_buf			*bp,
> -	struct xfs_buf_log_format	*buf_f)
> -{
> -	uint			type;
> -
> -	trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
> -
> -	/*
> -	 * Filesystems are required to send in quota flags at mount time.
> -	 */
> -	if (!mp->m_qflags)
> -		return false;
> -
> -	type = 0;
> -	if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
> -		type |= XFS_DQ_USER;
> -	if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF)
> -		type |= XFS_DQ_PROJ;
> -	if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF)
> -		type |= XFS_DQ_GROUP;
> -	/*
> -	 * This type of quotas was turned off, so ignore this buffer
> -	 */
> -	if (log->l_quotaoffs_flag & type)
> -		return false;
> -
> -	xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN);
> -	return true;
> -}
> -
> -/*
> - * This routine replays a modification made to a buffer at runtime.
> - * There are actually two types of buffer, regular and inode, which
> - * are handled differently.  Inode buffers are handled differently
> - * in that we only recover a specific set of data from them, namely
> - * the inode di_next_unlinked fields.  This is because all other inode
> - * data is actually logged via inode records and any data we replay
> - * here which overlaps that may be stale.
> - *
> - * When meta-data buffers are freed at run time we log a buffer item
> - * with the XFS_BLF_CANCEL bit set to indicate that previous copies
> - * of the buffer in the log should not be replayed at recovery time.
> - * This is so that if the blocks covered by the buffer are reused for
> - * file data before we crash we don't end up replaying old, freed
> - * meta-data into a user's file.
> - *
> - * To handle the cancellation of buffer log items, we make two passes
> - * over the log during recovery.  During the first we build a table of
> - * those buffers which have been cancelled, and during the second we
> - * only replay those buffers which do not have corresponding cancel
> - * records in the table.  See xlog_recover_buffer_pass[1,2] above
> - * for more details on the implementation of the table of cancel records.
> - */
> -STATIC int
> -xlog_recover_buffer_pass2(
> -	struct xlog			*log,
> -	struct list_head		*buffer_list,
> -	struct xlog_recover_item	*item,
> -	xfs_lsn_t			current_lsn)
> -{
> -	xfs_buf_log_format_t	*buf_f = item->ri_buf[0].i_addr;
> -	xfs_mount_t		*mp = log->l_mp;
> -	xfs_buf_t		*bp;
> -	int			error;
> -	uint			buf_flags;
> -	xfs_lsn_t		lsn;
> -
> -	/*
> -	 * In this pass we only want to recover all the buffers which have
> -	 * not been cancelled and are not cancellation buffers themselves.
> -	 */
> -	if (buf_f->blf_flags & XFS_BLF_CANCEL) {
> -		if (xlog_put_buffer_cancelled(log, buf_f->blf_blkno,
> -				buf_f->blf_len))
> -			goto cancelled;
> -	} else {
> -
> -		if (xlog_is_buffer_cancelled(log, buf_f->blf_blkno,
> -				buf_f->blf_len))
> -			goto cancelled;
> -	}
> -
> -	trace_xfs_log_recover_buf_recover(log, buf_f);
> -
> -	buf_flags = 0;
> -	if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
> -		buf_flags |= XBF_UNMAPPED;
> -
> -	error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
> -			  buf_flags, &bp, NULL);
> -	if (error)
> -		return error;
> -
> -	/*
> -	 * Recover the buffer only if we get an LSN from it and it's less than
> -	 * the lsn of the transaction we are replaying.
> -	 *
> -	 * Note that we have to be extremely careful of readahead here.
> -	 * Readahead does not attach verfiers to the buffers so if we don't
> -	 * actually do any replay after readahead because of the LSN we found
> -	 * in the buffer if more recent than that current transaction then we
> -	 * need to attach the verifier directly. Failure to do so can lead to
> -	 * future recovery actions (e.g. EFI and unlinked list recovery) can
> -	 * operate on the buffers and they won't get the verifier attached. This
> -	 * can lead to blocks on disk having the correct content but a stale
> -	 * CRC.
> -	 *
> -	 * It is safe to assume these clean buffers are currently up to date.
> -	 * If the buffer is dirtied by a later transaction being replayed, then
> -	 * the verifier will be reset to match whatever recover turns that
> -	 * buffer into.
> -	 */
> -	lsn = xlog_recover_get_buf_lsn(mp, bp);
> -	if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
> -		trace_xfs_log_recover_buf_skip(log, buf_f);
> -		xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
> -		goto out_release;
> -	}
> -
> -	if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
> -		error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
> -		if (error)
> -			goto out_release;
> -	} else if (buf_f->blf_flags &
> -		  (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
> -		bool	dirty;
> -
> -		dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
> -		if (!dirty)
> -			goto out_release;
> -	} else {
> -		xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
> -	}
> -
> -	/*
> -	 * Perform delayed write on the buffer.  Asynchronous writes will be
> -	 * slower when taking into account all the buffers to be flushed.
> -	 *
> -	 * Also make sure that only inode buffers with good sizes stay in
> -	 * the buffer cache.  The kernel moves inodes in buffers of 1 block
> -	 * or inode_cluster_size bytes, whichever is bigger.  The inode
> -	 * buffers in the log can be a different size if the log was generated
> -	 * by an older kernel using unclustered inode buffers or a newer kernel
> -	 * running with a different inode cluster size.  Regardless, if the
> -	 * the inode buffer size isn't max(blocksize, inode_cluster_size)
> -	 * for *our* value of inode_cluster_size, then we need to keep
> -	 * the buffer out of the buffer cache so that the buffer won't
> -	 * overlap with future reads of those inodes.
> -	 */
> -	if (XFS_DINODE_MAGIC ==
> -	    be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
> -	    (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
> -		xfs_buf_stale(bp);
> -		error = xfs_bwrite(bp);
> -	} else {
> -		ASSERT(bp->b_mount == mp);
> -		bp->b_iodone = xlog_recover_iodone;
> -		xfs_buf_delwri_queue(bp, buffer_list);
> -	}
> -
> -out_release:
> -	xfs_buf_relse(bp);
> -	return error;
> -cancelled:
> -	trace_xfs_log_recover_buf_cancel(log, buf_f);
> -	return 0;
> -}
> -
>  /*
>   * Inode fork owner changes
>   *
> @@ -3846,10 +3061,11 @@ xlog_recover_commit_pass2(
>  {
>  	trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
>  
> +	if (item->ri_ops && item->ri_ops->commit_pass2)
> +		return item->ri_ops->commit_pass2(log, buffer_list, item,
> +				trans->r_lsn);
> +
>  	switch (ITEM_TYPE(item)) {
> -	case XFS_LI_BUF:
> -		return xlog_recover_buffer_pass2(log, buffer_list, item,
> -						 trans->r_lsn);
>  	case XFS_LI_INODE:
>  		return xlog_recover_inode_pass2(log, buffer_list, item,
>  						 trans->r_lsn);
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 06/28] xfs: refactor log recovery inode item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 06/28] xfs: refactor log recovery inode " Darrick J. Wong
@ 2020-05-05  5:09   ` Chandan Babu R
  2020-05-06 15:10   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  5:09 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:41:10 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the log inode item pass2 commit code into the per-item source code
> files and use the dispatch function to call it.  We do these one at a
> time because there's a lot of code to move.  No functional changes.
>

Inode item pass2 processing is functionally consistent with what was done
before this patch is applied.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_inode_item_recover.c |  355 +++++++++++++++++++++++++++++++++++++++
>  fs/xfs/xfs_log_recover.c        |  355 ---------------------------------------
>  2 files changed, 355 insertions(+), 355 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
> index a132cacd8d48..2bdba612aa71 100644
> --- a/fs/xfs/xfs_inode_item_recover.c
> +++ b/fs/xfs/xfs_inode_item_recover.c
> @@ -20,6 +20,8 @@
>  #include "xfs_error.h"
>  #include "xfs_log_priv.h"
>  #include "xfs_log_recover.h"
> +#include "xfs_icache.h"
> +#include "xfs_bmap_btree.h"
>  
>  STATIC void
>  xlog_recover_inode_ra_pass2(
> @@ -39,7 +41,360 @@ xlog_recover_inode_ra_pass2(
>  	}
>  }
>  
> +/*
> + * Inode fork owner changes
> + *
> + * If we have been told that we have to reparent the inode fork, it's because an
> + * extent swap operation on a CRC enabled filesystem has been done and we are
> + * replaying it. We need to walk the BMBT of the appropriate fork and change the
> + * owners of it.
> + *
> + * The complexity here is that we don't have an inode context to work with, so
> + * after we've replayed the inode we need to instantiate one.  This is where the
> + * fun begins.
> + *
> + * We are in the middle of log recovery, so we can't run transactions. That
> + * means we cannot use cache coherent inode instantiation via xfs_iget(), as
> + * that will result in the corresponding iput() running the inode through
> + * xfs_inactive(). If we've just replayed an inode core that changes the link
> + * count to zero (i.e. it's been unlinked), then xfs_inactive() will run
> + * transactions (bad!).
> + *
> + * So, to avoid this, we instantiate an inode directly from the inode core we've
> + * just recovered. We have the buffer still locked, and all we really need to
> + * instantiate is the inode core and the forks being modified. We can do this
> + * manually, then run the inode btree owner change, and then tear down the
> + * xfs_inode without having to run any transactions at all.
> + *
> + * Also, because we don't have a transaction context available here but need to
> + * gather all the buffers we modify for writeback so we pass the buffer_list
> + * instead for the operation to use.
> + */
> +
> +STATIC int
> +xfs_recover_inode_owner_change(
> +	struct xfs_mount	*mp,
> +	struct xfs_dinode	*dip,
> +	struct xfs_inode_log_format *in_f,
> +	struct list_head	*buffer_list)
> +{
> +	struct xfs_inode	*ip;
> +	int			error;
> +
> +	ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER));
> +
> +	ip = xfs_inode_alloc(mp, in_f->ilf_ino);
> +	if (!ip)
> +		return -ENOMEM;
> +
> +	/* instantiate the inode */
> +	ASSERT(dip->di_version >= 3);
> +	xfs_inode_from_disk(ip, dip);
> +
> +	error = xfs_iformat_fork(ip, dip);
> +	if (error)
> +		goto out_free_ip;
> +
> +	if (!xfs_inode_verify_forks(ip)) {
> +		error = -EFSCORRUPTED;
> +		goto out_free_ip;
> +	}
> +
> +	if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
> +		ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
> +		error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK,
> +					      ip->i_ino, buffer_list);
> +		if (error)
> +			goto out_free_ip;
> +	}
> +
> +	if (in_f->ilf_fields & XFS_ILOG_AOWNER) {
> +		ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT);
> +		error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK,
> +					      ip->i_ino, buffer_list);
> +		if (error)
> +			goto out_free_ip;
> +	}
> +
> +out_free_ip:
> +	xfs_inode_free(ip);
> +	return error;
> +}
> +
> +STATIC int
> +xlog_recover_inode_commit_pass2(
> +	struct xlog			*log,
> +	struct list_head		*buffer_list,
> +	struct xlog_recover_item	*item,
> +	xfs_lsn_t			current_lsn)
> +{
> +	struct xfs_inode_log_format	*in_f;
> +	struct xfs_mount		*mp = log->l_mp;
> +	struct xfs_buf			*bp;
> +	struct xfs_dinode		*dip;
> +	int				len;
> +	char				*src;
> +	char				*dest;
> +	int				error;
> +	int				attr_index;
> +	uint				fields;
> +	struct xfs_log_dinode		*ldip;
> +	uint				isize;
> +	int				need_free = 0;
> +
> +	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
> +		in_f = item->ri_buf[0].i_addr;
> +	} else {
> +		in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
> +		need_free = 1;
> +		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
> +		if (error)
> +			goto error;
> +	}
> +
> +	/*
> +	 * Inode buffers can be freed, look out for it,
> +	 * and do not replay the inode.
> +	 */
> +	if (xlog_is_buffer_cancelled(log, in_f->ilf_blkno, in_f->ilf_len)) {
> +		error = 0;
> +		trace_xfs_log_recover_inode_cancel(log, in_f);
> +		goto error;
> +	}
> +	trace_xfs_log_recover_inode_recover(log, in_f);
> +
> +	error = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
> +			0, &bp, &xfs_inode_buf_ops);
> +	if (error)
> +		goto error;
> +	ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
> +	dip = xfs_buf_offset(bp, in_f->ilf_boffset);
> +
> +	/*
> +	 * Make sure the place we're flushing out to really looks
> +	 * like an inode!
> +	 */
> +	if (XFS_IS_CORRUPT(mp, !xfs_verify_magic16(bp, dip->di_magic))) {
> +		xfs_alert(mp,
> +	"%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
> +			__func__, dip, bp, in_f->ilf_ino);
> +		error = -EFSCORRUPTED;
> +		goto out_release;
> +	}
> +	ldip = item->ri_buf[1].i_addr;
> +	if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) {
> +		xfs_alert(mp,
> +			"%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
> +			__func__, item, in_f->ilf_ino);
> +		error = -EFSCORRUPTED;
> +		goto out_release;
> +	}
> +
> +	/*
> +	 * If the inode has an LSN in it, recover the inode only if it's less
> +	 * than the lsn of the transaction we are replaying. Note: we still
> +	 * need to replay an owner change even though the inode is more recent
> +	 * than the transaction as there is no guarantee that all the btree
> +	 * blocks are more recent than this transaction, too.
> +	 */
> +	if (dip->di_version >= 3) {
> +		xfs_lsn_t	lsn = be64_to_cpu(dip->di_lsn);
> +
> +		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
> +			trace_xfs_log_recover_inode_skip(log, in_f);
> +			error = 0;
> +			goto out_owner_change;
> +		}
> +	}
> +
> +	/*
> +	 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
> +	 * are transactional and if ordering is necessary we can determine that
> +	 * more accurately by the LSN field in the V3 inode core. Don't trust
> +	 * the inode versions we might be changing them here - use the
> +	 * superblock flag to determine whether we need to look at di_flushiter
> +	 * to skip replay when the on disk inode is newer than the log one
> +	 */
> +	if (!xfs_sb_version_has_v3inode(&mp->m_sb) &&
> +	    ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
> +		/*
> +		 * Deal with the wrap case, DI_MAX_FLUSH is less
> +		 * than smaller numbers
> +		 */
> +		if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
> +		    ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
> +			/* do nothing */
> +		} else {
> +			trace_xfs_log_recover_inode_skip(log, in_f);
> +			error = 0;
> +			goto out_release;
> +		}
> +	}
> +
> +	/* Take the opportunity to reset the flush iteration count */
> +	ldip->di_flushiter = 0;
> +
> +	if (unlikely(S_ISREG(ldip->di_mode))) {
> +		if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
> +		    (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
> +			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
> +					 XFS_ERRLEVEL_LOW, mp, ldip,
> +					 sizeof(*ldip));
> +			xfs_alert(mp,
> +		"%s: Bad regular inode log record, rec ptr "PTR_FMT", "
> +		"ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
> +				__func__, item, dip, bp, in_f->ilf_ino);
> +			error = -EFSCORRUPTED;
> +			goto out_release;
> +		}
> +	} else if (unlikely(S_ISDIR(ldip->di_mode))) {
> +		if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
> +		    (ldip->di_format != XFS_DINODE_FMT_BTREE) &&
> +		    (ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
> +			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
> +					     XFS_ERRLEVEL_LOW, mp, ldip,
> +					     sizeof(*ldip));
> +			xfs_alert(mp,
> +		"%s: Bad dir inode log record, rec ptr "PTR_FMT", "
> +		"ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
> +				__func__, item, dip, bp, in_f->ilf_ino);
> +			error = -EFSCORRUPTED;
> +			goto out_release;
> +		}
> +	}
> +	if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
> +		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
> +				     XFS_ERRLEVEL_LOW, mp, ldip,
> +				     sizeof(*ldip));
> +		xfs_alert(mp,
> +	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
> +	"dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
> +			__func__, item, dip, bp, in_f->ilf_ino,
> +			ldip->di_nextents + ldip->di_anextents,
> +			ldip->di_nblocks);
> +		error = -EFSCORRUPTED;
> +		goto out_release;
> +	}
> +	if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
> +		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
> +				     XFS_ERRLEVEL_LOW, mp, ldip,
> +				     sizeof(*ldip));
> +		xfs_alert(mp,
> +	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
> +	"dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__,
> +			item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
> +		error = -EFSCORRUPTED;
> +		goto out_release;
> +	}
> +	isize = xfs_log_dinode_size(mp);
> +	if (unlikely(item->ri_buf[1].i_len > isize)) {
> +		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
> +				     XFS_ERRLEVEL_LOW, mp, ldip,
> +				     sizeof(*ldip));
> +		xfs_alert(mp,
> +			"%s: Bad inode log record length %d, rec ptr "PTR_FMT,
> +			__func__, item->ri_buf[1].i_len, item);
> +		error = -EFSCORRUPTED;
> +		goto out_release;
> +	}
> +
> +	/* recover the log dinode inode into the on disk inode */
> +	xfs_log_dinode_to_disk(ldip, dip);
> +
> +	fields = in_f->ilf_fields;
> +	if (fields & XFS_ILOG_DEV)
> +		xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
> +
> +	if (in_f->ilf_size == 2)
> +		goto out_owner_change;
> +	len = item->ri_buf[2].i_len;
> +	src = item->ri_buf[2].i_addr;
> +	ASSERT(in_f->ilf_size <= 4);
> +	ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
> +	ASSERT(!(fields & XFS_ILOG_DFORK) ||
> +	       (len == in_f->ilf_dsize));
> +
> +	switch (fields & XFS_ILOG_DFORK) {
> +	case XFS_ILOG_DDATA:
> +	case XFS_ILOG_DEXT:
> +		memcpy(XFS_DFORK_DPTR(dip), src, len);
> +		break;
> +
> +	case XFS_ILOG_DBROOT:
> +		xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
> +				 (struct xfs_bmdr_block *)XFS_DFORK_DPTR(dip),
> +				 XFS_DFORK_DSIZE(dip, mp));
> +		break;
> +
> +	default:
> +		/*
> +		 * There are no data fork flags set.
> +		 */
> +		ASSERT((fields & XFS_ILOG_DFORK) == 0);
> +		break;
> +	}
> +
> +	/*
> +	 * If we logged any attribute data, recover it.  There may or
> +	 * may not have been any other non-core data logged in this
> +	 * transaction.
> +	 */
> +	if (in_f->ilf_fields & XFS_ILOG_AFORK) {
> +		if (in_f->ilf_fields & XFS_ILOG_DFORK) {
> +			attr_index = 3;
> +		} else {
> +			attr_index = 2;
> +		}
> +		len = item->ri_buf[attr_index].i_len;
> +		src = item->ri_buf[attr_index].i_addr;
> +		ASSERT(len == in_f->ilf_asize);
> +
> +		switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
> +		case XFS_ILOG_ADATA:
> +		case XFS_ILOG_AEXT:
> +			dest = XFS_DFORK_APTR(dip);
> +			ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
> +			memcpy(dest, src, len);
> +			break;
> +
> +		case XFS_ILOG_ABROOT:
> +			dest = XFS_DFORK_APTR(dip);
> +			xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
> +					 len, (struct xfs_bmdr_block *)dest,
> +					 XFS_DFORK_ASIZE(dip, mp));
> +			break;
> +
> +		default:
> +			xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
> +			ASSERT(0);
> +			error = -EFSCORRUPTED;
> +			goto out_release;
> +		}
> +	}
> +
> +out_owner_change:
> +	/* Recover the swapext owner change unless inode has been deleted */
> +	if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) &&
> +	    (dip->di_mode != 0))
> +		error = xfs_recover_inode_owner_change(mp, dip, in_f,
> +						       buffer_list);
> +	/* re-generate the checksum. */
> +	xfs_dinode_calc_crc(log->l_mp, dip);
> +
> +	ASSERT(bp->b_mount == mp);
> +	bp->b_iodone = xlog_recover_iodone;
> +	xfs_buf_delwri_queue(bp, buffer_list);
> +
> +out_release:
> +	xfs_buf_relse(bp);
> +error:
> +	if (need_free)
> +		kmem_free(in_f);
> +	return error;
> +}
> +
>  const struct xlog_recover_item_ops xlog_inode_item_ops = {
>  	.item_type		= XFS_LI_INODE,
>  	.ra_pass2		= xlog_recover_inode_ra_pass2,
> +	.commit_pass2		= xlog_recover_inode_commit_pass2,
>  };
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index d65dc3895a62..cb5902550e8c 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2034,358 +2034,6 @@ xlog_buf_readahead(
>  		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
>  }
>  
> -/*
> - * Inode fork owner changes
> - *
> - * If we have been told that we have to reparent the inode fork, it's because an
> - * extent swap operation on a CRC enabled filesystem has been done and we are
> - * replaying it. We need to walk the BMBT of the appropriate fork and change the
> - * owners of it.
> - *
> - * The complexity here is that we don't have an inode context to work with, so
> - * after we've replayed the inode we need to instantiate one.  This is where the
> - * fun begins.
> - *
> - * We are in the middle of log recovery, so we can't run transactions. That
> - * means we cannot use cache coherent inode instantiation via xfs_iget(), as
> - * that will result in the corresponding iput() running the inode through
> - * xfs_inactive(). If we've just replayed an inode core that changes the link
> - * count to zero (i.e. it's been unlinked), then xfs_inactive() will run
> - * transactions (bad!).
> - *
> - * So, to avoid this, we instantiate an inode directly from the inode core we've
> - * just recovered. We have the buffer still locked, and all we really need to
> - * instantiate is the inode core and the forks being modified. We can do this
> - * manually, then run the inode btree owner change, and then tear down the
> - * xfs_inode without having to run any transactions at all.
> - *
> - * Also, because we don't have a transaction context available here but need to
> - * gather all the buffers we modify for writeback so we pass the buffer_list
> - * instead for the operation to use.
> - */
> -
> -STATIC int
> -xfs_recover_inode_owner_change(
> -	struct xfs_mount	*mp,
> -	struct xfs_dinode	*dip,
> -	struct xfs_inode_log_format *in_f,
> -	struct list_head	*buffer_list)
> -{
> -	struct xfs_inode	*ip;
> -	int			error;
> -
> -	ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER));
> -
> -	ip = xfs_inode_alloc(mp, in_f->ilf_ino);
> -	if (!ip)
> -		return -ENOMEM;
> -
> -	/* instantiate the inode */
> -	ASSERT(dip->di_version >= 3);
> -	xfs_inode_from_disk(ip, dip);
> -
> -	error = xfs_iformat_fork(ip, dip);
> -	if (error)
> -		goto out_free_ip;
> -
> -	if (!xfs_inode_verify_forks(ip)) {
> -		error = -EFSCORRUPTED;
> -		goto out_free_ip;
> -	}
> -
> -	if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
> -		ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
> -		error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK,
> -					      ip->i_ino, buffer_list);
> -		if (error)
> -			goto out_free_ip;
> -	}
> -
> -	if (in_f->ilf_fields & XFS_ILOG_AOWNER) {
> -		ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT);
> -		error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK,
> -					      ip->i_ino, buffer_list);
> -		if (error)
> -			goto out_free_ip;
> -	}
> -
> -out_free_ip:
> -	xfs_inode_free(ip);
> -	return error;
> -}
> -
> -STATIC int
> -xlog_recover_inode_pass2(
> -	struct xlog			*log,
> -	struct list_head		*buffer_list,
> -	struct xlog_recover_item	*item,
> -	xfs_lsn_t			current_lsn)
> -{
> -	struct xfs_inode_log_format	*in_f;
> -	xfs_mount_t		*mp = log->l_mp;
> -	xfs_buf_t		*bp;
> -	xfs_dinode_t		*dip;
> -	int			len;
> -	char			*src;
> -	char			*dest;
> -	int			error;
> -	int			attr_index;
> -	uint			fields;
> -	struct xfs_log_dinode	*ldip;
> -	uint			isize;
> -	int			need_free = 0;
> -
> -	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
> -		in_f = item->ri_buf[0].i_addr;
> -	} else {
> -		in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
> -		need_free = 1;
> -		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
> -		if (error)
> -			goto error;
> -	}
> -
> -	/*
> -	 * Inode buffers can be freed, look out for it,
> -	 * and do not replay the inode.
> -	 */
> -	if (xlog_is_buffer_cancelled(log, in_f->ilf_blkno, in_f->ilf_len)) {
> -		error = 0;
> -		trace_xfs_log_recover_inode_cancel(log, in_f);
> -		goto error;
> -	}
> -	trace_xfs_log_recover_inode_recover(log, in_f);
> -
> -	error = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
> -			0, &bp, &xfs_inode_buf_ops);
> -	if (error)
> -		goto error;
> -	ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
> -	dip = xfs_buf_offset(bp, in_f->ilf_boffset);
> -
> -	/*
> -	 * Make sure the place we're flushing out to really looks
> -	 * like an inode!
> -	 */
> -	if (XFS_IS_CORRUPT(mp, !xfs_verify_magic16(bp, dip->di_magic))) {
> -		xfs_alert(mp,
> -	"%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
> -			__func__, dip, bp, in_f->ilf_ino);
> -		error = -EFSCORRUPTED;
> -		goto out_release;
> -	}
> -	ldip = item->ri_buf[1].i_addr;
> -	if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) {
> -		xfs_alert(mp,
> -			"%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
> -			__func__, item, in_f->ilf_ino);
> -		error = -EFSCORRUPTED;
> -		goto out_release;
> -	}
> -
> -	/*
> -	 * If the inode has an LSN in it, recover the inode only if it's less
> -	 * than the lsn of the transaction we are replaying. Note: we still
> -	 * need to replay an owner change even though the inode is more recent
> -	 * than the transaction as there is no guarantee that all the btree
> -	 * blocks are more recent than this transaction, too.
> -	 */
> -	if (dip->di_version >= 3) {
> -		xfs_lsn_t	lsn = be64_to_cpu(dip->di_lsn);
> -
> -		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
> -			trace_xfs_log_recover_inode_skip(log, in_f);
> -			error = 0;
> -			goto out_owner_change;
> -		}
> -	}
> -
> -	/*
> -	 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
> -	 * are transactional and if ordering is necessary we can determine that
> -	 * more accurately by the LSN field in the V3 inode core. Don't trust
> -	 * the inode versions we might be changing them here - use the
> -	 * superblock flag to determine whether we need to look at di_flushiter
> -	 * to skip replay when the on disk inode is newer than the log one
> -	 */
> -	if (!xfs_sb_version_has_v3inode(&mp->m_sb) &&
> -	    ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
> -		/*
> -		 * Deal with the wrap case, DI_MAX_FLUSH is less
> -		 * than smaller numbers
> -		 */
> -		if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
> -		    ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
> -			/* do nothing */
> -		} else {
> -			trace_xfs_log_recover_inode_skip(log, in_f);
> -			error = 0;
> -			goto out_release;
> -		}
> -	}
> -
> -	/* Take the opportunity to reset the flush iteration count */
> -	ldip->di_flushiter = 0;
> -
> -	if (unlikely(S_ISREG(ldip->di_mode))) {
> -		if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
> -		    (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
> -			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
> -					 XFS_ERRLEVEL_LOW, mp, ldip,
> -					 sizeof(*ldip));
> -			xfs_alert(mp,
> -		"%s: Bad regular inode log record, rec ptr "PTR_FMT", "
> -		"ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
> -				__func__, item, dip, bp, in_f->ilf_ino);
> -			error = -EFSCORRUPTED;
> -			goto out_release;
> -		}
> -	} else if (unlikely(S_ISDIR(ldip->di_mode))) {
> -		if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
> -		    (ldip->di_format != XFS_DINODE_FMT_BTREE) &&
> -		    (ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
> -			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
> -					     XFS_ERRLEVEL_LOW, mp, ldip,
> -					     sizeof(*ldip));
> -			xfs_alert(mp,
> -		"%s: Bad dir inode log record, rec ptr "PTR_FMT", "
> -		"ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
> -				__func__, item, dip, bp, in_f->ilf_ino);
> -			error = -EFSCORRUPTED;
> -			goto out_release;
> -		}
> -	}
> -	if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
> -		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
> -				     XFS_ERRLEVEL_LOW, mp, ldip,
> -				     sizeof(*ldip));
> -		xfs_alert(mp,
> -	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
> -	"dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
> -			__func__, item, dip, bp, in_f->ilf_ino,
> -			ldip->di_nextents + ldip->di_anextents,
> -			ldip->di_nblocks);
> -		error = -EFSCORRUPTED;
> -		goto out_release;
> -	}
> -	if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
> -		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
> -				     XFS_ERRLEVEL_LOW, mp, ldip,
> -				     sizeof(*ldip));
> -		xfs_alert(mp,
> -	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
> -	"dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__,
> -			item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
> -		error = -EFSCORRUPTED;
> -		goto out_release;
> -	}
> -	isize = xfs_log_dinode_size(mp);
> -	if (unlikely(item->ri_buf[1].i_len > isize)) {
> -		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
> -				     XFS_ERRLEVEL_LOW, mp, ldip,
> -				     sizeof(*ldip));
> -		xfs_alert(mp,
> -			"%s: Bad inode log record length %d, rec ptr "PTR_FMT,
> -			__func__, item->ri_buf[1].i_len, item);
> -		error = -EFSCORRUPTED;
> -		goto out_release;
> -	}
> -
> -	/* recover the log dinode inode into the on disk inode */
> -	xfs_log_dinode_to_disk(ldip, dip);
> -
> -	fields = in_f->ilf_fields;
> -	if (fields & XFS_ILOG_DEV)
> -		xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
> -
> -	if (in_f->ilf_size == 2)
> -		goto out_owner_change;
> -	len = item->ri_buf[2].i_len;
> -	src = item->ri_buf[2].i_addr;
> -	ASSERT(in_f->ilf_size <= 4);
> -	ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
> -	ASSERT(!(fields & XFS_ILOG_DFORK) ||
> -	       (len == in_f->ilf_dsize));
> -
> -	switch (fields & XFS_ILOG_DFORK) {
> -	case XFS_ILOG_DDATA:
> -	case XFS_ILOG_DEXT:
> -		memcpy(XFS_DFORK_DPTR(dip), src, len);
> -		break;
> -
> -	case XFS_ILOG_DBROOT:
> -		xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
> -				 (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip),
> -				 XFS_DFORK_DSIZE(dip, mp));
> -		break;
> -
> -	default:
> -		/*
> -		 * There are no data fork flags set.
> -		 */
> -		ASSERT((fields & XFS_ILOG_DFORK) == 0);
> -		break;
> -	}
> -
> -	/*
> -	 * If we logged any attribute data, recover it.  There may or
> -	 * may not have been any other non-core data logged in this
> -	 * transaction.
> -	 */
> -	if (in_f->ilf_fields & XFS_ILOG_AFORK) {
> -		if (in_f->ilf_fields & XFS_ILOG_DFORK) {
> -			attr_index = 3;
> -		} else {
> -			attr_index = 2;
> -		}
> -		len = item->ri_buf[attr_index].i_len;
> -		src = item->ri_buf[attr_index].i_addr;
> -		ASSERT(len == in_f->ilf_asize);
> -
> -		switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
> -		case XFS_ILOG_ADATA:
> -		case XFS_ILOG_AEXT:
> -			dest = XFS_DFORK_APTR(dip);
> -			ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
> -			memcpy(dest, src, len);
> -			break;
> -
> -		case XFS_ILOG_ABROOT:
> -			dest = XFS_DFORK_APTR(dip);
> -			xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
> -					 len, (xfs_bmdr_block_t*)dest,
> -					 XFS_DFORK_ASIZE(dip, mp));
> -			break;
> -
> -		default:
> -			xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
> -			ASSERT(0);
> -			error = -EFSCORRUPTED;
> -			goto out_release;
> -		}
> -	}
> -
> -out_owner_change:
> -	/* Recover the swapext owner change unless inode has been deleted */
> -	if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) &&
> -	    (dip->di_mode != 0))
> -		error = xfs_recover_inode_owner_change(mp, dip, in_f,
> -						       buffer_list);
> -	/* re-generate the checksum. */
> -	xfs_dinode_calc_crc(log->l_mp, dip);
> -
> -	ASSERT(bp->b_mount == mp);
> -	bp->b_iodone = xlog_recover_iodone;
> -	xfs_buf_delwri_queue(bp, buffer_list);
> -
> -out_release:
> -	xfs_buf_relse(bp);
> -error:
> -	if (need_free)
> -		kmem_free(in_f);
> -	return error;
> -}
> -
>  /*
>   * Recover a dquot record
>   */
> @@ -3066,9 +2714,6 @@ xlog_recover_commit_pass2(
>  				trans->r_lsn);
>  
>  	switch (ITEM_TYPE(item)) {
> -	case XFS_LI_INODE:
> -		return xlog_recover_inode_pass2(log, buffer_list, item,
> -						 trans->r_lsn);
>  	case XFS_LI_EFI:
>  		return xlog_recover_efi_pass2(log, item, trans->r_lsn);
>  	case XFS_LI_EFD:
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 07/28] xfs: refactor log recovery dquot item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 07/28] xfs: refactor log recovery dquot " Darrick J. Wong
@ 2020-05-05  5:13   ` Chandan Babu R
  2020-05-06 15:11   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  5:13 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:41:16 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the log dquot item pass2 commit code into the per-item source code
> files and use the dispatch function to call it.  We do these one at a
> time because there's a lot of code to move.  No functional changes.
>

Dquot item pass2 processing is functionally consistent with what was done
before this patch is applied.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_dquot_item_recover.c |  109 ++++++++++++++++++++++++++++++++++++++
>  fs/xfs/xfs_log_recover.c        |  112 ---------------------------------------
>  2 files changed, 109 insertions(+), 112 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
> index ebc44c1bc2b1..07ff943972a3 100644
> --- a/fs/xfs/xfs_dquot_item_recover.c
> +++ b/fs/xfs/xfs_dquot_item_recover.c
> @@ -53,9 +53,118 @@ xlog_recover_dquot_ra_pass2(
>  			&xfs_dquot_buf_ra_ops);
>  }
>  
> +/*
> + * Recover a dquot record
> + */
> +STATIC int
> +xlog_recover_dquot_commit_pass2(
> +	struct xlog			*log,
> +	struct list_head		*buffer_list,
> +	struct xlog_recover_item	*item,
> +	xfs_lsn_t			current_lsn)
> +{
> +	struct xfs_mount		*mp = log->l_mp;
> +	struct xfs_buf			*bp;
> +	struct xfs_disk_dquot		*ddq, *recddq;
> +	struct xfs_dq_logformat		*dq_f;
> +	xfs_failaddr_t			fa;
> +	int				error;
> +	uint				type;
> +
> +	/*
> +	 * Filesystems are required to send in quota flags at mount time.
> +	 */
> +	if (mp->m_qflags == 0)
> +		return 0;
> +
> +	recddq = item->ri_buf[1].i_addr;
> +	if (recddq == NULL) {
> +		xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
> +		return -EFSCORRUPTED;
> +	}
> +	if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) {
> +		xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
> +			item->ri_buf[1].i_len, __func__);
> +		return -EFSCORRUPTED;
> +	}
> +
> +	/*
> +	 * This type of quotas was turned off, so ignore this record.
> +	 */
> +	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
> +	ASSERT(type);
> +	if (log->l_quotaoffs_flag & type)
> +		return 0;
> +
> +	/*
> +	 * At this point we know that quota was _not_ turned off.
> +	 * Since the mount flags are not indicating to us otherwise, this
> +	 * must mean that quota is on, and the dquot needs to be replayed.
> +	 * Remember that we may not have fully recovered the superblock yet,
> +	 * so we can't do the usual trick of looking at the SB quota bits.
> +	 *
> +	 * The other possibility, of course, is that the quota subsystem was
> +	 * removed since the last mount - ENOSYS.
> +	 */
> +	dq_f = item->ri_buf[0].i_addr;
> +	ASSERT(dq_f);
> +	fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0);
> +	if (fa) {
> +		xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS",
> +				dq_f->qlf_id, fa);
> +		return -EFSCORRUPTED;
> +	}
> +	ASSERT(dq_f->qlf_len == 1);
> +
> +	/*
> +	 * At this point we are assuming that the dquots have been allocated
> +	 * and hence the buffer has valid dquots stamped in it. It should,
> +	 * therefore, pass verifier validation. If the dquot is bad, then the
> +	 * we'll return an error here, so we don't need to specifically check
> +	 * the dquot in the buffer after the verifier has run.
> +	 */
> +	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
> +				   XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
> +				   &xfs_dquot_buf_ops);
> +	if (error)
> +		return error;
> +
> +	ASSERT(bp);
> +	ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
> +
> +	/*
> +	 * If the dquot has an LSN in it, recover the dquot only if it's less
> +	 * than the lsn of the transaction we are replaying.
> +	 */
> +	if (xfs_sb_version_hascrc(&mp->m_sb)) {
> +		struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
> +		xfs_lsn_t	lsn = be64_to_cpu(dqb->dd_lsn);
> +
> +		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
> +			goto out_release;
> +		}
> +	}
> +
> +	memcpy(ddq, recddq, item->ri_buf[1].i_len);
> +	if (xfs_sb_version_hascrc(&mp->m_sb)) {
> +		xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
> +				 XFS_DQUOT_CRC_OFF);
> +	}
> +
> +	ASSERT(dq_f->qlf_size == 2);
> +	ASSERT(bp->b_mount == mp);
> +	bp->b_iodone = xlog_recover_iodone;
> +	xfs_buf_delwri_queue(bp, buffer_list);
> +
> +out_release:
> +	xfs_buf_relse(bp);
> +	return 0;
> +}
> +
>  const struct xlog_recover_item_ops xlog_dquot_item_ops = {
>  	.item_type		= XFS_LI_DQUOT,
>  	.ra_pass2		= xlog_recover_dquot_ra_pass2,
> +	.commit_pass2		= xlog_recover_dquot_commit_pass2,
>  };
>  
>  /*
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index cb5902550e8c..ea2a53b614c7 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2034,115 +2034,6 @@ xlog_buf_readahead(
>  		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
>  }
>  
> -/*
> - * Recover a dquot record
> - */
> -STATIC int
> -xlog_recover_dquot_pass2(
> -	struct xlog			*log,
> -	struct list_head		*buffer_list,
> -	struct xlog_recover_item	*item,
> -	xfs_lsn_t			current_lsn)
> -{
> -	xfs_mount_t		*mp = log->l_mp;
> -	xfs_buf_t		*bp;
> -	struct xfs_disk_dquot	*ddq, *recddq;
> -	xfs_failaddr_t		fa;
> -	int			error;
> -	xfs_dq_logformat_t	*dq_f;
> -	uint			type;
> -
> -
> -	/*
> -	 * Filesystems are required to send in quota flags at mount time.
> -	 */
> -	if (mp->m_qflags == 0)
> -		return 0;
> -
> -	recddq = item->ri_buf[1].i_addr;
> -	if (recddq == NULL) {
> -		xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
> -		return -EFSCORRUPTED;
> -	}
> -	if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) {
> -		xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
> -			item->ri_buf[1].i_len, __func__);
> -		return -EFSCORRUPTED;
> -	}
> -
> -	/*
> -	 * This type of quotas was turned off, so ignore this record.
> -	 */
> -	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
> -	ASSERT(type);
> -	if (log->l_quotaoffs_flag & type)
> -		return 0;
> -
> -	/*
> -	 * At this point we know that quota was _not_ turned off.
> -	 * Since the mount flags are not indicating to us otherwise, this
> -	 * must mean that quota is on, and the dquot needs to be replayed.
> -	 * Remember that we may not have fully recovered the superblock yet,
> -	 * so we can't do the usual trick of looking at the SB quota bits.
> -	 *
> -	 * The other possibility, of course, is that the quota subsystem was
> -	 * removed since the last mount - ENOSYS.
> -	 */
> -	dq_f = item->ri_buf[0].i_addr;
> -	ASSERT(dq_f);
> -	fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0);
> -	if (fa) {
> -		xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS",
> -				dq_f->qlf_id, fa);
> -		return -EFSCORRUPTED;
> -	}
> -	ASSERT(dq_f->qlf_len == 1);
> -
> -	/*
> -	 * At this point we are assuming that the dquots have been allocated
> -	 * and hence the buffer has valid dquots stamped in it. It should,
> -	 * therefore, pass verifier validation. If the dquot is bad, then the
> -	 * we'll return an error here, so we don't need to specifically check
> -	 * the dquot in the buffer after the verifier has run.
> -	 */
> -	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
> -				   XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
> -				   &xfs_dquot_buf_ops);
> -	if (error)
> -		return error;
> -
> -	ASSERT(bp);
> -	ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
> -
> -	/*
> -	 * If the dquot has an LSN in it, recover the dquot only if it's less
> -	 * than the lsn of the transaction we are replaying.
> -	 */
> -	if (xfs_sb_version_hascrc(&mp->m_sb)) {
> -		struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
> -		xfs_lsn_t	lsn = be64_to_cpu(dqb->dd_lsn);
> -
> -		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
> -			goto out_release;
> -		}
> -	}
> -
> -	memcpy(ddq, recddq, item->ri_buf[1].i_len);
> -	if (xfs_sb_version_hascrc(&mp->m_sb)) {
> -		xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
> -				 XFS_DQUOT_CRC_OFF);
> -	}
> -
> -	ASSERT(dq_f->qlf_size == 2);
> -	ASSERT(bp->b_mount == mp);
> -	bp->b_iodone = xlog_recover_iodone;
> -	xfs_buf_delwri_queue(bp, buffer_list);
> -
> -out_release:
> -	xfs_buf_relse(bp);
> -	return 0;
> -}
> -
>  /*
>   * This routine is called to create an in-core extent free intent
>   * item from the efi format structure which was logged on disk.
> @@ -2730,9 +2621,6 @@ xlog_recover_commit_pass2(
>  		return xlog_recover_bui_pass2(log, item, trans->r_lsn);
>  	case XFS_LI_BUD:
>  		return xlog_recover_bud_pass2(log, item);
> -	case XFS_LI_DQUOT:
> -		return xlog_recover_dquot_pass2(log, buffer_list, item,
> -						trans->r_lsn);
>  	case XFS_LI_ICREATE:
>  		return xlog_recover_do_icreate_pass2(log, buffer_list, item);
>  	case XFS_LI_QUOTAOFF:
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 08/28] xfs: refactor log recovery icreate item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 08/28] xfs: refactor log recovery icreate " Darrick J. Wong
@ 2020-05-05  6:10   ` Chandan Babu R
  2020-05-06 15:11   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  6:10 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:41:22 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the log icreate item pass2 commit code into the per-item source code
> files and use the dispatch function to call it.  We do these one at a
> time because there's a lot of code to move.  No functional changes.
>

icreate item pass2 processing is functionally consistent with what was done
before this patch was applied.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_icreate_item.c |  132 +++++++++++++++++++++++++++++++++++++++++++++
>  fs/xfs/xfs_log_recover.c  |  126 -------------------------------------------
>  2 files changed, 132 insertions(+), 126 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
> index 366c1e722a29..287a9e5c7d75 100644
> --- a/fs/xfs/xfs_icreate_item.c
> +++ b/fs/xfs/xfs_icreate_item.c
> @@ -6,13 +6,19 @@
>  #include "xfs.h"
>  #include "xfs_fs.h"
>  #include "xfs_shared.h"
> +#include "xfs_format.h"
>  #include "xfs_log_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_inode.h"
>  #include "xfs_trans.h"
>  #include "xfs_trans_priv.h"
>  #include "xfs_icreate_item.h"
>  #include "xfs_log.h"
>  #include "xfs_log_priv.h"
>  #include "xfs_log_recover.h"
> +#include "xfs_ialloc.h"
> +#include "xfs_trace.h"
>  
>  kmem_zone_t	*xfs_icreate_zone;		/* inode create item zone */
>  
> @@ -123,7 +129,133 @@ xlog_recover_icreate_reorder(
>  	return XLOG_REORDER_BUFFER_LIST;
>  }
>  
> +/*
> + * This routine is called when an inode create format structure is found in a
> + * committed transaction in the log.  Its purpose is to initialise the inodes
> + * being allocated on disk. This requires us to get inode cluster buffers that
> + * match the range to be initialised, stamped with inode templates and written
> + * by delayed write so that subsequent modifications will hit the cached buffer
> + * and only need writing out at the end of recovery.
> + */
> +STATIC int
> +xlog_recover_icreate_commit_pass2(
> +	struct xlog			*log,
> +	struct list_head		*buffer_list,
> +	struct xlog_recover_item	*item,
> +	xfs_lsn_t			lsn)
> +{
> +	struct xfs_mount		*mp = log->l_mp;
> +	struct xfs_icreate_log		*icl;
> +	struct xfs_ino_geometry		*igeo = M_IGEO(mp);
> +	xfs_agnumber_t			agno;
> +	xfs_agblock_t			agbno;
> +	unsigned int			count;
> +	unsigned int			isize;
> +	xfs_agblock_t			length;
> +	int				bb_per_cluster;
> +	int				cancel_count;
> +	int				nbufs;
> +	int				i;
> +
> +	icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
> +	if (icl->icl_type != XFS_LI_ICREATE) {
> +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
> +		return -EINVAL;
> +	}
> +
> +	if (icl->icl_size != 1) {
> +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
> +		return -EINVAL;
> +	}
> +
> +	agno = be32_to_cpu(icl->icl_ag);
> +	if (agno >= mp->m_sb.sb_agcount) {
> +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
> +		return -EINVAL;
> +	}
> +	agbno = be32_to_cpu(icl->icl_agbno);
> +	if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
> +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
> +		return -EINVAL;
> +	}
> +	isize = be32_to_cpu(icl->icl_isize);
> +	if (isize != mp->m_sb.sb_inodesize) {
> +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
> +		return -EINVAL;
> +	}
> +	count = be32_to_cpu(icl->icl_count);
> +	if (!count) {
> +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
> +		return -EINVAL;
> +	}
> +	length = be32_to_cpu(icl->icl_length);
> +	if (!length || length >= mp->m_sb.sb_agblocks) {
> +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
> +		return -EINVAL;
> +	}
> +
> +	/*
> +	 * The inode chunk is either full or sparse and we only support
> +	 * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
> +	 */
> +	if (length != igeo->ialloc_blks &&
> +	    length != igeo->ialloc_min_blks) {
> +		xfs_warn(log->l_mp,
> +			 "%s: unsupported chunk length", __FUNCTION__);
> +		return -EINVAL;
> +	}
> +
> +	/* verify inode count is consistent with extent length */
> +	if ((count >> mp->m_sb.sb_inopblog) != length) {
> +		xfs_warn(log->l_mp,
> +			 "%s: inconsistent inode count and chunk length",
> +			 __FUNCTION__);
> +		return -EINVAL;
> +	}
> +
> +	/*
> +	 * The icreate transaction can cover multiple cluster buffers and these
> +	 * buffers could have been freed and reused. Check the individual
> +	 * buffers for cancellation so we don't overwrite anything written after
> +	 * a cancellation.
> +	 */
> +	bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
> +	nbufs = length / igeo->blocks_per_cluster;
> +	for (i = 0, cancel_count = 0; i < nbufs; i++) {
> +		xfs_daddr_t	daddr;
> +
> +		daddr = XFS_AGB_TO_DADDR(mp, agno,
> +				agbno + i * igeo->blocks_per_cluster);
> +		if (xlog_is_buffer_cancelled(log, daddr, bb_per_cluster))
> +			cancel_count++;
> +	}
> +
> +	/*
> +	 * We currently only use icreate for a single allocation at a time. This
> +	 * means we should expect either all or none of the buffers to be
> +	 * cancelled. Be conservative and skip replay if at least one buffer is
> +	 * cancelled, but warn the user that something is awry if the buffers
> +	 * are not consistent.
> +	 *
> +	 * XXX: This must be refined to only skip cancelled clusters once we use
> +	 * icreate for multiple chunk allocations.
> +	 */
> +	ASSERT(!cancel_count || cancel_count == nbufs);
> +	if (cancel_count) {
> +		if (cancel_count != nbufs)
> +			xfs_warn(mp,
> +	"WARNING: partial inode chunk cancellation, skipped icreate.");
> +		trace_xfs_log_recover_icreate_cancel(log, icl);
> +		return 0;
> +	}
> +
> +	trace_xfs_log_recover_icreate_recover(log, icl);
> +	return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno,
> +				     length, be32_to_cpu(icl->icl_gen));
> +}
> +
>  const struct xlog_recover_item_ops xlog_icreate_item_ops = {
>  	.item_type		= XFS_LI_ICREATE,
>  	.reorder		= xlog_recover_icreate_reorder,
> +	.commit_pass2		= xlog_recover_icreate_commit_pass2,
>  };
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index ea2a53b614c7..86bf2da28dcd 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2467,130 +2467,6 @@ xlog_recover_bud_pass2(
>  	return 0;
>  }
>  
> -/*
> - * This routine is called when an inode create format structure is found in a
> - * committed transaction in the log.  It's purpose is to initialise the inodes
> - * being allocated on disk. This requires us to get inode cluster buffers that
> - * match the range to be initialised, stamped with inode templates and written
> - * by delayed write so that subsequent modifications will hit the cached buffer
> - * and only need writing out at the end of recovery.
> - */
> -STATIC int
> -xlog_recover_do_icreate_pass2(
> -	struct xlog		*log,
> -	struct list_head	*buffer_list,
> -	struct xlog_recover_item *item)
> -{
> -	struct xfs_mount	*mp = log->l_mp;
> -	struct xfs_icreate_log	*icl;
> -	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
> -	xfs_agnumber_t		agno;
> -	xfs_agblock_t		agbno;
> -	unsigned int		count;
> -	unsigned int		isize;
> -	xfs_agblock_t		length;
> -	int			bb_per_cluster;
> -	int			cancel_count;
> -	int			nbufs;
> -	int			i;
> -
> -	icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
> -	if (icl->icl_type != XFS_LI_ICREATE) {
> -		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
> -		return -EINVAL;
> -	}
> -
> -	if (icl->icl_size != 1) {
> -		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
> -		return -EINVAL;
> -	}
> -
> -	agno = be32_to_cpu(icl->icl_ag);
> -	if (agno >= mp->m_sb.sb_agcount) {
> -		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
> -		return -EINVAL;
> -	}
> -	agbno = be32_to_cpu(icl->icl_agbno);
> -	if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
> -		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
> -		return -EINVAL;
> -	}
> -	isize = be32_to_cpu(icl->icl_isize);
> -	if (isize != mp->m_sb.sb_inodesize) {
> -		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
> -		return -EINVAL;
> -	}
> -	count = be32_to_cpu(icl->icl_count);
> -	if (!count) {
> -		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
> -		return -EINVAL;
> -	}
> -	length = be32_to_cpu(icl->icl_length);
> -	if (!length || length >= mp->m_sb.sb_agblocks) {
> -		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
> -		return -EINVAL;
> -	}
> -
> -	/*
> -	 * The inode chunk is either full or sparse and we only support
> -	 * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
> -	 */
> -	if (length != igeo->ialloc_blks &&
> -	    length != igeo->ialloc_min_blks) {
> -		xfs_warn(log->l_mp,
> -			 "%s: unsupported chunk length", __FUNCTION__);
> -		return -EINVAL;
> -	}
> -
> -	/* verify inode count is consistent with extent length */
> -	if ((count >> mp->m_sb.sb_inopblog) != length) {
> -		xfs_warn(log->l_mp,
> -			 "%s: inconsistent inode count and chunk length",
> -			 __FUNCTION__);
> -		return -EINVAL;
> -	}
> -
> -	/*
> -	 * The icreate transaction can cover multiple cluster buffers and these
> -	 * buffers could have been freed and reused. Check the individual
> -	 * buffers for cancellation so we don't overwrite anything written after
> -	 * a cancellation.
> -	 */
> -	bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
> -	nbufs = length / igeo->blocks_per_cluster;
> -	for (i = 0, cancel_count = 0; i < nbufs; i++) {
> -		xfs_daddr_t	daddr;
> -
> -		daddr = XFS_AGB_TO_DADDR(mp, agno,
> -				agbno + i * igeo->blocks_per_cluster);
> -		if (xlog_is_buffer_cancelled(log, daddr, bb_per_cluster))
> -			cancel_count++;
> -	}
> -
> -	/*
> -	 * We currently only use icreate for a single allocation at a time. This
> -	 * means we should expect either all or none of the buffers to be
> -	 * cancelled. Be conservative and skip replay if at least one buffer is
> -	 * cancelled, but warn the user that something is awry if the buffers
> -	 * are not consistent.
> -	 *
> -	 * XXX: This must be refined to only skip cancelled clusters once we use
> -	 * icreate for multiple chunk allocations.
> -	 */
> -	ASSERT(!cancel_count || cancel_count == nbufs);
> -	if (cancel_count) {
> -		if (cancel_count != nbufs)
> -			xfs_warn(mp,
> -	"WARNING: partial inode chunk cancellation, skipped icreate.");
> -		trace_xfs_log_recover_icreate_cancel(log, icl);
> -		return 0;
> -	}
> -
> -	trace_xfs_log_recover_icreate_recover(log, icl);
> -	return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno,
> -				     length, be32_to_cpu(icl->icl_gen));
> -}
> -
>  STATIC int
>  xlog_recover_commit_pass2(
>  	struct xlog			*log,
> @@ -2621,8 +2497,6 @@ xlog_recover_commit_pass2(
>  		return xlog_recover_bui_pass2(log, item, trans->r_lsn);
>  	case XFS_LI_BUD:
>  		return xlog_recover_bud_pass2(log, item);
> -	case XFS_LI_ICREATE:
> -		return xlog_recover_do_icreate_pass2(log, buffer_list, item);
>  	case XFS_LI_QUOTAOFF:
>  		/* nothing to do in pass2 */
>  		return 0;
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 09/28] xfs: refactor log recovery EFI item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 09/28] xfs: refactor log recovery EFI " Darrick J. Wong
@ 2020-05-05  6:46   ` Chandan Babu R
  2020-05-06 15:12   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  6:46 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:41:29 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the extent free intent and intent-done pass2 commit code into the
> per-item source code files and use dispatch functions to call them.  We
> do these one at a time because there's a lot of code to move.  No
> functional changes.
>

EFI/EFD item pass2 processing is functionally consistent with what was done
before this patch was applied.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_extfree_item.c |  107 ++++++++++++++++++++++++++++++++++++++++++++-
>  fs/xfs/xfs_extfree_item.h |    4 --
>  fs/xfs/xfs_log_recover.c  |  100 ------------------------------------------
>  3 files changed, 104 insertions(+), 107 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
> index b43bb087aef3..dca098660753 100644
> --- a/fs/xfs/xfs_extfree_item.c
> +++ b/fs/xfs/xfs_extfree_item.c
> @@ -22,6 +22,7 @@
>  #include "xfs_bmap.h"
>  #include "xfs_trace.h"
>  #include "xfs_error.h"
> +#include "xfs_log_priv.h"
>  #include "xfs_log_recover.h"
>  
>  kmem_zone_t	*xfs_efi_zone;
> @@ -32,7 +33,7 @@ static inline struct xfs_efi_log_item *EFI_ITEM(struct xfs_log_item *lip)
>  	return container_of(lip, struct xfs_efi_log_item, efi_item);
>  }
>  
> -void
> +STATIC void
>  xfs_efi_item_free(
>  	struct xfs_efi_log_item	*efip)
>  {
> @@ -151,7 +152,7 @@ static const struct xfs_item_ops xfs_efi_item_ops = {
>  /*
>   * Allocate and initialize an efi item with the given number of extents.
>   */
> -struct xfs_efi_log_item *
> +STATIC struct xfs_efi_log_item *
>  xfs_efi_init(
>  	struct xfs_mount	*mp,
>  	uint			nextents)
> @@ -185,7 +186,7 @@ xfs_efi_init(
>   * one of which will be the native format for this kernel.
>   * It will handle the conversion of formats if necessary.
>   */
> -int
> +STATIC int
>  xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
>  {
>  	xfs_efi_log_format_t *src_efi_fmt = buf->i_addr;
> @@ -646,10 +647,110 @@ xfs_efi_recover(
>  	return error;
>  }
>  
> +/*
> + * This routine is called to create an in-core extent free intent
> + * item from the efi format structure which was logged on disk.
> + * It allocates an in-core efi, copies the extents from the format
> + * structure into it, and adds the efi to the AIL with the given
> + * LSN.
> + */
> +STATIC int
> +xlog_recover_extfree_intent_commit_pass2(
> +	struct xlog			*log,
> +	struct list_head		*buffer_list,
> +	struct xlog_recover_item	*item,
> +	xfs_lsn_t			lsn)
> +{
> +	struct xfs_mount		*mp = log->l_mp;
> +	struct xfs_efi_log_item		*efip;
> +	struct xfs_efi_log_format	*efi_formatp;
> +	int				error;
> +
> +	efi_formatp = item->ri_buf[0].i_addr;
> +
> +	efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
> +	error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
> +	if (error) {
> +		xfs_efi_item_free(efip);
> +		return error;
> +	}
> +	atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
> +
> +	spin_lock(&log->l_ailp->ail_lock);
> +	/*
> +	 * The EFI has two references. One for the EFD and one for EFI to ensure
> +	 * it makes it into the AIL. Insert the EFI into the AIL directly and
> +	 * drop the EFI reference. Note that xfs_trans_ail_update() drops the
> +	 * AIL lock.
> +	 */
> +	xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
> +	xfs_efi_release(efip);
> +	return 0;
> +}
> +
>  const struct xlog_recover_item_ops xlog_extfree_intent_item_ops = {
>  	.item_type		= XFS_LI_EFI,
> +	.commit_pass2		= xlog_recover_extfree_intent_commit_pass2,
>  };
>  
> +/*
> + * This routine is called when an EFD format structure is found in a committed
> + * transaction in the log. Its purpose is to cancel the corresponding EFI if it
> + * was still in the log. To do this it searches the AIL for the EFI with an id
> + * equal to that in the EFD format structure. If we find it we drop the EFD
> + * reference, which removes the EFI from the AIL and frees it.
> + */
> +STATIC int
> +xlog_recover_extfree_done_commit_pass2(
> +	struct xlog			*log,
> +	struct list_head		*buffer_list,
> +	struct xlog_recover_item	*item,
> +	xfs_lsn_t			lsn)
> +{
> +	struct xfs_ail_cursor		cur;
> +	struct xfs_efd_log_format	*efd_formatp;
> +	struct xfs_efi_log_item		*efip = NULL;
> +	struct xfs_log_item		*lip;
> +	struct xfs_ail			*ailp = log->l_ailp;
> +	uint64_t			efi_id;
> +
> +	efd_formatp = item->ri_buf[0].i_addr;
> +	ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
> +		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
> +	       (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
> +		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
> +	efi_id = efd_formatp->efd_efi_id;
> +
> +	/*
> +	 * Search for the EFI with the id in the EFD format structure in the
> +	 * AIL.
> +	 */
> +	spin_lock(&ailp->ail_lock);
> +	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
> +	while (lip != NULL) {
> +		if (lip->li_type == XFS_LI_EFI) {
> +			efip = (struct xfs_efi_log_item *)lip;
> +			if (efip->efi_format.efi_id == efi_id) {
> +				/*
> +				 * Drop the EFD reference to the EFI. This
> +				 * removes the EFI from the AIL and frees it.
> +				 */
> +				spin_unlock(&ailp->ail_lock);
> +				xfs_efi_release(efip);
> +				spin_lock(&ailp->ail_lock);
> +				break;
> +			}
> +		}
> +		lip = xfs_trans_ail_cursor_next(ailp, &cur);
> +	}
> +
> +	xfs_trans_ail_cursor_done(&cur);
> +	spin_unlock(&ailp->ail_lock);
> +
> +	return 0;
> +}
> +
>  const struct xlog_recover_item_ops xlog_extfree_done_item_ops = {
>  	.item_type		= XFS_LI_EFD,
> +	.commit_pass2		= xlog_recover_extfree_done_commit_pass2,
>  };
> diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
> index a2a736a77fa9..876e3d237f48 100644
> --- a/fs/xfs/xfs_extfree_item.h
> +++ b/fs/xfs/xfs_extfree_item.h
> @@ -78,10 +78,6 @@ struct xfs_efd_log_item {
>  extern struct kmem_zone	*xfs_efi_zone;
>  extern struct kmem_zone	*xfs_efd_zone;
>  
> -struct xfs_efi_log_item	*xfs_efi_init(struct xfs_mount *, uint);
> -int			xfs_efi_copy_format(xfs_log_iovec_t *buf,
> -					    xfs_efi_log_format_t *dst_efi_fmt);
> -void			xfs_efi_item_free(struct xfs_efi_log_item *);
>  void			xfs_efi_release(struct xfs_efi_log_item *);
>  
>  int			xfs_efi_recover(struct xfs_mount *mp,
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 86bf2da28dcd..d7c5f75cf992 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2034,102 +2034,6 @@ xlog_buf_readahead(
>  		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
>  }
>  
> -/*
> - * This routine is called to create an in-core extent free intent
> - * item from the efi format structure which was logged on disk.
> - * It allocates an in-core efi, copies the extents from the format
> - * structure into it, and adds the efi to the AIL with the given
> - * LSN.
> - */
> -STATIC int
> -xlog_recover_efi_pass2(
> -	struct xlog			*log,
> -	struct xlog_recover_item	*item,
> -	xfs_lsn_t			lsn)
> -{
> -	int				error;
> -	struct xfs_mount		*mp = log->l_mp;
> -	struct xfs_efi_log_item		*efip;
> -	struct xfs_efi_log_format	*efi_formatp;
> -
> -	efi_formatp = item->ri_buf[0].i_addr;
> -
> -	efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
> -	error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
> -	if (error) {
> -		xfs_efi_item_free(efip);
> -		return error;
> -	}
> -	atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
> -
> -	spin_lock(&log->l_ailp->ail_lock);
> -	/*
> -	 * The EFI has two references. One for the EFD and one for EFI to ensure
> -	 * it makes it into the AIL. Insert the EFI into the AIL directly and
> -	 * drop the EFI reference. Note that xfs_trans_ail_update() drops the
> -	 * AIL lock.
> -	 */
> -	xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
> -	xfs_efi_release(efip);
> -	return 0;
> -}
> -
> -
> -/*
> - * This routine is called when an EFD format structure is found in a committed
> - * transaction in the log. Its purpose is to cancel the corresponding EFI if it
> - * was still in the log. To do this it searches the AIL for the EFI with an id
> - * equal to that in the EFD format structure. If we find it we drop the EFD
> - * reference, which removes the EFI from the AIL and frees it.
> - */
> -STATIC int
> -xlog_recover_efd_pass2(
> -	struct xlog			*log,
> -	struct xlog_recover_item	*item)
> -{
> -	xfs_efd_log_format_t	*efd_formatp;
> -	struct xfs_efi_log_item	*efip = NULL;
> -	struct xfs_log_item	*lip;
> -	uint64_t		efi_id;
> -	struct xfs_ail_cursor	cur;
> -	struct xfs_ail		*ailp = log->l_ailp;
> -
> -	efd_formatp = item->ri_buf[0].i_addr;
> -	ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
> -		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
> -	       (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
> -		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
> -	efi_id = efd_formatp->efd_efi_id;
> -
> -	/*
> -	 * Search for the EFI with the id in the EFD format structure in the
> -	 * AIL.
> -	 */
> -	spin_lock(&ailp->ail_lock);
> -	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
> -	while (lip != NULL) {
> -		if (lip->li_type == XFS_LI_EFI) {
> -			efip = (struct xfs_efi_log_item *)lip;
> -			if (efip->efi_format.efi_id == efi_id) {
> -				/*
> -				 * Drop the EFD reference to the EFI. This
> -				 * removes the EFI from the AIL and frees it.
> -				 */
> -				spin_unlock(&ailp->ail_lock);
> -				xfs_efi_release(efip);
> -				spin_lock(&ailp->ail_lock);
> -				break;
> -			}
> -		}
> -		lip = xfs_trans_ail_cursor_next(ailp, &cur);
> -	}
> -
> -	xfs_trans_ail_cursor_done(&cur);
> -	spin_unlock(&ailp->ail_lock);
> -
> -	return 0;
> -}
> -
>  /*
>   * This routine is called to create an in-core extent rmap update
>   * item from the rui format structure which was logged on disk.
> @@ -2481,10 +2385,6 @@ xlog_recover_commit_pass2(
>  				trans->r_lsn);
>  
>  	switch (ITEM_TYPE(item)) {
> -	case XFS_LI_EFI:
> -		return xlog_recover_efi_pass2(log, item, trans->r_lsn);
> -	case XFS_LI_EFD:
> -		return xlog_recover_efd_pass2(log, item);
>  	case XFS_LI_RUI:
>  		return xlog_recover_rui_pass2(log, item, trans->r_lsn);
>  	case XFS_LI_RUD:
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 10/28] xfs: refactor log recovery RUI item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 10/28] xfs: refactor log recovery RUI " Darrick J. Wong
@ 2020-05-05  7:02   ` Chandan Babu R
  2020-05-06 15:12   ` Christoph Hellwig
  2020-05-06 15:13   ` Christoph Hellwig
  2 siblings, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  7:02 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:41:35 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the rmap update intent and intent-done pass2 commit code into the
> per-item source code files and use dispatch functions to call them.  We
> do these one at a time because there's a lot of code to move.  No
> functional changes.
>

RUI/RUD item pass2 processing is functionally consistent with what was done
before this patch was applied.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_log_recover.c |   97 -------------------------------------------
>  fs/xfs/xfs_rmap_item.c   |  104 +++++++++++++++++++++++++++++++++++++++++++++-
>  fs/xfs/xfs_rmap_item.h   |    4 --
>  3 files changed, 101 insertions(+), 104 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index d7c5f75cf992..0c0ce7bfc30e 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2034,99 +2034,6 @@ xlog_buf_readahead(
>  		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
>  }
>  
> -/*
> - * This routine is called to create an in-core extent rmap update
> - * item from the rui format structure which was logged on disk.
> - * It allocates an in-core rui, copies the extents from the format
> - * structure into it, and adds the rui to the AIL with the given
> - * LSN.
> - */
> -STATIC int
> -xlog_recover_rui_pass2(
> -	struct xlog			*log,
> -	struct xlog_recover_item	*item,
> -	xfs_lsn_t			lsn)
> -{
> -	int				error;
> -	struct xfs_mount		*mp = log->l_mp;
> -	struct xfs_rui_log_item		*ruip;
> -	struct xfs_rui_log_format	*rui_formatp;
> -
> -	rui_formatp = item->ri_buf[0].i_addr;
> -
> -	ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
> -	error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
> -	if (error) {
> -		xfs_rui_item_free(ruip);
> -		return error;
> -	}
> -	atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
> -
> -	spin_lock(&log->l_ailp->ail_lock);
> -	/*
> -	 * The RUI has two references. One for the RUD and one for RUI to ensure
> -	 * it makes it into the AIL. Insert the RUI into the AIL directly and
> -	 * drop the RUI reference. Note that xfs_trans_ail_update() drops the
> -	 * AIL lock.
> -	 */
> -	xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn);
> -	xfs_rui_release(ruip);
> -	return 0;
> -}
> -
> -
> -/*
> - * This routine is called when an RUD format structure is found in a committed
> - * transaction in the log. Its purpose is to cancel the corresponding RUI if it
> - * was still in the log. To do this it searches the AIL for the RUI with an id
> - * equal to that in the RUD format structure. If we find it we drop the RUD
> - * reference, which removes the RUI from the AIL and frees it.
> - */
> -STATIC int
> -xlog_recover_rud_pass2(
> -	struct xlog			*log,
> -	struct xlog_recover_item	*item)
> -{
> -	struct xfs_rud_log_format	*rud_formatp;
> -	struct xfs_rui_log_item		*ruip = NULL;
> -	struct xfs_log_item		*lip;
> -	uint64_t			rui_id;
> -	struct xfs_ail_cursor		cur;
> -	struct xfs_ail			*ailp = log->l_ailp;
> -
> -	rud_formatp = item->ri_buf[0].i_addr;
> -	ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
> -	rui_id = rud_formatp->rud_rui_id;
> -
> -	/*
> -	 * Search for the RUI with the id in the RUD format structure in the
> -	 * AIL.
> -	 */
> -	spin_lock(&ailp->ail_lock);
> -	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
> -	while (lip != NULL) {
> -		if (lip->li_type == XFS_LI_RUI) {
> -			ruip = (struct xfs_rui_log_item *)lip;
> -			if (ruip->rui_format.rui_id == rui_id) {
> -				/*
> -				 * Drop the RUD reference to the RUI. This
> -				 * removes the RUI from the AIL and frees it.
> -				 */
> -				spin_unlock(&ailp->ail_lock);
> -				xfs_rui_release(ruip);
> -				spin_lock(&ailp->ail_lock);
> -				break;
> -			}
> -		}
> -		lip = xfs_trans_ail_cursor_next(ailp, &cur);
> -	}
> -
> -	xfs_trans_ail_cursor_done(&cur);
> -	spin_unlock(&ailp->ail_lock);
> -
> -	return 0;
> -}
> -
>  /*
>   * Copy an CUI format buffer from the given buf, and into the destination
>   * CUI format structure.  The CUI/CUD items were designed not to need any
> @@ -2385,10 +2292,6 @@ xlog_recover_commit_pass2(
>  				trans->r_lsn);
>  
>  	switch (ITEM_TYPE(item)) {
> -	case XFS_LI_RUI:
> -		return xlog_recover_rui_pass2(log, item, trans->r_lsn);
> -	case XFS_LI_RUD:
> -		return xlog_recover_rud_pass2(log, item);
>  	case XFS_LI_CUI:
>  		return xlog_recover_cui_pass2(log, item, trans->r_lsn);
>  	case XFS_LI_CUD:
> diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
> index 3eb538674cb9..c87f4e429c12 100644
> --- a/fs/xfs/xfs_rmap_item.c
> +++ b/fs/xfs/xfs_rmap_item.c
> @@ -18,6 +18,7 @@
>  #include "xfs_log.h"
>  #include "xfs_rmap.h"
>  #include "xfs_error.h"
> +#include "xfs_log_priv.h"
>  #include "xfs_log_recover.h"
>  
>  kmem_zone_t	*xfs_rui_zone;
> @@ -28,7 +29,7 @@ static inline struct xfs_rui_log_item *RUI_ITEM(struct xfs_log_item *lip)
>  	return container_of(lip, struct xfs_rui_log_item, rui_item);
>  }
>  
> -void
> +STATIC void
>  xfs_rui_item_free(
>  	struct xfs_rui_log_item	*ruip)
>  {
> @@ -133,7 +134,7 @@ static const struct xfs_item_ops xfs_rui_item_ops = {
>  /*
>   * Allocate and initialize an rui item with the given number of extents.
>   */
> -struct xfs_rui_log_item *
> +STATIC struct xfs_rui_log_item *
>  xfs_rui_init(
>  	struct xfs_mount		*mp,
>  	uint				nextents)
> @@ -161,7 +162,7 @@ xfs_rui_init(
>   * RUI format structure.  The RUI/RUD items were designed not to need any
>   * special alignment handling.
>   */
> -int
> +STATIC int
>  xfs_rui_copy_format(
>  	struct xfs_log_iovec		*buf,
>  	struct xfs_rui_log_format	*dst_rui_fmt)
> @@ -587,10 +588,107 @@ xfs_rui_recover(
>  	return error;
>  }
>  
> +/*
> + * This routine is called to create an in-core extent rmap update
> + * item from the rui format structure which was logged on disk.
> + * It allocates an in-core rui, copies the extents from the format
> + * structure into it, and adds the rui to the AIL with the given
> + * LSN.
> + */
> +STATIC int
> +xlog_recover_rmap_intent_commit_pass2(
> +	struct xlog			*log,
> +	struct list_head		*buffer_list,
> +	struct xlog_recover_item	*item,
> +	xfs_lsn_t			lsn)
> +{
> +	int				error;
> +	struct xfs_mount		*mp = log->l_mp;
> +	struct xfs_rui_log_item		*ruip;
> +	struct xfs_rui_log_format	*rui_formatp;
> +
> +	rui_formatp = item->ri_buf[0].i_addr;
> +
> +	ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
> +	error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
> +	if (error) {
> +		xfs_rui_item_free(ruip);
> +		return error;
> +	}
> +	atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
> +
> +	spin_lock(&log->l_ailp->ail_lock);
> +	/*
> +	 * The RUI has two references. One for the RUD and one for RUI to ensure
> +	 * it makes it into the AIL. Insert the RUI into the AIL directly and
> +	 * drop the RUI reference. Note that xfs_trans_ail_update() drops the
> +	 * AIL lock.
> +	 */
> +	xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn);
> +	xfs_rui_release(ruip);
> +	return 0;
> +}
> +
>  const struct xlog_recover_item_ops xlog_rmap_intent_item_ops = {
>  	.item_type		= XFS_LI_RUI,
> +	.commit_pass2		= xlog_recover_rmap_intent_commit_pass2,
>  };
>  
> +/*
> + * This routine is called when an RUD format structure is found in a committed
> + * transaction in the log. Its purpose is to cancel the corresponding RUI if it
> + * was still in the log. To do this it searches the AIL for the RUI with an id
> + * equal to that in the RUD format structure. If we find it we drop the RUD
> + * reference, which removes the RUI from the AIL and frees it.
> + */
> +STATIC int
> +xlog_recover_rmap_done_commit_pass2(
> +	struct xlog			*log,
> +	struct list_head		*buffer_list,
> +	struct xlog_recover_item	*item,
> +	xfs_lsn_t			lsn)
> +{
> +	struct xfs_rud_log_format	*rud_formatp;
> +	struct xfs_rui_log_item		*ruip = NULL;
> +	struct xfs_log_item		*lip;
> +	uint64_t			rui_id;
> +	struct xfs_ail_cursor		cur;
> +	struct xfs_ail			*ailp = log->l_ailp;
> +
> +	rud_formatp = item->ri_buf[0].i_addr;
> +	ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
> +	rui_id = rud_formatp->rud_rui_id;
> +
> +	/*
> +	 * Search for the RUI with the id in the RUD format structure in the
> +	 * AIL.
> +	 */
> +	spin_lock(&ailp->ail_lock);
> +	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
> +	while (lip != NULL) {
> +		if (lip->li_type == XFS_LI_RUI) {
> +			ruip = (struct xfs_rui_log_item *)lip;
> +			if (ruip->rui_format.rui_id == rui_id) {
> +				/*
> +				 * Drop the RUD reference to the RUI. This
> +				 * removes the RUI from the AIL and frees it.
> +				 */
> +				spin_unlock(&ailp->ail_lock);
> +				xfs_rui_release(ruip);
> +				spin_lock(&ailp->ail_lock);
> +				break;
> +			}
> +		}
> +		lip = xfs_trans_ail_cursor_next(ailp, &cur);
> +	}
> +
> +	xfs_trans_ail_cursor_done(&cur);
> +	spin_unlock(&ailp->ail_lock);
> +
> +	return 0;
> +}
> +
>  const struct xlog_recover_item_ops xlog_rmap_done_item_ops = {
>  	.item_type		= XFS_LI_RUD,
> +	.commit_pass2		= xlog_recover_rmap_done_commit_pass2,
>  };
> diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
> index 8708e4a5aa5c..89bd192779f8 100644
> --- a/fs/xfs/xfs_rmap_item.h
> +++ b/fs/xfs/xfs_rmap_item.h
> @@ -77,10 +77,6 @@ struct xfs_rud_log_item {
>  extern struct kmem_zone	*xfs_rui_zone;
>  extern struct kmem_zone	*xfs_rud_zone;
>  
> -struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint);
> -int xfs_rui_copy_format(struct xfs_log_iovec *buf,
> -		struct xfs_rui_log_format *dst_rui_fmt);
> -void xfs_rui_item_free(struct xfs_rui_log_item *);
>  void xfs_rui_release(struct xfs_rui_log_item *);
>  int xfs_rui_recover(struct xfs_mount *mp, struct xfs_rui_log_item *ruip);
>  
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 11/28] xfs: refactor log recovery CUI item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 11/28] xfs: refactor log recovery CUI " Darrick J. Wong
@ 2020-05-05  7:06   ` Chandan Babu R
  2020-05-06 15:13   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  7:06 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:41:41 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the refcount update intent and intent-done pass2 commit code into
> the per-item source code files and use dispatch functions to call them.
> We do these one at a time because there's a lot of code to move.  No
> functional changes.
>

CUI/CUD item pass2 processing is functionally consistent with what was done
before this patch is applied.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_log_recover.c   |  124 ------------------------------------------
>  fs/xfs/xfs_refcount_item.c |  129 +++++++++++++++++++++++++++++++++++++++++++-
>  fs/xfs/xfs_refcount_item.h |    2 -
>  3 files changed, 127 insertions(+), 128 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 0c0ce7bfc30e..23008b7cf93c 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2034,126 +2034,6 @@ xlog_buf_readahead(
>  		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
>  }
>  
> -/*
> - * Copy an CUI format buffer from the given buf, and into the destination
> - * CUI format structure.  The CUI/CUD items were designed not to need any
> - * special alignment handling.
> - */
> -static int
> -xfs_cui_copy_format(
> -	struct xfs_log_iovec		*buf,
> -	struct xfs_cui_log_format	*dst_cui_fmt)
> -{
> -	struct xfs_cui_log_format	*src_cui_fmt;
> -	uint				len;
> -
> -	src_cui_fmt = buf->i_addr;
> -	len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);
> -
> -	if (buf->i_len == len) {
> -		memcpy(dst_cui_fmt, src_cui_fmt, len);
> -		return 0;
> -	}
> -	XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
> -	return -EFSCORRUPTED;
> -}
> -
> -/*
> - * This routine is called to create an in-core extent refcount update
> - * item from the cui format structure which was logged on disk.
> - * It allocates an in-core cui, copies the extents from the format
> - * structure into it, and adds the cui to the AIL with the given
> - * LSN.
> - */
> -STATIC int
> -xlog_recover_cui_pass2(
> -	struct xlog			*log,
> -	struct xlog_recover_item	*item,
> -	xfs_lsn_t			lsn)
> -{
> -	int				error;
> -	struct xfs_mount		*mp = log->l_mp;
> -	struct xfs_cui_log_item		*cuip;
> -	struct xfs_cui_log_format	*cui_formatp;
> -
> -	cui_formatp = item->ri_buf[0].i_addr;
> -
> -	cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
> -	error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
> -	if (error) {
> -		xfs_cui_item_free(cuip);
> -		return error;
> -	}
> -	atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
> -
> -	spin_lock(&log->l_ailp->ail_lock);
> -	/*
> -	 * The CUI has two references. One for the CUD and one for CUI to ensure
> -	 * it makes it into the AIL. Insert the CUI into the AIL directly and
> -	 * drop the CUI reference. Note that xfs_trans_ail_update() drops the
> -	 * AIL lock.
> -	 */
> -	xfs_trans_ail_update(log->l_ailp, &cuip->cui_item, lsn);
> -	xfs_cui_release(cuip);
> -	return 0;
> -}
> -
> -
> -/*
> - * This routine is called when an CUD format structure is found in a committed
> - * transaction in the log. Its purpose is to cancel the corresponding CUI if it
> - * was still in the log. To do this it searches the AIL for the CUI with an id
> - * equal to that in the CUD format structure. If we find it we drop the CUD
> - * reference, which removes the CUI from the AIL and frees it.
> - */
> -STATIC int
> -xlog_recover_cud_pass2(
> -	struct xlog			*log,
> -	struct xlog_recover_item	*item)
> -{
> -	struct xfs_cud_log_format	*cud_formatp;
> -	struct xfs_cui_log_item		*cuip = NULL;
> -	struct xfs_log_item		*lip;
> -	uint64_t			cui_id;
> -	struct xfs_ail_cursor		cur;
> -	struct xfs_ail			*ailp = log->l_ailp;
> -
> -	cud_formatp = item->ri_buf[0].i_addr;
> -	if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
> -		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
> -		return -EFSCORRUPTED;
> -	}
> -	cui_id = cud_formatp->cud_cui_id;
> -
> -	/*
> -	 * Search for the CUI with the id in the CUD format structure in the
> -	 * AIL.
> -	 */
> -	spin_lock(&ailp->ail_lock);
> -	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
> -	while (lip != NULL) {
> -		if (lip->li_type == XFS_LI_CUI) {
> -			cuip = (struct xfs_cui_log_item *)lip;
> -			if (cuip->cui_format.cui_id == cui_id) {
> -				/*
> -				 * Drop the CUD reference to the CUI. This
> -				 * removes the CUI from the AIL and frees it.
> -				 */
> -				spin_unlock(&ailp->ail_lock);
> -				xfs_cui_release(cuip);
> -				spin_lock(&ailp->ail_lock);
> -				break;
> -			}
> -		}
> -		lip = xfs_trans_ail_cursor_next(ailp, &cur);
> -	}
> -
> -	xfs_trans_ail_cursor_done(&cur);
> -	spin_unlock(&ailp->ail_lock);
> -
> -	return 0;
> -}
> -
>  /*
>   * Copy an BUI format buffer from the given buf, and into the destination
>   * BUI format structure.  The BUI/BUD items were designed not to need any
> @@ -2292,10 +2172,6 @@ xlog_recover_commit_pass2(
>  				trans->r_lsn);
>  
>  	switch (ITEM_TYPE(item)) {
> -	case XFS_LI_CUI:
> -		return xlog_recover_cui_pass2(log, item, trans->r_lsn);
> -	case XFS_LI_CUD:
> -		return xlog_recover_cud_pass2(log, item);
>  	case XFS_LI_BUI:
>  		return xlog_recover_bui_pass2(log, item, trans->r_lsn);
>  	case XFS_LI_BUD:
> diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
> index 0e8e8bab4344..28b41f5dd6bc 100644
> --- a/fs/xfs/xfs_refcount_item.c
> +++ b/fs/xfs/xfs_refcount_item.c
> @@ -18,6 +18,7 @@
>  #include "xfs_log.h"
>  #include "xfs_refcount.h"
>  #include "xfs_error.h"
> +#include "xfs_log_priv.h"
>  #include "xfs_log_recover.h"
>  
>  kmem_zone_t	*xfs_cui_zone;
> @@ -28,7 +29,7 @@ static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
>  	return container_of(lip, struct xfs_cui_log_item, cui_item);
>  }
>  
> -void
> +STATIC void
>  xfs_cui_item_free(
>  	struct xfs_cui_log_item	*cuip)
>  {
> @@ -134,7 +135,7 @@ static const struct xfs_item_ops xfs_cui_item_ops = {
>  /*
>   * Allocate and initialize an cui item with the given number of extents.
>   */
> -struct xfs_cui_log_item *
> +STATIC struct xfs_cui_log_item *
>  xfs_cui_init(
>  	struct xfs_mount		*mp,
>  	uint				nextents)
> @@ -572,10 +573,134 @@ xfs_cui_recover(
>  	return error;
>  }
>  
> +/*
> + * Copy a CUI format buffer from the given buf into the destination
> + * CUI format structure.  The CUI/CUD items were designed not to need any
> + * special alignment handling.
> + */
> +static int
> +xfs_cui_copy_format(
> +	struct xfs_log_iovec		*buf,
> +	struct xfs_cui_log_format	*dst_cui_fmt)
> +{
> +	struct xfs_cui_log_format	*src_cui_fmt;
> +	uint				len;
> +
> +	src_cui_fmt = buf->i_addr;
> +	len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);
> +
> +	if (buf->i_len == len) {
> +		memcpy(dst_cui_fmt, src_cui_fmt, len);
> +		return 0;
> +	}
> +	XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
> +	return -EFSCORRUPTED;
> +}
> +
> +/*
> + * This routine is called to create an in-core extent refcount update
> + * item from the cui format structure which was logged on disk.
> + * It allocates an in-core cui, copies the extents from the format
> + * structure into it, and adds the cui to the AIL with the given
> + * LSN.
> + */
> +STATIC int
> +xlog_recover_refcount_intent_commit_pass2(
> +	struct xlog			*log,
> +	struct list_head		*buffer_list,
> +	struct xlog_recover_item	*item,
> +	xfs_lsn_t			lsn)
> +{
> +	int				error;
> +	struct xfs_mount		*mp = log->l_mp;
> +	struct xfs_cui_log_item		*cuip;
> +	struct xfs_cui_log_format	*cui_formatp;
> +
> +	cui_formatp = item->ri_buf[0].i_addr;
> +
> +	cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
> +	error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
> +	if (error) {
> +		xfs_cui_item_free(cuip);
> +		return error;
> +	}
> +	atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
> +
> +	spin_lock(&log->l_ailp->ail_lock);
> +	/*
> +	 * The CUI has two references. One for the CUD and one for CUI to ensure
> +	 * it makes it into the AIL. Insert the CUI into the AIL directly and
> +	 * drop the CUI reference. Note that xfs_trans_ail_update() drops the
> +	 * AIL lock.
> +	 */
> +	xfs_trans_ail_update(log->l_ailp, &cuip->cui_item, lsn);
> +	xfs_cui_release(cuip);
> +	return 0;
> +}
> +
>  const struct xlog_recover_item_ops xlog_refcount_intent_item_ops = {
>  	.item_type		= XFS_LI_CUI,
> +	.commit_pass2		= xlog_recover_refcount_intent_commit_pass2,
>  };
>  
> +/*
> + * This routine is called when a CUD format structure is found in a committed
> + * transaction in the log. Its purpose is to cancel the corresponding CUI if it
> + * was still in the log. To do this it searches the AIL for the CUI with an id
> + * equal to that in the CUD format structure. If we find it we drop the CUD
> + * reference, which removes the CUI from the AIL and frees it.
> + */
> +STATIC int
> +xlog_recover_refcount_done_commit_pass2(
> +	struct xlog			*log,
> +	struct list_head		*buffer_list,
> +	struct xlog_recover_item	*item,
> +	xfs_lsn_t			lsn)
> +{
> +	struct xfs_cud_log_format	*cud_formatp;
> +	struct xfs_cui_log_item		*cuip = NULL;
> +	struct xfs_log_item		*lip;
> +	uint64_t			cui_id;
> +	struct xfs_ail_cursor		cur;
> +	struct xfs_ail			*ailp = log->l_ailp;
> +
> +	cud_formatp = item->ri_buf[0].i_addr;
> +	if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
> +		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
> +		return -EFSCORRUPTED;
> +	}
> +	cui_id = cud_formatp->cud_cui_id;
> +
> +	/*
> +	 * Search for the CUI with the id in the CUD format structure in the
> +	 * AIL.
> +	 */
> +	spin_lock(&ailp->ail_lock);
> +	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
> +	while (lip != NULL) {
> +		if (lip->li_type == XFS_LI_CUI) {
> +			cuip = (struct xfs_cui_log_item *)lip;
> +			if (cuip->cui_format.cui_id == cui_id) {
> +				/*
> +				 * Drop the CUD reference to the CUI. This
> +				 * removes the CUI from the AIL and frees it.
> +				 */
> +				spin_unlock(&ailp->ail_lock);
> +				xfs_cui_release(cuip);
> +				spin_lock(&ailp->ail_lock);
> +				break;
> +			}
> +		}
> +		lip = xfs_trans_ail_cursor_next(ailp, &cur);
> +	}
> +
> +	xfs_trans_ail_cursor_done(&cur);
> +	spin_unlock(&ailp->ail_lock);
> +
> +	return 0;
> +}
> +
>  const struct xlog_recover_item_ops xlog_refcount_done_item_ops = {
>  	.item_type		= XFS_LI_CUD,
> +	.commit_pass2		= xlog_recover_refcount_done_commit_pass2,
>  };
> diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
> index e47530f30489..ebe12779eaac 100644
> --- a/fs/xfs/xfs_refcount_item.h
> +++ b/fs/xfs/xfs_refcount_item.h
> @@ -77,8 +77,6 @@ struct xfs_cud_log_item {
>  extern struct kmem_zone	*xfs_cui_zone;
>  extern struct kmem_zone	*xfs_cud_zone;
>  
> -struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint);
> -void xfs_cui_item_free(struct xfs_cui_log_item *);
>  void xfs_cui_release(struct xfs_cui_log_item *);
>  int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
>  
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 12/28] xfs: refactor log recovery BUI item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 12/28] xfs: refactor log recovery BUI " Darrick J. Wong
@ 2020-05-05  7:14   ` Chandan Babu R
  2020-05-06 15:14   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  7:14 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:41:47 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the bmap update intent and intent-done pass2 commit code into the
> per-item source code files and use dispatch functions to call them.  We
> do these one at a time because there's a lot of code to move.  No
> functional changes.
>

BUI/BUD item pass2 processing is functionally consistent with what was done
before this patch is applied.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_bmap_item.c   |  133 +++++++++++++++++++++++++++++++++++++++++++++-
>  fs/xfs/xfs_bmap_item.h   |    2 -
>  fs/xfs/xfs_log_recover.c |  128 --------------------------------------------
>  3 files changed, 131 insertions(+), 132 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
> index 42354403fec7..0fbebef69e26 100644
> --- a/fs/xfs/xfs_bmap_item.c
> +++ b/fs/xfs/xfs_bmap_item.c
> @@ -22,6 +22,7 @@
>  #include "xfs_bmap_btree.h"
>  #include "xfs_trans_space.h"
>  #include "xfs_error.h"
> +#include "xfs_log_priv.h"
>  #include "xfs_log_recover.h"
>  
>  kmem_zone_t	*xfs_bui_zone;
> @@ -32,7 +33,7 @@ static inline struct xfs_bui_log_item *BUI_ITEM(struct xfs_log_item *lip)
>  	return container_of(lip, struct xfs_bui_log_item, bui_item);
>  }
>  
> -void
> +STATIC void
>  xfs_bui_item_free(
>  	struct xfs_bui_log_item	*buip)
>  {
> @@ -135,7 +136,7 @@ static const struct xfs_item_ops xfs_bui_item_ops = {
>  /*
>   * Allocate and initialize an bui item with the given number of extents.
>   */
> -struct xfs_bui_log_item *
> +STATIC struct xfs_bui_log_item *
>  xfs_bui_init(
>  	struct xfs_mount		*mp)
>  
> @@ -559,10 +560,138 @@ xfs_bui_recover(
>  	return error;
>  }
>  
> +/*
> + * Copy a BUI format buffer from the given buf into the destination
> + * BUI format structure.  The BUI/BUD items were designed not to need any
> + * special alignment handling.
> + */
> +static int
> +xfs_bui_copy_format(
> +	struct xfs_log_iovec		*buf,
> +	struct xfs_bui_log_format	*dst_bui_fmt)
> +{
> +	struct xfs_bui_log_format	*src_bui_fmt;
> +	uint				len;
> +
> +	src_bui_fmt = buf->i_addr;
> +	len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents);
> +
> +	if (buf->i_len == len) {
> +		memcpy(dst_bui_fmt, src_bui_fmt, len);
> +		return 0;
> +	}
> +	XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
> +	return -EFSCORRUPTED;
> +}
> +
> +/*
> + * This routine is called to create an in-core extent bmap update
> + * item from the bui format structure which was logged on disk.
> + * It allocates an in-core bui, copies the extents from the format
> + * structure into it, and adds the bui to the AIL with the given
> + * LSN.
> + */
> +STATIC int
> +xlog_recover_bmap_intent_commit_pass2(
> +	struct xlog			*log,
> +	struct list_head		*buffer_list,
> +	struct xlog_recover_item	*item,
> +	xfs_lsn_t			lsn)
> +{
> +	int				error;
> +	struct xfs_mount		*mp = log->l_mp;
> +	struct xfs_bui_log_item		*buip;
> +	struct xfs_bui_log_format	*bui_formatp;
> +
> +	bui_formatp = item->ri_buf[0].i_addr;
> +
> +	if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
> +		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
> +		return -EFSCORRUPTED;
> +	}
> +	buip = xfs_bui_init(mp);
> +	error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format);
> +	if (error) {
> +		xfs_bui_item_free(buip);
> +		return error;
> +	}
> +	atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
> +
> +	spin_lock(&log->l_ailp->ail_lock);
> +	/*
> +	 * The BUI has two references. One for the BUD and one for BUI to ensure
> +	 * it makes it into the AIL. Insert the BUI into the AIL directly and
> +	 * drop the BUI reference. Note that xfs_trans_ail_update() drops the
> +	 * AIL lock.
> +	 */
> +	xfs_trans_ail_update(log->l_ailp, &buip->bui_item, lsn);
> +	xfs_bui_release(buip);
> +	return 0;
> +}
> +
>  const struct xlog_recover_item_ops xlog_bmap_intent_item_ops = {
>  	.item_type		= XFS_LI_BUI,
> +	.commit_pass2		= xlog_recover_bmap_intent_commit_pass2,
>  };
>  
> +/*
> + * This routine is called when a BUD format structure is found in a committed
> + * transaction in the log. Its purpose is to cancel the corresponding BUI if it
> + * was still in the log. To do this it searches the AIL for the BUI with an id
> + * equal to that in the BUD format structure. If we find it we drop the BUD
> + * reference, which removes the BUI from the AIL and frees it.
> + */
> +STATIC int
> +xlog_recover_bmap_done_commit_pass2(
> +	struct xlog			*log,
> +	struct list_head		*buffer_list,
> +	struct xlog_recover_item	*item,
> +	xfs_lsn_t			lsn)
> +{
> +	struct xfs_bud_log_format	*bud_formatp;
> +	struct xfs_bui_log_item		*buip = NULL;
> +	struct xfs_log_item		*lip;
> +	uint64_t			bui_id;
> +	struct xfs_ail_cursor		cur;
> +	struct xfs_ail			*ailp = log->l_ailp;
> +
> +	bud_formatp = item->ri_buf[0].i_addr;
> +	if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) {
> +		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
> +		return -EFSCORRUPTED;
> +	}
> +	bui_id = bud_formatp->bud_bui_id;
> +
> +	/*
> +	 * Search for the BUI with the id in the BUD format structure in the
> +	 * AIL.
> +	 */
> +	spin_lock(&ailp->ail_lock);
> +	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
> +	while (lip != NULL) {
> +		if (lip->li_type == XFS_LI_BUI) {
> +			buip = (struct xfs_bui_log_item *)lip;
> +			if (buip->bui_format.bui_id == bui_id) {
> +				/*
> +				 * Drop the BUD reference to the BUI. This
> +				 * removes the BUI from the AIL and frees it.
> +				 */
> +				spin_unlock(&ailp->ail_lock);
> +				xfs_bui_release(buip);
> +				spin_lock(&ailp->ail_lock);
> +				break;
> +			}
> +		}
> +		lip = xfs_trans_ail_cursor_next(ailp, &cur);
> +	}
> +
> +	xfs_trans_ail_cursor_done(&cur);
> +	spin_unlock(&ailp->ail_lock);
> +
> +	return 0;
> +}
> +
>  const struct xlog_recover_item_ops xlog_bmap_done_item_ops = {
>  	.item_type		= XFS_LI_BUD,
> +	.commit_pass2		= xlog_recover_bmap_done_commit_pass2,
>  };
> diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
> index ad479cc73de8..515b1d5d6ab7 100644
> --- a/fs/xfs/xfs_bmap_item.h
> +++ b/fs/xfs/xfs_bmap_item.h
> @@ -74,8 +74,6 @@ struct xfs_bud_log_item {
>  extern struct kmem_zone	*xfs_bui_zone;
>  extern struct kmem_zone	*xfs_bud_zone;
>  
> -struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *);
> -void xfs_bui_item_free(struct xfs_bui_log_item *);
>  void xfs_bui_release(struct xfs_bui_log_item *);
>  int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip);
>  
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 23008b7cf93c..a5158e9e0662 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2034,130 +2034,6 @@ xlog_buf_readahead(
>  		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
>  }
>  
> -/*
> - * Copy an BUI format buffer from the given buf, and into the destination
> - * BUI format structure.  The BUI/BUD items were designed not to need any
> - * special alignment handling.
> - */
> -static int
> -xfs_bui_copy_format(
> -	struct xfs_log_iovec		*buf,
> -	struct xfs_bui_log_format	*dst_bui_fmt)
> -{
> -	struct xfs_bui_log_format	*src_bui_fmt;
> -	uint				len;
> -
> -	src_bui_fmt = buf->i_addr;
> -	len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents);
> -
> -	if (buf->i_len == len) {
> -		memcpy(dst_bui_fmt, src_bui_fmt, len);
> -		return 0;
> -	}
> -	XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
> -	return -EFSCORRUPTED;
> -}
> -
> -/*
> - * This routine is called to create an in-core extent bmap update
> - * item from the bui format structure which was logged on disk.
> - * It allocates an in-core bui, copies the extents from the format
> - * structure into it, and adds the bui to the AIL with the given
> - * LSN.
> - */
> -STATIC int
> -xlog_recover_bui_pass2(
> -	struct xlog			*log,
> -	struct xlog_recover_item	*item,
> -	xfs_lsn_t			lsn)
> -{
> -	int				error;
> -	struct xfs_mount		*mp = log->l_mp;
> -	struct xfs_bui_log_item		*buip;
> -	struct xfs_bui_log_format	*bui_formatp;
> -
> -	bui_formatp = item->ri_buf[0].i_addr;
> -
> -	if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
> -		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
> -		return -EFSCORRUPTED;
> -	}
> -	buip = xfs_bui_init(mp);
> -	error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format);
> -	if (error) {
> -		xfs_bui_item_free(buip);
> -		return error;
> -	}
> -	atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
> -
> -	spin_lock(&log->l_ailp->ail_lock);
> -	/*
> -	 * The RUI has two references. One for the RUD and one for RUI to ensure
> -	 * it makes it into the AIL. Insert the RUI into the AIL directly and
> -	 * drop the RUI reference. Note that xfs_trans_ail_update() drops the
> -	 * AIL lock.
> -	 */
> -	xfs_trans_ail_update(log->l_ailp, &buip->bui_item, lsn);
> -	xfs_bui_release(buip);
> -	return 0;
> -}
> -
> -
> -/*
> - * This routine is called when an BUD format structure is found in a committed
> - * transaction in the log. Its purpose is to cancel the corresponding BUI if it
> - * was still in the log. To do this it searches the AIL for the BUI with an id
> - * equal to that in the BUD format structure. If we find it we drop the BUD
> - * reference, which removes the BUI from the AIL and frees it.
> - */
> -STATIC int
> -xlog_recover_bud_pass2(
> -	struct xlog			*log,
> -	struct xlog_recover_item	*item)
> -{
> -	struct xfs_bud_log_format	*bud_formatp;
> -	struct xfs_bui_log_item		*buip = NULL;
> -	struct xfs_log_item		*lip;
> -	uint64_t			bui_id;
> -	struct xfs_ail_cursor		cur;
> -	struct xfs_ail			*ailp = log->l_ailp;
> -
> -	bud_formatp = item->ri_buf[0].i_addr;
> -	if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) {
> -		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
> -		return -EFSCORRUPTED;
> -	}
> -	bui_id = bud_formatp->bud_bui_id;
> -
> -	/*
> -	 * Search for the BUI with the id in the BUD format structure in the
> -	 * AIL.
> -	 */
> -	spin_lock(&ailp->ail_lock);
> -	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
> -	while (lip != NULL) {
> -		if (lip->li_type == XFS_LI_BUI) {
> -			buip = (struct xfs_bui_log_item *)lip;
> -			if (buip->bui_format.bui_id == bui_id) {
> -				/*
> -				 * Drop the BUD reference to the BUI. This
> -				 * removes the BUI from the AIL and frees it.
> -				 */
> -				spin_unlock(&ailp->ail_lock);
> -				xfs_bui_release(buip);
> -				spin_lock(&ailp->ail_lock);
> -				break;
> -			}
> -		}
> -		lip = xfs_trans_ail_cursor_next(ailp, &cur);
> -	}
> -
> -	xfs_trans_ail_cursor_done(&cur);
> -	spin_unlock(&ailp->ail_lock);
> -
> -	return 0;
> -}
> -
>  STATIC int
>  xlog_recover_commit_pass2(
>  	struct xlog			*log,
> @@ -2172,10 +2048,6 @@ xlog_recover_commit_pass2(
>  				trans->r_lsn);
>  
>  	switch (ITEM_TYPE(item)) {
> -	case XFS_LI_BUI:
> -		return xlog_recover_bui_pass2(log, item, trans->r_lsn);
> -	case XFS_LI_BUD:
> -		return xlog_recover_bud_pass2(log, item);
>  	case XFS_LI_QUOTAOFF:
>  		/* nothing to do in pass2 */
>  		return 0;
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 13/28] xfs: remove log recovery quotaoff item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 13/28] xfs: remove log recovery quotaoff " Darrick J. Wong
@ 2020-05-05  7:32   ` Chandan Babu R
  2020-05-06 15:16   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  7:32 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:41:53 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Quotaoff doesn't actually do anything, so take advantage of the
> commit_pass2 pointer being optional and get rid of the switch
> statement clause.
>

If we did have an invalid item the check in xlog_recover_commit_trans() would
have caught it. Hence we don't require yet another invalid item type check.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_dquot_item_recover.c |    1 +
>  fs/xfs/xfs_log_recover.c        |   33 ++++++---------------------------
>  2 files changed, 7 insertions(+), 27 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
> index 07ff943972a3..a07c1c8344d8 100644
> --- a/fs/xfs/xfs_dquot_item_recover.c
> +++ b/fs/xfs/xfs_dquot_item_recover.c
> @@ -197,4 +197,5 @@ xlog_recover_quotaoff_commit_pass1(
>  const struct xlog_recover_item_ops xlog_quotaoff_item_ops = {
>  	.item_type		= XFS_LI_QUOTAOFF,
>  	.commit_pass1		= xlog_recover_quotaoff_commit_pass1,
> +	.commit_pass2		= NULL, /* nothing to do in pass2 */
>  };
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index a5158e9e0662..929e2caeeb42 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2034,31 +2034,6 @@ xlog_buf_readahead(
>  		xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
>  }
>  
> -STATIC int
> -xlog_recover_commit_pass2(
> -	struct xlog			*log,
> -	struct xlog_recover		*trans,
> -	struct list_head		*buffer_list,
> -	struct xlog_recover_item	*item)
> -{
> -	trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
> -
> -	if (item->ri_ops && item->ri_ops->commit_pass2)
> -		return item->ri_ops->commit_pass2(log, buffer_list, item,
> -				trans->r_lsn);
> -
> -	switch (ITEM_TYPE(item)) {
> -	case XFS_LI_QUOTAOFF:
> -		/* nothing to do in pass2 */
> -		return 0;
> -	default:
> -		xfs_warn(log->l_mp, "%s: invalid item type (%d)",
> -			__func__, ITEM_TYPE(item));
> -		ASSERT(0);
> -		return -EFSCORRUPTED;
> -	}
> -}
> -
>  STATIC int
>  xlog_recover_items_pass2(
>  	struct xlog                     *log,
> @@ -2070,8 +2045,12 @@ xlog_recover_items_pass2(
>  	int				error = 0;
>  
>  	list_for_each_entry(item, item_list, ri_list) {
> -		error = xlog_recover_commit_pass2(log, trans,
> -					  buffer_list, item);
> +		trace_xfs_log_recover_item_recover(log, trans, item,
> +				XLOG_RECOVER_PASS2);
> +
> +		if (item->ri_ops->commit_pass2)
> +			error = item->ri_ops->commit_pass2(log, buffer_list,
> +					item, trans->r_lsn);
>  		if (error)
>  			return error;
>  	}
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 14/28] xfs: refactor recovered EFI log item playback
  2020-05-05  1:12 ` [PATCH 14/28] xfs: refactor recovered EFI log item playback Darrick J. Wong
@ 2020-05-05  9:03   ` Chandan Babu R
  2020-05-06 15:18   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  9:03 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:42:01 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the code that processes the log items created from the recovered
> log items into the per-item source code files and use dispatch functions
> to call them.  No functional changes.
>

EFI log item playback is consistent with what was done before the patch is
applied.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_extfree_item.c |   47 +++++++++++++++++++++++++++++++++++----------
>  fs/xfs/xfs_extfree_item.h |    5 -----
>  fs/xfs/xfs_log_recover.c  |   46 ++++----------------------------------------
>  fs/xfs/xfs_trans.h        |    1 +
>  4 files changed, 42 insertions(+), 57 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
> index dca098660753..3fc8a9864217 100644
> --- a/fs/xfs/xfs_extfree_item.c
> +++ b/fs/xfs/xfs_extfree_item.c
> @@ -28,6 +28,8 @@
>  kmem_zone_t	*xfs_efi_zone;
>  kmem_zone_t	*xfs_efd_zone;
>  
> +static const struct xfs_item_ops xfs_efi_item_ops;
> +
>  static inline struct xfs_efi_log_item *EFI_ITEM(struct xfs_log_item *lip)
>  {
>  	return container_of(lip, struct xfs_efi_log_item, efi_item);
> @@ -51,7 +53,7 @@ xfs_efi_item_free(
>   * committed vs unpin operations in bulk insert operations. Hence the reference
>   * count to ensure only the last caller frees the EFI.
>   */
> -void
> +STATIC void
>  xfs_efi_release(
>  	struct xfs_efi_log_item	*efip)
>  {
> @@ -141,14 +143,6 @@ xfs_efi_item_release(
>  	xfs_efi_release(EFI_ITEM(lip));
>  }
>  
> -static const struct xfs_item_ops xfs_efi_item_ops = {
> -	.iop_size	= xfs_efi_item_size,
> -	.iop_format	= xfs_efi_item_format,
> -	.iop_unpin	= xfs_efi_item_unpin,
> -	.iop_release	= xfs_efi_item_release,
> -};
> -
> -
>  /*
>   * Allocate and initialize an efi item with the given number of extents.
>   */
> @@ -586,7 +580,7 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
>   * Process an extent free intent item that was recovered from
>   * the log.  We need to free the extents that it describes.
>   */
> -int
> +STATIC int
>  xfs_efi_recover(
>  	struct xfs_mount	*mp,
>  	struct xfs_efi_log_item	*efip)
> @@ -647,6 +641,39 @@ xfs_efi_recover(
>  	return error;
>  }
>  
> +/* Recover the EFI if necessary. */
> +STATIC int
> +xfs_efi_item_recover(
> +	struct xfs_log_item		*lip,
> +	struct xfs_trans		*tp)
> +{
> +	struct xfs_ail			*ailp = lip->li_ailp;
> +	struct xfs_efi_log_item		*efip;
> +	int				error;
> +
> +	/*
> +	 * Skip EFIs that we've already processed.
> +	 */
> +	efip = container_of(lip, struct xfs_efi_log_item, efi_item);
> +	if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
> +		return 0;
> +
> +	spin_unlock(&ailp->ail_lock);
> +	error = xfs_efi_recover(tp->t_mountp, efip);
> +	spin_lock(&ailp->ail_lock);
> +
> +	return error;
> +}
> +
> +static const struct xfs_item_ops xfs_efi_item_ops = {
> +	.iop_size	= xfs_efi_item_size,
> +	.iop_format	= xfs_efi_item_format,
> +	.iop_unpin	= xfs_efi_item_unpin,
> +	.iop_release	= xfs_efi_item_release,
> +	.iop_recover	= xfs_efi_item_recover,
> +};
> +
> +
>  /*
>   * This routine is called to create an in-core extent free intent
>   * item from the efi format structure which was logged on disk.
> diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
> index 876e3d237f48..4b2c2c5c5985 100644
> --- a/fs/xfs/xfs_extfree_item.h
> +++ b/fs/xfs/xfs_extfree_item.h
> @@ -78,9 +78,4 @@ struct xfs_efd_log_item {
>  extern struct kmem_zone	*xfs_efi_zone;
>  extern struct kmem_zone	*xfs_efd_zone;
>  
> -void			xfs_efi_release(struct xfs_efi_log_item *);
> -
> -int			xfs_efi_recover(struct xfs_mount *mp,
> -					struct xfs_efi_log_item *efip);
> -
>  #endif	/* __XFS_EXTFREE_ITEM_H__ */
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 929e2caeeb42..f12e14719202 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2553,46 +2553,6 @@ xlog_recover_process_data(
>  	return 0;
>  }
>  
> -/* Recover the EFI if necessary. */
> -STATIC int
> -xlog_recover_process_efi(
> -	struct xfs_mount		*mp,
> -	struct xfs_ail			*ailp,
> -	struct xfs_log_item		*lip)
> -{
> -	struct xfs_efi_log_item		*efip;
> -	int				error;
> -
> -	/*
> -	 * Skip EFIs that we've already processed.
> -	 */
> -	efip = container_of(lip, struct xfs_efi_log_item, efi_item);
> -	if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
> -		return 0;
> -
> -	spin_unlock(&ailp->ail_lock);
> -	error = xfs_efi_recover(mp, efip);
> -	spin_lock(&ailp->ail_lock);
> -
> -	return error;
> -}
> -
> -/* Release the EFI since we're cancelling everything. */
> -STATIC void
> -xlog_recover_cancel_efi(
> -	struct xfs_mount		*mp,
> -	struct xfs_ail			*ailp,
> -	struct xfs_log_item		*lip)
> -{
> -	struct xfs_efi_log_item		*efip;
> -
> -	efip = container_of(lip, struct xfs_efi_log_item, efi_item);
> -
> -	spin_unlock(&ailp->ail_lock);
> -	xfs_efi_release(efip);
> -	spin_lock(&ailp->ail_lock);
> -}
> -
>  /* Recover the RUI if necessary. */
>  STATIC int
>  xlog_recover_process_rui(
> @@ -2837,7 +2797,7 @@ xlog_recover_process_intents(
>  		 */
>  		switch (lip->li_type) {
>  		case XFS_LI_EFI:
> -			error = xlog_recover_process_efi(log->l_mp, ailp, lip);
> +			error = lip->li_ops->iop_recover(lip, parent_tp);
>  			break;
>  		case XFS_LI_RUI:
>  			error = xlog_recover_process_rui(log->l_mp, ailp, lip);
> @@ -2893,7 +2853,9 @@ xlog_recover_cancel_intents(
>  
>  		switch (lip->li_type) {
>  		case XFS_LI_EFI:
> -			xlog_recover_cancel_efi(log->l_mp, ailp, lip);
> +			spin_unlock(&ailp->ail_lock);
> +			lip->li_ops->iop_release(lip);
> +			spin_lock(&ailp->ail_lock);
>  			break;
>  		case XFS_LI_RUI:
>  			xlog_recover_cancel_rui(log->l_mp, ailp, lip);
> diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
> index 752c7fef9de7..3f6a79108991 100644
> --- a/fs/xfs/xfs_trans.h
> +++ b/fs/xfs/xfs_trans.h
> @@ -77,6 +77,7 @@ struct xfs_item_ops {
>  	void (*iop_release)(struct xfs_log_item *);
>  	xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
>  	void (*iop_error)(struct xfs_log_item *, xfs_buf_t *);
> +	int (*iop_recover)(struct xfs_log_item *lip, struct xfs_trans *tp);
>  };
>  
>  /*
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 15/28] xfs: refactor recovered RUI log item playback
  2020-05-05  1:12 ` [PATCH 15/28] xfs: refactor recovered RUI " Darrick J. Wong
@ 2020-05-05  9:10   ` Chandan Babu R
  2020-05-06 15:18   ` Christoph Hellwig
  2020-05-06 15:19   ` Christoph Hellwig
  2 siblings, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  9:10 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:42:07 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the code that processes the log items created from the recovered
> log items into the per-item source code files and use dispatch functions
> to call them.  No functional changes.
>

RUI log item playback is consistent with what was done before the patch was
applied.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_log_recover.c |   48 ++--------------------------------------------
>  fs/xfs/xfs_rmap_item.c   |   44 ++++++++++++++++++++++++++++++++++--------
>  fs/xfs/xfs_rmap_item.h   |    3 ---
>  3 files changed, 37 insertions(+), 58 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index f12e14719202..da66484acaa7 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2553,46 +2553,6 @@ xlog_recover_process_data(
>  	return 0;
>  }
>  
> -/* Recover the RUI if necessary. */
> -STATIC int
> -xlog_recover_process_rui(
> -	struct xfs_mount		*mp,
> -	struct xfs_ail			*ailp,
> -	struct xfs_log_item		*lip)
> -{
> -	struct xfs_rui_log_item		*ruip;
> -	int				error;
> -
> -	/*
> -	 * Skip RUIs that we've already processed.
> -	 */
> -	ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
> -	if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
> -		return 0;
> -
> -	spin_unlock(&ailp->ail_lock);
> -	error = xfs_rui_recover(mp, ruip);
> -	spin_lock(&ailp->ail_lock);
> -
> -	return error;
> -}
> -
> -/* Release the RUI since we're cancelling everything. */
> -STATIC void
> -xlog_recover_cancel_rui(
> -	struct xfs_mount		*mp,
> -	struct xfs_ail			*ailp,
> -	struct xfs_log_item		*lip)
> -{
> -	struct xfs_rui_log_item		*ruip;
> -
> -	ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
> -
> -	spin_unlock(&ailp->ail_lock);
> -	xfs_rui_release(ruip);
> -	spin_lock(&ailp->ail_lock);
> -}
> -
>  /* Recover the CUI if necessary. */
>  STATIC int
>  xlog_recover_process_cui(
> @@ -2797,10 +2757,8 @@ xlog_recover_process_intents(
>  		 */
>  		switch (lip->li_type) {
>  		case XFS_LI_EFI:
> -			error = lip->li_ops->iop_recover(lip, parent_tp);
> -			break;
>  		case XFS_LI_RUI:
> -			error = xlog_recover_process_rui(log->l_mp, ailp, lip);
> +			error = lip->li_ops->iop_recover(lip, parent_tp);
>  			break;
>  		case XFS_LI_CUI:
>  			error = xlog_recover_process_cui(parent_tp, ailp, lip);
> @@ -2853,13 +2811,11 @@ xlog_recover_cancel_intents(
>  
>  		switch (lip->li_type) {
>  		case XFS_LI_EFI:
> +		case XFS_LI_RUI:
>  			spin_unlock(&ailp->ail_lock);
>  			lip->li_ops->iop_release(lip);
>  			spin_lock(&ailp->ail_lock);
>  			break;
> -		case XFS_LI_RUI:
> -			xlog_recover_cancel_rui(log->l_mp, ailp, lip);
> -			break;
>  		case XFS_LI_CUI:
>  			xlog_recover_cancel_cui(log->l_mp, ailp, lip);
>  			break;
> diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
> index c87f4e429c12..e763dd8ed0a6 100644
> --- a/fs/xfs/xfs_rmap_item.c
> +++ b/fs/xfs/xfs_rmap_item.c
> @@ -24,6 +24,8 @@
>  kmem_zone_t	*xfs_rui_zone;
>  kmem_zone_t	*xfs_rud_zone;
>  
> +static const struct xfs_item_ops xfs_rui_item_ops;
> +
>  static inline struct xfs_rui_log_item *RUI_ITEM(struct xfs_log_item *lip)
>  {
>  	return container_of(lip, struct xfs_rui_log_item, rui_item);
> @@ -46,7 +48,7 @@ xfs_rui_item_free(
>   * committed vs unpin operations in bulk insert operations. Hence the reference
>   * count to ensure only the last caller frees the RUI.
>   */
> -void
> +STATIC void
>  xfs_rui_release(
>  	struct xfs_rui_log_item	*ruip)
>  {
> @@ -124,13 +126,6 @@ xfs_rui_item_release(
>  	xfs_rui_release(RUI_ITEM(lip));
>  }
>  
> -static const struct xfs_item_ops xfs_rui_item_ops = {
> -	.iop_size	= xfs_rui_item_size,
> -	.iop_format	= xfs_rui_item_format,
> -	.iop_unpin	= xfs_rui_item_unpin,
> -	.iop_release	= xfs_rui_item_release,
> -};
> -
>  /*
>   * Allocate and initialize an rui item with the given number of extents.
>   */
> @@ -468,7 +463,7 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
>   * Process an rmap update intent item that was recovered from the log.
>   * We need to update the rmapbt.
>   */
> -int
> +STATIC int
>  xfs_rui_recover(
>  	struct xfs_mount		*mp,
>  	struct xfs_rui_log_item		*ruip)
> @@ -588,6 +583,37 @@ xfs_rui_recover(
>  	return error;
>  }
>  
> +/* Recover the RUI if necessary. */
> +STATIC int
> +xfs_rui_item_recover(
> +	struct xfs_log_item		*lip,
> +	struct xfs_trans		*tp)
> +{
> +	struct xfs_ail			*ailp = lip->li_ailp;
> +	struct xfs_rui_log_item		*ruip = RUI_ITEM(lip);
> +	int				error;
> +
> +	/*
> +	 * Skip RUIs that we've already processed.
> +	 */
> +	if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
> +		return 0;
> +
> +	spin_unlock(&ailp->ail_lock);
> +	error = xfs_rui_recover(tp->t_mountp, ruip);
> +	spin_lock(&ailp->ail_lock);
> +
> +	return error;
> +}
> +
> +static const struct xfs_item_ops xfs_rui_item_ops = {
> +	.iop_size	= xfs_rui_item_size,
> +	.iop_format	= xfs_rui_item_format,
> +	.iop_unpin	= xfs_rui_item_unpin,
> +	.iop_release	= xfs_rui_item_release,
> +	.iop_recover	= xfs_rui_item_recover,
> +};
> +
>  /*
>   * This routine is called to create an in-core extent rmap update
>   * item from the rui format structure which was logged on disk.
> diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
> index 89bd192779f8..48a77a6f5c94 100644
> --- a/fs/xfs/xfs_rmap_item.h
> +++ b/fs/xfs/xfs_rmap_item.h
> @@ -77,7 +77,4 @@ struct xfs_rud_log_item {
>  extern struct kmem_zone	*xfs_rui_zone;
>  extern struct kmem_zone	*xfs_rud_zone;
>  
> -void xfs_rui_release(struct xfs_rui_log_item *);
> -int xfs_rui_recover(struct xfs_mount *mp, struct xfs_rui_log_item *ruip);
> -
>  #endif	/* __XFS_RMAP_ITEM_H__ */
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 16/28] xfs: refactor recovered CUI log item playback
  2020-05-05  1:12 ` [PATCH 16/28] xfs: refactor recovered CUI " Darrick J. Wong
@ 2020-05-05  9:29   ` Chandan Babu R
  2020-05-05  9:29     ` Chandan Babu R
  2020-05-06 15:19   ` Christoph Hellwig
  1 sibling, 1 reply; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  9:29 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:42:14 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the code that processes the log items created from the recovered
> log items into the per-item source code files and use dispatch functions
> to call them.  No functional changes.
>

CUI log item playback is consistent with what was done before the patch was
applied.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_log_recover.c   |   48 ++------------------------------------------
>  fs/xfs/xfs_refcount_item.c |   44 ++++++++++++++++++++++++++++++++--------
>  fs/xfs/xfs_refcount_item.h |    3 ---
>  3 files changed, 37 insertions(+), 58 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index da66484acaa7..ad5ac97ed0c7 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2553,46 +2553,6 @@ xlog_recover_process_data(
>  	return 0;
>  }
>  
> -/* Recover the CUI if necessary. */
> -STATIC int
> -xlog_recover_process_cui(
> -	struct xfs_trans		*parent_tp,
> -	struct xfs_ail			*ailp,
> -	struct xfs_log_item		*lip)
> -{
> -	struct xfs_cui_log_item		*cuip;
> -	int				error;
> -
> -	/*
> -	 * Skip CUIs that we've already processed.
> -	 */
> -	cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
> -	if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
> -		return 0;
> -
> -	spin_unlock(&ailp->ail_lock);
> -	error = xfs_cui_recover(parent_tp, cuip);
> -	spin_lock(&ailp->ail_lock);
> -
> -	return error;
> -}
> -
> -/* Release the CUI since we're cancelling everything. */
> -STATIC void
> -xlog_recover_cancel_cui(
> -	struct xfs_mount		*mp,
> -	struct xfs_ail			*ailp,
> -	struct xfs_log_item		*lip)
> -{
> -	struct xfs_cui_log_item		*cuip;
> -
> -	cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
> -
> -	spin_unlock(&ailp->ail_lock);
> -	xfs_cui_release(cuip);
> -	spin_lock(&ailp->ail_lock);
> -}
> -
>  /* Recover the BUI if necessary. */
>  STATIC int
>  xlog_recover_process_bui(
> @@ -2758,10 +2718,8 @@ xlog_recover_process_intents(
>  		switch (lip->li_type) {
>  		case XFS_LI_EFI:
>  		case XFS_LI_RUI:
> -			error = lip->li_ops->iop_recover(lip, parent_tp);
> -			break;
>  		case XFS_LI_CUI:
> -			error = xlog_recover_process_cui(parent_tp, ailp, lip);
> +			error = lip->li_ops->iop_recover(lip, parent_tp);
>  			break;
>  		case XFS_LI_BUI:
>  			error = xlog_recover_process_bui(parent_tp, ailp, lip);
> @@ -2812,13 +2770,11 @@ xlog_recover_cancel_intents(
>  		switch (lip->li_type) {
>  		case XFS_LI_EFI:
>  		case XFS_LI_RUI:
> +		case XFS_LI_CUI:
>  			spin_unlock(&ailp->ail_lock);
>  			lip->li_ops->iop_release(lip);
>  			spin_lock(&ailp->ail_lock);
>  			break;
> -		case XFS_LI_CUI:
> -			xlog_recover_cancel_cui(log->l_mp, ailp, lip);
> -			break;
>  		case XFS_LI_BUI:
>  			xlog_recover_cancel_bui(log->l_mp, ailp, lip);
>  			break;
> diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
> index 28b41f5dd6bc..5b72eebd8764 100644
> --- a/fs/xfs/xfs_refcount_item.c
> +++ b/fs/xfs/xfs_refcount_item.c
> @@ -24,6 +24,8 @@
>  kmem_zone_t	*xfs_cui_zone;
>  kmem_zone_t	*xfs_cud_zone;
>  
> +static const struct xfs_item_ops xfs_cui_item_ops;
> +
>  static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
>  {
>  	return container_of(lip, struct xfs_cui_log_item, cui_item);
> @@ -46,7 +48,7 @@ xfs_cui_item_free(
>   * committed vs unpin operations in bulk insert operations. Hence the reference
>   * count to ensure only the last caller frees the CUI.
>   */
> -void
> +STATIC void
>  xfs_cui_release(
>  	struct xfs_cui_log_item	*cuip)
>  {
> @@ -125,13 +127,6 @@ xfs_cui_item_release(
>  	xfs_cui_release(CUI_ITEM(lip));
>  }
>  
> -static const struct xfs_item_ops xfs_cui_item_ops = {
> -	.iop_size	= xfs_cui_item_size,
> -	.iop_format	= xfs_cui_item_format,
> -	.iop_unpin	= xfs_cui_item_unpin,
> -	.iop_release	= xfs_cui_item_release,
> -};
> -
>  /*
>   * Allocate and initialize an cui item with the given number of extents.
>   */
> @@ -425,7 +420,7 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
>   * Process a refcount update intent item that was recovered from the log.
>   * We need to update the refcountbt.
>   */
> -int
> +STATIC int
>  xfs_cui_recover(
>  	struct xfs_trans		*parent_tp,
>  	struct xfs_cui_log_item		*cuip)
> @@ -573,6 +568,37 @@ xfs_cui_recover(
>  	return error;
>  }
>  
> +/* Recover the CUI if necessary. */
> +STATIC int
> +xfs_cui_item_recover(
> +	struct xfs_log_item		*lip,
> +	struct xfs_trans		*tp)
> +{
> +	struct xfs_ail			*ailp = lip->li_ailp;
> +	struct xfs_cui_log_item		*cuip = CUI_ITEM(lip);
> +	int				error;
> +
> +	/*
> +	 * Skip CUIs that we've already processed.
> +	 */
> +	if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
> +		return 0;
> +
> +	spin_unlock(&ailp->ail_lock);
> +	error = xfs_cui_recover(tp, cuip);
> +	spin_lock(&ailp->ail_lock);
> +
> +	return error;
> +}
> +
> +static const struct xfs_item_ops xfs_cui_item_ops = {
> +	.iop_size	= xfs_cui_item_size,
> +	.iop_format	= xfs_cui_item_format,
> +	.iop_unpin	= xfs_cui_item_unpin,
> +	.iop_release	= xfs_cui_item_release,
> +	.iop_recover	= xfs_cui_item_recover,
> +};
> +
>  /*
>   * Copy an CUI format buffer from the given buf, and into the destination
>   * CUI format structure.  The CUI/CUD items were designed not to need any
> diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
> index ebe12779eaac..cfaa857673a6 100644
> --- a/fs/xfs/xfs_refcount_item.h
> +++ b/fs/xfs/xfs_refcount_item.h
> @@ -77,7 +77,4 @@ struct xfs_cud_log_item {
>  extern struct kmem_zone	*xfs_cui_zone;
>  extern struct kmem_zone	*xfs_cud_zone;
>  
> -void xfs_cui_release(struct xfs_cui_log_item *);
> -int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
> -
>  #endif	/* __XFS_REFCOUNT_ITEM_H__ */
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 16/28] xfs: refactor recovered CUI log item playback
  2020-05-05  9:29   ` Chandan Babu R
@ 2020-05-05  9:29     ` Chandan Babu R
  0 siblings, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  9:29 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 2:59:02 PM IST Chandan Babu R wrote:
> On Tuesday 5 May 2020 6:42:14 AM IST Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Move the code that processes the log items created from the recovered
> > log items into the per-item source code files and use dispatch functions
> > to call them.  No functional changes.
> >
> 
> RUI log item playback is consistent with what was done before the patch is
> applied.

I meant "CUI log item playback ...".

> 
> Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>
> 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/xfs_log_recover.c   |   48 ++------------------------------------------
> >  fs/xfs/xfs_refcount_item.c |   44 ++++++++++++++++++++++++++++++++--------
> >  fs/xfs/xfs_refcount_item.h |    3 ---
> >  3 files changed, 37 insertions(+), 58 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> > index da66484acaa7..ad5ac97ed0c7 100644
> > --- a/fs/xfs/xfs_log_recover.c
> > +++ b/fs/xfs/xfs_log_recover.c
> > @@ -2553,46 +2553,6 @@ xlog_recover_process_data(
> >  	return 0;
> >  }
> >  
> > -/* Recover the CUI if necessary. */
> > -STATIC int
> > -xlog_recover_process_cui(
> > -	struct xfs_trans		*parent_tp,
> > -	struct xfs_ail			*ailp,
> > -	struct xfs_log_item		*lip)
> > -{
> > -	struct xfs_cui_log_item		*cuip;
> > -	int				error;
> > -
> > -	/*
> > -	 * Skip CUIs that we've already processed.
> > -	 */
> > -	cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
> > -	if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
> > -		return 0;
> > -
> > -	spin_unlock(&ailp->ail_lock);
> > -	error = xfs_cui_recover(parent_tp, cuip);
> > -	spin_lock(&ailp->ail_lock);
> > -
> > -	return error;
> > -}
> > -
> > -/* Release the CUI since we're cancelling everything. */
> > -STATIC void
> > -xlog_recover_cancel_cui(
> > -	struct xfs_mount		*mp,
> > -	struct xfs_ail			*ailp,
> > -	struct xfs_log_item		*lip)
> > -{
> > -	struct xfs_cui_log_item		*cuip;
> > -
> > -	cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
> > -
> > -	spin_unlock(&ailp->ail_lock);
> > -	xfs_cui_release(cuip);
> > -	spin_lock(&ailp->ail_lock);
> > -}
> > -
> >  /* Recover the BUI if necessary. */
> >  STATIC int
> >  xlog_recover_process_bui(
> > @@ -2758,10 +2718,8 @@ xlog_recover_process_intents(
> >  		switch (lip->li_type) {
> >  		case XFS_LI_EFI:
> >  		case XFS_LI_RUI:
> > -			error = lip->li_ops->iop_recover(lip, parent_tp);
> > -			break;
> >  		case XFS_LI_CUI:
> > -			error = xlog_recover_process_cui(parent_tp, ailp, lip);
> > +			error = lip->li_ops->iop_recover(lip, parent_tp);
> >  			break;
> >  		case XFS_LI_BUI:
> >  			error = xlog_recover_process_bui(parent_tp, ailp, lip);
> > @@ -2812,13 +2770,11 @@ xlog_recover_cancel_intents(
> >  		switch (lip->li_type) {
> >  		case XFS_LI_EFI:
> >  		case XFS_LI_RUI:
> > +		case XFS_LI_CUI:
> >  			spin_unlock(&ailp->ail_lock);
> >  			lip->li_ops->iop_release(lip);
> >  			spin_lock(&ailp->ail_lock);
> >  			break;
> > -		case XFS_LI_CUI:
> > -			xlog_recover_cancel_cui(log->l_mp, ailp, lip);
> > -			break;
> >  		case XFS_LI_BUI:
> >  			xlog_recover_cancel_bui(log->l_mp, ailp, lip);
> >  			break;
> > diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
> > index 28b41f5dd6bc..5b72eebd8764 100644
> > --- a/fs/xfs/xfs_refcount_item.c
> > +++ b/fs/xfs/xfs_refcount_item.c
> > @@ -24,6 +24,8 @@
> >  kmem_zone_t	*xfs_cui_zone;
> >  kmem_zone_t	*xfs_cud_zone;
> >  
> > +static const struct xfs_item_ops xfs_cui_item_ops;
> > +
> >  static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
> >  {
> >  	return container_of(lip, struct xfs_cui_log_item, cui_item);
> > @@ -46,7 +48,7 @@ xfs_cui_item_free(
> >   * committed vs unpin operations in bulk insert operations. Hence the reference
> >   * count to ensure only the last caller frees the CUI.
> >   */
> > -void
> > +STATIC void
> >  xfs_cui_release(
> >  	struct xfs_cui_log_item	*cuip)
> >  {
> > @@ -125,13 +127,6 @@ xfs_cui_item_release(
> >  	xfs_cui_release(CUI_ITEM(lip));
> >  }
> >  
> > -static const struct xfs_item_ops xfs_cui_item_ops = {
> > -	.iop_size	= xfs_cui_item_size,
> > -	.iop_format	= xfs_cui_item_format,
> > -	.iop_unpin	= xfs_cui_item_unpin,
> > -	.iop_release	= xfs_cui_item_release,
> > -};
> > -
> >  /*
> >   * Allocate and initialize an cui item with the given number of extents.
> >   */
> > @@ -425,7 +420,7 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
> >   * Process a refcount update intent item that was recovered from the log.
> >   * We need to update the refcountbt.
> >   */
> > -int
> > +STATIC int
> >  xfs_cui_recover(
> >  	struct xfs_trans		*parent_tp,
> >  	struct xfs_cui_log_item		*cuip)
> > @@ -573,6 +568,37 @@ xfs_cui_recover(
> >  	return error;
> >  }
> >  
> > +/* Recover the CUI if necessary. */
> > +STATIC int
> > +xfs_cui_item_recover(
> > +	struct xfs_log_item		*lip,
> > +	struct xfs_trans		*tp)
> > +{
> > +	struct xfs_ail			*ailp = lip->li_ailp;
> > +	struct xfs_cui_log_item		*cuip = CUI_ITEM(lip);
> > +	int				error;
> > +
> > +	/*
> > +	 * Skip CUIs that we've already processed.
> > +	 */
> > +	if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
> > +		return 0;
> > +
> > +	spin_unlock(&ailp->ail_lock);
> > +	error = xfs_cui_recover(tp, cuip);
> > +	spin_lock(&ailp->ail_lock);
> > +
> > +	return error;
> > +}
> > +
> > +static const struct xfs_item_ops xfs_cui_item_ops = {
> > +	.iop_size	= xfs_cui_item_size,
> > +	.iop_format	= xfs_cui_item_format,
> > +	.iop_unpin	= xfs_cui_item_unpin,
> > +	.iop_release	= xfs_cui_item_release,
> > +	.iop_recover	= xfs_cui_item_recover,
> > +};
> > +
> >  /*
> >   * Copy an CUI format buffer from the given buf, and into the destination
> >   * CUI format structure.  The CUI/CUD items were designed not to need any
> > diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
> > index ebe12779eaac..cfaa857673a6 100644
> > --- a/fs/xfs/xfs_refcount_item.h
> > +++ b/fs/xfs/xfs_refcount_item.h
> > @@ -77,7 +77,4 @@ struct xfs_cud_log_item {
> >  extern struct kmem_zone	*xfs_cui_zone;
> >  extern struct kmem_zone	*xfs_cud_zone;
> >  
> > -void xfs_cui_release(struct xfs_cui_log_item *);
> > -int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
> > -
> >  #endif	/* __XFS_REFCOUNT_ITEM_H__ */
> > 
> > 
> 
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 17/28] xfs: refactor recovered BUI log item playback
  2020-05-05  1:12 ` [PATCH 17/28] xfs: refactor recovered BUI " Darrick J. Wong
@ 2020-05-05  9:49   ` Chandan Babu R
  2020-05-06 15:21   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05  9:49 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:42:22 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the code that processes the log items created from the recovered
> log items into the per-item source code files and use dispatch functions
> to call them.  No functional changes.
>

BUI log item playback is consistent with what was done before the patch was
applied.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_bmap_item.c   |   44 ++++++++++++++++++----
>  fs/xfs/xfs_bmap_item.h   |    3 --
>  fs/xfs/xfs_log_recover.c |   91 ++++++----------------------------------------
>  3 files changed, 47 insertions(+), 91 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
> index 0fbebef69e26..f88ebf8634c4 100644
> --- a/fs/xfs/xfs_bmap_item.c
> +++ b/fs/xfs/xfs_bmap_item.c
> @@ -28,6 +28,8 @@
>  kmem_zone_t	*xfs_bui_zone;
>  kmem_zone_t	*xfs_bud_zone;
>  
> +static const struct xfs_item_ops xfs_bui_item_ops;
> +
>  static inline struct xfs_bui_log_item *BUI_ITEM(struct xfs_log_item *lip)
>  {
>  	return container_of(lip, struct xfs_bui_log_item, bui_item);
> @@ -47,7 +49,7 @@ xfs_bui_item_free(
>   * committed vs unpin operations in bulk insert operations. Hence the reference
>   * count to ensure only the last caller frees the BUI.
>   */
> -void
> +STATIC void
>  xfs_bui_release(
>  	struct xfs_bui_log_item	*buip)
>  {
> @@ -126,13 +128,6 @@ xfs_bui_item_release(
>  	xfs_bui_release(BUI_ITEM(lip));
>  }
>  
> -static const struct xfs_item_ops xfs_bui_item_ops = {
> -	.iop_size	= xfs_bui_item_size,
> -	.iop_format	= xfs_bui_item_format,
> -	.iop_unpin	= xfs_bui_item_unpin,
> -	.iop_release	= xfs_bui_item_release,
> -};
> -
>  /*
>   * Allocate and initialize an bui item with the given number of extents.
>   */
> @@ -425,7 +420,7 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
>   * Process a bmap update intent item that was recovered from the log.
>   * We need to update some inode's bmbt.
>   */
> -int
> +STATIC int
>  xfs_bui_recover(
>  	struct xfs_trans		*parent_tp,
>  	struct xfs_bui_log_item		*buip)
> @@ -560,6 +555,37 @@ xfs_bui_recover(
>  	return error;
>  }
>  
> +/* Recover the BUI if necessary. */
> +STATIC int
> +xfs_bui_item_recover(
> +	struct xfs_log_item		*lip,
> +	struct xfs_trans		*tp)
> +{
> +	struct xfs_ail			*ailp = lip->li_ailp;
> +	struct xfs_bui_log_item		*buip = BUI_ITEM(lip);
> +	int				error;
> +
> +	/*
> +	 * Skip BUIs that we've already processed.
> +	 */
> +	if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags))
> +		return 0;
> +
> +	spin_unlock(&ailp->ail_lock);
> +	error = xfs_bui_recover(tp, buip);
> +	spin_lock(&ailp->ail_lock);
> +
> +	return error;
> +}
> +
> +static const struct xfs_item_ops xfs_bui_item_ops = {
> +	.iop_size	= xfs_bui_item_size,
> +	.iop_format	= xfs_bui_item_format,
> +	.iop_unpin	= xfs_bui_item_unpin,
> +	.iop_release	= xfs_bui_item_release,
> +	.iop_recover	= xfs_bui_item_recover,
> +};
> +
>  /*
>   * Copy an BUI format buffer from the given buf, and into the destination
>   * BUI format structure.  The BUI/BUD items were designed not to need any
> diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
> index 515b1d5d6ab7..44d06e62f8f9 100644
> --- a/fs/xfs/xfs_bmap_item.h
> +++ b/fs/xfs/xfs_bmap_item.h
> @@ -74,7 +74,4 @@ struct xfs_bud_log_item {
>  extern struct kmem_zone	*xfs_bui_zone;
>  extern struct kmem_zone	*xfs_bud_zone;
>  
> -void xfs_bui_release(struct xfs_bui_log_item *);
> -int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip);
> -
>  #endif	/* __XFS_BMAP_ITEM_H__ */
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index ad5ac97ed0c7..20ee32c2652d 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2553,60 +2553,6 @@ xlog_recover_process_data(
>  	return 0;
>  }
>  
> -/* Recover the BUI if necessary. */
> -STATIC int
> -xlog_recover_process_bui(
> -	struct xfs_trans		*parent_tp,
> -	struct xfs_ail			*ailp,
> -	struct xfs_log_item		*lip)
> -{
> -	struct xfs_bui_log_item		*buip;
> -	int				error;
> -
> -	/*
> -	 * Skip BUIs that we've already processed.
> -	 */
> -	buip = container_of(lip, struct xfs_bui_log_item, bui_item);
> -	if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags))
> -		return 0;
> -
> -	spin_unlock(&ailp->ail_lock);
> -	error = xfs_bui_recover(parent_tp, buip);
> -	spin_lock(&ailp->ail_lock);
> -
> -	return error;
> -}
> -
> -/* Release the BUI since we're cancelling everything. */
> -STATIC void
> -xlog_recover_cancel_bui(
> -	struct xfs_mount		*mp,
> -	struct xfs_ail			*ailp,
> -	struct xfs_log_item		*lip)
> -{
> -	struct xfs_bui_log_item		*buip;
> -
> -	buip = container_of(lip, struct xfs_bui_log_item, bui_item);
> -
> -	spin_unlock(&ailp->ail_lock);
> -	xfs_bui_release(buip);
> -	spin_lock(&ailp->ail_lock);
> -}
> -
> -/* Is this log item a deferred action intent? */
> -static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
> -{
> -	switch (lip->li_type) {
> -	case XFS_LI_EFI:
> -	case XFS_LI_RUI:
> -	case XFS_LI_CUI:
> -	case XFS_LI_BUI:
> -		return true;
> -	default:
> -		return false;
> -	}
> -}
> -
>  /* Take all the collected deferred ops and finish them in order. */
>  static int
>  xlog_finish_defer_ops(
> @@ -2641,6 +2587,12 @@ xlog_finish_defer_ops(
>  	return xfs_trans_commit(tp);
>  }
>  
> +/* Is this log item a deferred action intent? */
> +static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
> +{
> +	return lip->li_ops->iop_recover != NULL;
> +}
> +
>  /*
>   * When this is called, all of the log intent items which did not have
>   * corresponding log done items should be in the AIL.  What we do now
> @@ -2711,20 +2663,11 @@ xlog_recover_process_intents(
>  
>  		/*
>  		 * NOTE: If your intent processing routine can create more
> -		 * deferred ops, you /must/ attach them to the dfops in this
> -		 * routine or else those subsequent intents will get
> +		 * deferred ops, you /must/ attach them to the transaction in
> +		 * this routine or else those subsequent intents will get
>  		 * replayed in the wrong order!
>  		 */
> -		switch (lip->li_type) {
> -		case XFS_LI_EFI:
> -		case XFS_LI_RUI:
> -		case XFS_LI_CUI:
> -			error = lip->li_ops->iop_recover(lip, parent_tp);
> -			break;
> -		case XFS_LI_BUI:
> -			error = xlog_recover_process_bui(parent_tp, ailp, lip);
> -			break;
> -		}
> +		error = lip->li_ops->iop_recover(lip, parent_tp);
>  		if (error)
>  			goto out;
>  		lip = xfs_trans_ail_cursor_next(ailp, &cur);
> @@ -2767,19 +2710,9 @@ xlog_recover_cancel_intents(
>  			break;
>  		}
>  
> -		switch (lip->li_type) {
> -		case XFS_LI_EFI:
> -		case XFS_LI_RUI:
> -		case XFS_LI_CUI:
> -			spin_unlock(&ailp->ail_lock);
> -			lip->li_ops->iop_release(lip);
> -			spin_lock(&ailp->ail_lock);
> -			break;
> -		case XFS_LI_BUI:
> -			xlog_recover_cancel_bui(log->l_mp, ailp, lip);
> -			break;
> -		}
> -
> +		spin_unlock(&ailp->ail_lock);
> +		lip->li_ops->iop_release(lip);
> +		spin_lock(&ailp->ail_lock);
>  		lip = xfs_trans_ail_cursor_next(ailp, &cur);
>  	}
>  
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 18/28] xfs: refactor unlinked inode recovery
  2020-05-05  1:12 ` [PATCH 18/28] xfs: refactor unlinked inode recovery Darrick J. Wong
@ 2020-05-05 13:05   ` Chandan Babu R
  2020-05-06 15:26   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05 13:05 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:42:29 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the code that processes unlinked inodes into a separate file in
> preparation for centralizing the log recovery bits that have to walk
> every AG.  No functional changes.
>

The functionality is indeed the same as it was before applying this patch.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>


> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/Makefile                 |    3 -
>  fs/xfs/libxfs/xfs_log_recover.h |    1 
>  fs/xfs/xfs_log_recover.c        |  177 -----------------------------------
>  fs/xfs/xfs_unlink_recover.c     |  198 +++++++++++++++++++++++++++++++++++++++
>  4 files changed, 202 insertions(+), 177 deletions(-)
>  create mode 100644 fs/xfs/xfs_unlink_recover.c
> 
> 
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index 04611a1068b4..505c898d6cee 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -109,7 +109,8 @@ xfs-y				+= xfs_log.o \
>  				   xfs_rmap_item.o \
>  				   xfs_log_recover.o \
>  				   xfs_trans_ail.o \
> -				   xfs_trans_buf.o
> +				   xfs_trans_buf.o \
> +				   xfs_unlink_recover.o
>  
>  # optional features
>  xfs-$(CONFIG_XFS_QUOTA)		+= xfs_dquot.o \
> diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
> index a45f6e9fa47b..33c14dd22b77 100644
> --- a/fs/xfs/libxfs/xfs_log_recover.h
> +++ b/fs/xfs/libxfs/xfs_log_recover.h
> @@ -124,5 +124,6 @@ bool xlog_add_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
>  bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
>  bool xlog_put_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
>  void xlog_recover_iodone(struct xfs_buf *bp);
> +void xlog_recover_process_unlinked(struct xlog *log);
>  
>  #endif	/* __XFS_LOG_RECOVER_H__ */
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 20ee32c2652d..362296b34490 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2720,181 +2720,6 @@ xlog_recover_cancel_intents(
>  	spin_unlock(&ailp->ail_lock);
>  }
>  
> -/*
> - * This routine performs a transaction to null out a bad inode pointer
> - * in an agi unlinked inode hash bucket.
> - */
> -STATIC void
> -xlog_recover_clear_agi_bucket(
> -	xfs_mount_t	*mp,
> -	xfs_agnumber_t	agno,
> -	int		bucket)
> -{
> -	xfs_trans_t	*tp;
> -	xfs_agi_t	*agi;
> -	xfs_buf_t	*agibp;
> -	int		offset;
> -	int		error;
> -
> -	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp);
> -	if (error)
> -		goto out_error;
> -
> -	error = xfs_read_agi(mp, tp, agno, &agibp);
> -	if (error)
> -		goto out_abort;
> -
> -	agi = agibp->b_addr;
> -	agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
> -	offset = offsetof(xfs_agi_t, agi_unlinked) +
> -		 (sizeof(xfs_agino_t) * bucket);
> -	xfs_trans_log_buf(tp, agibp, offset,
> -			  (offset + sizeof(xfs_agino_t) - 1));
> -
> -	error = xfs_trans_commit(tp);
> -	if (error)
> -		goto out_error;
> -	return;
> -
> -out_abort:
> -	xfs_trans_cancel(tp);
> -out_error:
> -	xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
> -	return;
> -}
> -
> -STATIC xfs_agino_t
> -xlog_recover_process_one_iunlink(
> -	struct xfs_mount		*mp,
> -	xfs_agnumber_t			agno,
> -	xfs_agino_t			agino,
> -	int				bucket)
> -{
> -	struct xfs_buf			*ibp;
> -	struct xfs_dinode		*dip;
> -	struct xfs_inode		*ip;
> -	xfs_ino_t			ino;
> -	int				error;
> -
> -	ino = XFS_AGINO_TO_INO(mp, agno, agino);
> -	error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
> -	if (error)
> -		goto fail;
> -
> -	/*
> -	 * Get the on disk inode to find the next inode in the bucket.
> -	 */
> -	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0);
> -	if (error)
> -		goto fail_iput;
> -
> -	xfs_iflags_clear(ip, XFS_IRECOVERY);
> -	ASSERT(VFS_I(ip)->i_nlink == 0);
> -	ASSERT(VFS_I(ip)->i_mode != 0);
> -
> -	/* setup for the next pass */
> -	agino = be32_to_cpu(dip->di_next_unlinked);
> -	xfs_buf_relse(ibp);
> -
> -	/*
> -	 * Prevent any DMAPI event from being sent when the reference on
> -	 * the inode is dropped.
> -	 */
> -	ip->i_d.di_dmevmask = 0;
> -
> -	xfs_irele(ip);
> -	return agino;
> -
> - fail_iput:
> -	xfs_irele(ip);
> - fail:
> -	/*
> -	 * We can't read in the inode this bucket points to, or this inode
> -	 * is messed up.  Just ditch this bucket of inodes.  We will lose
> -	 * some inodes and space, but at least we won't hang.
> -	 *
> -	 * Call xlog_recover_clear_agi_bucket() to perform a transaction to
> -	 * clear the inode pointer in the bucket.
> -	 */
> -	xlog_recover_clear_agi_bucket(mp, agno, bucket);
> -	return NULLAGINO;
> -}
> -
> -/*
> - * Recover AGI unlinked lists
> - *
> - * This is called during recovery to process any inodes which we unlinked but
> - * not freed when the system crashed.  These inodes will be on the lists in the
> - * AGI blocks. What we do here is scan all the AGIs and fully truncate and free
> - * any inodes found on the lists. Each inode is removed from the lists when it
> - * has been fully truncated and is freed. The freeing of the inode and its
> - * removal from the list must be atomic.
> - *
> - * If everything we touch in the agi processing loop is already in memory, this
> - * loop can hold the cpu for a long time. It runs without lock contention,
> - * memory allocation contention, the need wait for IO, etc, and so will run
> - * until we either run out of inodes to process, run low on memory or we run out
> - * of log space.
> - *
> - * This behaviour is bad for latency on single CPU and non-preemptible kernels,
> - * and can prevent other filesytem work (such as CIL pushes) from running. This
> - * can lead to deadlocks if the recovery process runs out of log reservation
> - * space. Hence we need to yield the CPU when there is other kernel work
> - * scheduled on this CPU to ensure other scheduled work can run without undue
> - * latency.
> - */
> -STATIC void
> -xlog_recover_process_iunlinks(
> -	struct xlog	*log)
> -{
> -	xfs_mount_t	*mp;
> -	xfs_agnumber_t	agno;
> -	xfs_agi_t	*agi;
> -	xfs_buf_t	*agibp;
> -	xfs_agino_t	agino;
> -	int		bucket;
> -	int		error;
> -
> -	mp = log->l_mp;
> -
> -	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> -		/*
> -		 * Find the agi for this ag.
> -		 */
> -		error = xfs_read_agi(mp, NULL, agno, &agibp);
> -		if (error) {
> -			/*
> -			 * AGI is b0rked. Don't process it.
> -			 *
> -			 * We should probably mark the filesystem as corrupt
> -			 * after we've recovered all the ag's we can....
> -			 */
> -			continue;
> -		}
> -		/*
> -		 * Unlock the buffer so that it can be acquired in the normal
> -		 * course of the transaction to truncate and free each inode.
> -		 * Because we are not racing with anyone else here for the AGI
> -		 * buffer, we don't even need to hold it locked to read the
> -		 * initial unlinked bucket entries out of the buffer. We keep
> -		 * buffer reference though, so that it stays pinned in memory
> -		 * while we need the buffer.
> -		 */
> -		agi = agibp->b_addr;
> -		xfs_buf_unlock(agibp);
> -
> -		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
> -			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
> -			while (agino != NULLAGINO) {
> -				agino = xlog_recover_process_one_iunlink(mp,
> -							agno, agino, bucket);
> -				cond_resched();
> -			}
> -		}
> -		xfs_buf_rele(agibp);
> -	}
> -}
> -
>  STATIC void
>  xlog_unpack_data(
>  	struct xlog_rec_header	*rhead,
> @@ -3574,7 +3399,7 @@ xlog_recover_finish(
>  		 */
>  		xfs_log_force(log->l_mp, XFS_LOG_SYNC);
>  
> -		xlog_recover_process_iunlinks(log);
> +		xlog_recover_process_unlinked(log);
>  
>  		xlog_recover_check_summary(log);
>  
> diff --git a/fs/xfs/xfs_unlink_recover.c b/fs/xfs/xfs_unlink_recover.c
> new file mode 100644
> index 000000000000..2a19d096e88d
> --- /dev/null
> +++ b/fs/xfs/xfs_unlink_recover.c
> @@ -0,0 +1,198 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2000-2006 Silicon Graphics, Inc.
> + * All Rights Reserved.
> + */
> +#include "xfs.h"
> +#include "xfs_fs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_bit.h"
> +#include "xfs_sb.h"
> +#include "xfs_mount.h"
> +#include "xfs_defer.h"
> +#include "xfs_inode.h"
> +#include "xfs_trans.h"
> +#include "xfs_log.h"
> +#include "xfs_log_priv.h"
> +#include "xfs_log_recover.h"
> +#include "xfs_trans_priv.h"
> +#include "xfs_ialloc.h"
> +#include "xfs_icache.h"
> +
> +/*
> + * This routine performs a transaction to null out a bad inode pointer
> + * in an agi unlinked inode hash bucket.
> + */
> +STATIC void
> +xlog_recover_clear_agi_bucket(
> +	struct xfs_mount	*mp,
> +	xfs_agnumber_t		agno,
> +	int			bucket)
> +{
> +	struct xfs_trans	*tp;
> +	struct xfs_agi		*agi;
> +	struct xfs_buf		*agibp;
> +	int			offset;
> +	int			error;
> +
> +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp);
> +	if (error)
> +		goto out_error;
> +
> +	error = xfs_read_agi(mp, tp, agno, &agibp);
> +	if (error)
> +		goto out_abort;
> +
> +	agi = agibp->b_addr;
> +	agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
> +	offset = offsetof(xfs_agi_t, agi_unlinked) +
> +		 (sizeof(xfs_agino_t) * bucket);
> +	xfs_trans_log_buf(tp, agibp, offset,
> +			  (offset + sizeof(xfs_agino_t) - 1));
> +
> +	error = xfs_trans_commit(tp);
> +	if (error)
> +		goto out_error;
> +	return;
> +
> +out_abort:
> +	xfs_trans_cancel(tp);
> +out_error:
> +	xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
> +	return;
> +}
> +
> +STATIC xfs_agino_t
> +xlog_recover_process_one_iunlink(
> +	struct xfs_mount		*mp,
> +	xfs_agnumber_t			agno,
> +	xfs_agino_t			agino,
> +	int				bucket)
> +{
> +	struct xfs_buf			*ibp;
> +	struct xfs_dinode		*dip;
> +	struct xfs_inode		*ip;
> +	xfs_ino_t			ino;
> +	int				error;
> +
> +	ino = XFS_AGINO_TO_INO(mp, agno, agino);
> +	error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
> +	if (error)
> +		goto fail;
> +
> +	/*
> +	 * Get the on disk inode to find the next inode in the bucket.
> +	 */
> +	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0);
> +	if (error)
> +		goto fail_iput;
> +
> +	xfs_iflags_clear(ip, XFS_IRECOVERY);
> +	ASSERT(VFS_I(ip)->i_nlink == 0);
> +	ASSERT(VFS_I(ip)->i_mode != 0);
> +
> +	/* setup for the next pass */
> +	agino = be32_to_cpu(dip->di_next_unlinked);
> +	xfs_buf_relse(ibp);
> +
> +	/*
> +	 * Prevent any DMAPI event from being sent when the reference on
> +	 * the inode is dropped.
> +	 */
> +	ip->i_d.di_dmevmask = 0;
> +
> +	xfs_irele(ip);
> +	return agino;
> +
> + fail_iput:
> +	xfs_irele(ip);
> + fail:
> +	/*
> +	 * We can't read in the inode this bucket points to, or this inode
> +	 * is messed up.  Just ditch this bucket of inodes.  We will lose
> +	 * some inodes and space, but at least we won't hang.
> +	 *
> +	 * Call xlog_recover_clear_agi_bucket() to perform a transaction to
> +	 * clear the inode pointer in the bucket.
> +	 */
> +	xlog_recover_clear_agi_bucket(mp, agno, bucket);
> +	return NULLAGINO;
> +}
> +
> +/*
> + * Recover AGI unlinked lists
> + *
> + * This is called during recovery to process any inodes which we unlinked but
> + * not freed when the system crashed.  These inodes will be on the lists in the
> + * AGI blocks. What we do here is scan all the AGIs and fully truncate and free
> + * any inodes found on the lists. Each inode is removed from the lists when it
> + * has been fully truncated and is freed. The freeing of the inode and its
> + * removal from the list must be atomic.
> + *
> + * If everything we touch in the agi processing loop is already in memory, this
> + * loop can hold the cpu for a long time. It runs without lock contention,
> + * memory allocation contention, the need wait for IO, etc, and so will run
> + * until we either run out of inodes to process, run low on memory or we run out
> + * of log space.
> + *
> + * This behaviour is bad for latency on single CPU and non-preemptible kernels,
> + * and can prevent other filesytem work (such as CIL pushes) from running. This
> + * can lead to deadlocks if the recovery process runs out of log reservation
> + * space. Hence we need to yield the CPU when there is other kernel work
> + * scheduled on this CPU to ensure other scheduled work can run without undue
> + * latency.
> + */
> +void
> +xlog_recover_process_unlinked(
> +	struct xlog		*log)
> +{
> +	struct xfs_mount	*mp;
> +	struct xfs_agi		*agi;
> +	struct xfs_buf		*agibp;
> +	xfs_agnumber_t		agno;
> +	xfs_agino_t		agino;
> +	int			bucket;
> +	int			error;
> +
> +	mp = log->l_mp;
> +
> +	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> +		/*
> +		 * Find the agi for this ag.
> +		 */
> +		error = xfs_read_agi(mp, NULL, agno, &agibp);
> +		if (error) {
> +			/*
> +			 * AGI is b0rked. Don't process it.
> +			 *
> +			 * We should probably mark the filesystem as corrupt
> +			 * after we've recovered all the ag's we can....
> +			 */
> +			continue;
> +		}
> +		/*
> +		 * Unlock the buffer so that it can be acquired in the normal
> +		 * course of the transaction to truncate and free each inode.
> +		 * Because we are not racing with anyone else here for the AGI
> +		 * buffer, we don't even need to hold it locked to read the
> +		 * initial unlinked bucket entries out of the buffer. We keep
> +		 * buffer reference though, so that it stays pinned in memory
> +		 * while we need the buffer.
> +		 */
> +		agi = agibp->b_addr;
> +		xfs_buf_unlock(agibp);
> +
> +		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
> +			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
> +			while (agino != NULLAGINO) {
> +				agino = xlog_recover_process_one_iunlink(mp,
> +							agno, agino, bucket);
> +				cond_resched();
> +			}
> +		}
> +		xfs_buf_rele(agibp);
> +	}
> +}
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 19/28] xfs: refactor xlog_recover_process_unlinked
  2020-05-05  1:12 ` [PATCH 19/28] xfs: refactor xlog_recover_process_unlinked Darrick J. Wong
@ 2020-05-05 13:19   ` Chandan Babu R
  2020-05-05 13:30     ` Chandan Babu R
  2020-05-06 19:11     ` Darrick J. Wong
  2020-05-06 15:27   ` Christoph Hellwig
  1 sibling, 2 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05 13:19 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:42:35 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Hoist the unlinked inode processing logic out of the AG loop and into
> its own function.  No functional changes.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_unlink_recover.c |   91 +++++++++++++++++++++++++------------------
>  1 file changed, 52 insertions(+), 39 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_unlink_recover.c b/fs/xfs/xfs_unlink_recover.c
> index 2a19d096e88d..413b34085640 100644
> --- a/fs/xfs/xfs_unlink_recover.c
> +++ b/fs/xfs/xfs_unlink_recover.c
> @@ -145,54 +145,67 @@ xlog_recover_process_one_iunlink(
>   * scheduled on this CPU to ensure other scheduled work can run without undue
>   * latency.
>   */
> -void
> -xlog_recover_process_unlinked(
> -	struct xlog		*log)
> +STATIC int
> +xlog_recover_process_iunlinked(
> +	struct xfs_mount	*mp,
> +	xfs_agnumber_t		agno)
>  {
> -	struct xfs_mount	*mp;
>  	struct xfs_agi		*agi;
>  	struct xfs_buf		*agibp;
> -	xfs_agnumber_t		agno;
>  	xfs_agino_t		agino;
>  	int			bucket;
>  	int			error;
>  
> -	mp = log->l_mp;
> -
> -	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> -		/*
> -		 * Find the agi for this ag.
> -		 */
> -		error = xfs_read_agi(mp, NULL, agno, &agibp);
> -		if (error) {
> -			/*
> -			 * AGI is b0rked. Don't process it.
> -			 *
> -			 * We should probably mark the filesystem as corrupt
> -			 * after we've recovered all the ag's we can....
> -			 */
> -			continue;
> -		}
> +	/*
> +	 * Find the agi for this ag.
> +	 */
> +	error = xfs_read_agi(mp, NULL, agno, &agibp);
> +	if (error) {
>  		/*
> -		 * Unlock the buffer so that it can be acquired in the normal
> -		 * course of the transaction to truncate and free each inode.
> -		 * Because we are not racing with anyone else here for the AGI
> -		 * buffer, we don't even need to hold it locked to read the
> -		 * initial unlinked bucket entries out of the buffer. We keep
> -		 * buffer reference though, so that it stays pinned in memory
> -		 * while we need the buffer.
> +		 * AGI is b0rked. Don't process it.
> +		 *
> +		 * We should probably mark the filesystem as corrupt
> +		 * after we've recovered all the ag's we can....
>  		 */
> -		agi = agibp->b_addr;
> -		xfs_buf_unlock(agibp);
> -
> -		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
> -			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
> -			while (agino != NULLAGINO) {
> -				agino = xlog_recover_process_one_iunlink(mp,
> -							agno, agino, bucket);
> -				cond_resched();
> -			}
> +		return error;


This causes a change in behaviour, i.e. an error return from here would cause
xlog_recover_process_unlinked() to break out of its loop over all AGs. Before
this change, XFS would continue to process all the remaining AGs, as described
by the above comment.


> +	}
> +
> +	/*
> +	 * Unlock the buffer so that it can be acquired in the normal
> +	 * course of the transaction to truncate and free each inode.
> +	 * Because we are not racing with anyone else here for the AGI
> +	 * buffer, we don't even need to hold it locked to read the
> +	 * initial unlinked bucket entries out of the buffer. We keep
> +	 * buffer reference though, so that it stays pinned in memory
> +	 * while we need the buffer.
> +	 */
> +	agi = agibp->b_addr;
> +	xfs_buf_unlock(agibp);
> +
> +	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
> +		agino = be32_to_cpu(agi->agi_unlinked[bucket]);
> +		while (agino != NULLAGINO) {
> +			agino = xlog_recover_process_one_iunlink(mp,
> +						agno, agino, bucket);
> +			cond_resched();
>  		}
> -		xfs_buf_rele(agibp);
> +	}
> +	xfs_buf_rele(agibp);
> +
> +	return 0;
> +}
> +
> +void
> +xlog_recover_process_unlinked(
> +	struct xlog		*log)
> +{
> +	struct xfs_mount	*mp = log->l_mp;
> +	xfs_agnumber_t		agno;
> +	int			error;
> +
> +	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> +		error = xlog_recover_process_iunlinked(mp, agno);
> +		if (error)
> +			break;
>  	}
>  }
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 19/28] xfs: refactor xlog_recover_process_unlinked
  2020-05-05 13:19   ` Chandan Babu R
@ 2020-05-05 13:30     ` Chandan Babu R
  2020-05-06 19:11     ` Darrick J. Wong
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05 13:30 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:49:17 PM IST Chandan Babu R wrote:
> On Tuesday 5 May 2020 6:42:35 AM IST Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Hoist the unlinked inode processing logic out of the AG loop and into
> > its own function.  No functional changes.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/xfs_unlink_recover.c |   91 +++++++++++++++++++++++++------------------
> >  1 file changed, 52 insertions(+), 39 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/xfs_unlink_recover.c b/fs/xfs/xfs_unlink_recover.c
> > index 2a19d096e88d..413b34085640 100644
> > --- a/fs/xfs/xfs_unlink_recover.c
> > +++ b/fs/xfs/xfs_unlink_recover.c
> > @@ -145,54 +145,67 @@ xlog_recover_process_one_iunlink(
> >   * scheduled on this CPU to ensure other scheduled work can run without undue
> >   * latency.
> >   */
> > -void
> > -xlog_recover_process_unlinked(
> > -	struct xlog		*log)
> > +STATIC int
> > +xlog_recover_process_iunlinked(
> > +	struct xfs_mount	*mp,
> > +	xfs_agnumber_t		agno)
> >  {
> > -	struct xfs_mount	*mp;
> >  	struct xfs_agi		*agi;
> >  	struct xfs_buf		*agibp;
> > -	xfs_agnumber_t		agno;
> >  	xfs_agino_t		agino;
> >  	int			bucket;
> >  	int			error;
> >  
> > -	mp = log->l_mp;
> > -
> > -	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> > -		/*
> > -		 * Find the agi for this ag.
> > -		 */
> > -		error = xfs_read_agi(mp, NULL, agno, &agibp);
> > -		if (error) {
> > -			/*
> > -			 * AGI is b0rked. Don't process it.
> > -			 *
> > -			 * We should probably mark the filesystem as corrupt
> > -			 * after we've recovered all the ag's we can....
> > -			 */
> > -			continue;
> > -		}
> > +	/*
> > +	 * Find the agi for this ag.
> > +	 */
> > +	error = xfs_read_agi(mp, NULL, agno, &agibp);
> > +	if (error) {
> >  		/*
> > -		 * Unlock the buffer so that it can be acquired in the normal
> > -		 * course of the transaction to truncate and free each inode.
> > -		 * Because we are not racing with anyone else here for the AGI
> > -		 * buffer, we don't even need to hold it locked to read the
> > -		 * initial unlinked bucket entries out of the buffer. We keep
> > -		 * buffer reference though, so that it stays pinned in memory
> > -		 * while we need the buffer.
> > +		 * AGI is b0rked. Don't process it.
> > +		 *
> > +		 * We should probably mark the filesystem as corrupt
> > +		 * after we've recovered all the ag's we can....
> >  		 */
> > -		agi = agibp->b_addr;
> > -		xfs_buf_unlock(agibp);
> > -
> > -		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
> > -			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
> > -			while (agino != NULLAGINO) {
> > -				agino = xlog_recover_process_one_iunlink(mp,
> > -							agno, agino, bucket);
> > -				cond_resched();
> > -			}
> > +		return error;
> 
> 
> This causes a change in behaviour i.e. an error return from here would cause
> xlog_recover_process_unlinked() to break "loop on all AGs". Before this
> change, XFS would continue to process all the remaining AGs as described by
> the above comment.
>

I noticed that in the next patch the error code is propagated to the calling
functions. This is done intentionally: since the AGI(s) are already corrupt,
the code would most likely hit this corruption during normal fs operation
anyway.

Hence,

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> 
> > +	}
> > +
> > +	/*
> > +	 * Unlock the buffer so that it can be acquired in the normal
> > +	 * course of the transaction to truncate and free each inode.
> > +	 * Because we are not racing with anyone else here for the AGI
> > +	 * buffer, we don't even need to hold it locked to read the
> > +	 * initial unlinked bucket entries out of the buffer. We keep
> > +	 * buffer reference though, so that it stays pinned in memory
> > +	 * while we need the buffer.
> > +	 */
> > +	agi = agibp->b_addr;
> > +	xfs_buf_unlock(agibp);
> > +
> > +	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
> > +		agino = be32_to_cpu(agi->agi_unlinked[bucket]);
> > +		while (agino != NULLAGINO) {
> > +			agino = xlog_recover_process_one_iunlink(mp,
> > +						agno, agino, bucket);
> > +			cond_resched();
> >  		}
> > -		xfs_buf_rele(agibp);
> > +	}
> > +	xfs_buf_rele(agibp);
> > +
> > +	return 0;
> > +}
> > +
> > +void
> > +xlog_recover_process_unlinked(
> > +	struct xlog		*log)
> > +{
> > +	struct xfs_mount	*mp = log->l_mp;
> > +	xfs_agnumber_t		agno;
> > +	int			error;
> > +
> > +	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> > +		error = xlog_recover_process_iunlinked(mp, agno);
> > +		if (error)
> > +			break;
> >  	}
> >  }
> > 
> > 
> 
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 20/28] xfs: report iunlink recovery failure upwards
  2020-05-05  1:12 ` [PATCH 20/28] xfs: report iunlink recovery failure upwards Darrick J. Wong
@ 2020-05-05 13:43   ` Chandan Babu R
  2020-05-06 15:27   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-05 13:43 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:42:41 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> If we fail to recover unlinked inodes due to corruption or whatnot, we
> should report this upwards and fail the mount instead of continuing on
> like nothing's wrong.  Eventually the user will trip over the busted
> AGI anyway.

The changes look good to me.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_log_recover.h |    2 +-
>  fs/xfs/xfs_log.c                |    4 +++-
>  fs/xfs/xfs_log_recover.c        |    7 ++++++-
>  fs/xfs/xfs_unlink_recover.c     |    4 +++-
>  4 files changed, 13 insertions(+), 4 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
> index 33c14dd22b77..d4d6d4f84fda 100644
> --- a/fs/xfs/libxfs/xfs_log_recover.h
> +++ b/fs/xfs/libxfs/xfs_log_recover.h
> @@ -124,6 +124,6 @@ bool xlog_add_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
>  bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
>  bool xlog_put_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
>  void xlog_recover_iodone(struct xfs_buf *bp);
> -void xlog_recover_process_unlinked(struct xlog *log);
> +int xlog_recover_process_unlinked(struct xlog *log);
>  
>  #endif	/* __XFS_LOG_RECOVER_H__ */
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index 00fda2e8e738..8203b9b0fd08 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -727,6 +727,8 @@ xfs_log_mount_finish(
>  		xfs_log_work_queue(mp);
>  	mp->m_super->s_flags &= ~SB_ACTIVE;
>  	evict_inodes(mp->m_super);
> +	if (error)
> +		return error;
>  
>  	/*
>  	 * Drain the buffer LRU after log recovery. This is required for v4
> @@ -737,7 +739,7 @@ xfs_log_mount_finish(
>  	 * Don't push in the error case because the AIL may have pending intents
>  	 * that aren't removed until recovery is cancelled.
>  	 */
> -	if (!error && recovered) {
> +	if (recovered) {
>  		xfs_log_force(mp, XFS_LOG_SYNC);
>  		xfs_ail_push_all_sync(mp->m_ail);
>  	}
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 362296b34490..0ccc09c004f1 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -3399,7 +3399,12 @@ xlog_recover_finish(
>  		 */
>  		xfs_log_force(log->l_mp, XFS_LOG_SYNC);
>  
> -		xlog_recover_process_unlinked(log);
> +		error = xlog_recover_process_unlinked(log);
> +		if (error) {
> +			xfs_alert(log->l_mp,
> +					"Failed to recover unlinked metadata");
> +			return error;
> +		}
>  
>  		xlog_recover_check_summary(log);
>  
> diff --git a/fs/xfs/xfs_unlink_recover.c b/fs/xfs/xfs_unlink_recover.c
> index 413b34085640..fe7fa3d623f2 100644
> --- a/fs/xfs/xfs_unlink_recover.c
> +++ b/fs/xfs/xfs_unlink_recover.c
> @@ -195,7 +195,7 @@ xlog_recover_process_iunlinked(
>  	return 0;
>  }
>  
> -void
> +int
>  xlog_recover_process_unlinked(
>  	struct xlog		*log)
>  {
> @@ -208,4 +208,6 @@ xlog_recover_process_unlinked(
>  		if (error)
>  			break;
>  	}
> +
> +	return error;
>  }
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 21/28] xfs: refactor releasing finished intents during log recovery
  2020-05-05  1:12 ` [PATCH 21/28] xfs: refactor releasing finished intents during log recovery Darrick J. Wong
@ 2020-05-06  4:06   ` Chandan Babu R
  2020-05-06 15:29   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-06  4:06 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:42:47 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Replace the open-coded AIL item walking with a proper helper when we're
> trying to release an intent item that has been finished.
>

The functionality is the same as it was before applying this patch.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_log_recover.h |    3 +++
>  fs/xfs/xfs_bmap_item.c          |   42 +++++++++------------------------------
>  fs/xfs/xfs_extfree_item.c       |   42 +++++++++------------------------------
>  fs/xfs/xfs_log_recover.c        |   35 ++++++++++++++++++++++++++++++++-
>  fs/xfs/xfs_refcount_item.c      |   42 +++++++++------------------------------
>  fs/xfs/xfs_rmap_item.c          |   42 +++++++++------------------------------
>  fs/xfs/xfs_trans.h              |    1 +
>  7 files changed, 78 insertions(+), 129 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
> index d4d6d4f84fda..b875819a1c04 100644
> --- a/fs/xfs/libxfs/xfs_log_recover.h
> +++ b/fs/xfs/libxfs/xfs_log_recover.h
> @@ -126,4 +126,7 @@ bool xlog_put_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
>  void xlog_recover_iodone(struct xfs_buf *bp);
>  int xlog_recover_process_unlinked(struct xlog *log);
>  
> +void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
> +		uint64_t intent_id);
> +
>  #endif	/* __XFS_LOG_RECOVER_H__ */
> diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
> index f88ebf8634c4..96627ea800c8 100644
> --- a/fs/xfs/xfs_bmap_item.c
> +++ b/fs/xfs/xfs_bmap_item.c
> @@ -578,12 +578,21 @@ xfs_bui_item_recover(
>  	return error;
>  }
>  
> +STATIC bool
> +xfs_bui_item_match(
> +	struct xfs_log_item	*lip,
> +	uint64_t		intent_id)
> +{
> +	return BUI_ITEM(lip)->bui_format.bui_id == intent_id;
> +}
> +
>  static const struct xfs_item_ops xfs_bui_item_ops = {
>  	.iop_size	= xfs_bui_item_size,
>  	.iop_format	= xfs_bui_item_format,
>  	.iop_unpin	= xfs_bui_item_unpin,
>  	.iop_release	= xfs_bui_item_release,
>  	.iop_recover	= xfs_bui_item_recover,
> +	.iop_match	= xfs_bui_item_match,
>  };
>  
>  /*
> @@ -675,45 +684,14 @@ xlog_recover_bmap_done_commit_pass2(
>  	xfs_lsn_t			lsn)
>  {
>  	struct xfs_bud_log_format	*bud_formatp;
> -	struct xfs_bui_log_item		*buip = NULL;
> -	struct xfs_log_item		*lip;
> -	uint64_t			bui_id;
> -	struct xfs_ail_cursor		cur;
> -	struct xfs_ail			*ailp = log->l_ailp;
>  
>  	bud_formatp = item->ri_buf[0].i_addr;
>  	if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) {
>  		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
>  		return -EFSCORRUPTED;
>  	}
> -	bui_id = bud_formatp->bud_bui_id;
> -
> -	/*
> -	 * Search for the BUI with the id in the BUD format structure in the
> -	 * AIL.
> -	 */
> -	spin_lock(&ailp->ail_lock);
> -	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
> -	while (lip != NULL) {
> -		if (lip->li_type == XFS_LI_BUI) {
> -			buip = (struct xfs_bui_log_item *)lip;
> -			if (buip->bui_format.bui_id == bui_id) {
> -				/*
> -				 * Drop the BUD reference to the BUI. This
> -				 * removes the BUI from the AIL and frees it.
> -				 */
> -				spin_unlock(&ailp->ail_lock);
> -				xfs_bui_release(buip);
> -				spin_lock(&ailp->ail_lock);
> -				break;
> -			}
> -		}
> -		lip = xfs_trans_ail_cursor_next(ailp, &cur);
> -	}
> -
> -	xfs_trans_ail_cursor_done(&cur);
> -	spin_unlock(&ailp->ail_lock);
>  
> +	xlog_recover_release_intent(log, XFS_LI_BUI, bud_formatp->bud_bui_id);
>  	return 0;
>  }
>  
> diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
> index 3fc8a9864217..4e1b10ab17a5 100644
> --- a/fs/xfs/xfs_extfree_item.c
> +++ b/fs/xfs/xfs_extfree_item.c
> @@ -665,12 +665,21 @@ xfs_efi_item_recover(
>  	return error;
>  }
>  
> +STATIC bool
> +xfs_efi_item_match(
> +	struct xfs_log_item	*lip,
> +	uint64_t		intent_id)
> +{
> +	return EFI_ITEM(lip)->efi_format.efi_id == intent_id;
> +}
> +
>  static const struct xfs_item_ops xfs_efi_item_ops = {
>  	.iop_size	= xfs_efi_item_size,
>  	.iop_format	= xfs_efi_item_format,
>  	.iop_unpin	= xfs_efi_item_unpin,
>  	.iop_release	= xfs_efi_item_release,
>  	.iop_recover	= xfs_efi_item_recover,
> +	.iop_match	= xfs_efi_item_match,
>  };
>  
>  
> @@ -734,46 +743,15 @@ xlog_recover_extfree_done_commit_pass2(
>  	struct xlog_recover_item	*item,
>  	xfs_lsn_t			lsn)
>  {
> -	struct xfs_ail_cursor		cur;
>  	struct xfs_efd_log_format	*efd_formatp;
> -	struct xfs_efi_log_item		*efip = NULL;
> -	struct xfs_log_item		*lip;
> -	struct xfs_ail			*ailp = log->l_ailp;
> -	uint64_t			efi_id;
>  
>  	efd_formatp = item->ri_buf[0].i_addr;
>  	ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
>  		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
>  	       (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
>  		((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
> -	efi_id = efd_formatp->efd_efi_id;
> -
> -	/*
> -	 * Search for the EFI with the id in the EFD format structure in the
> -	 * AIL.
> -	 */
> -	spin_lock(&ailp->ail_lock);
> -	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
> -	while (lip != NULL) {
> -		if (lip->li_type == XFS_LI_EFI) {
> -			efip = (struct xfs_efi_log_item *)lip;
> -			if (efip->efi_format.efi_id == efi_id) {
> -				/*
> -				 * Drop the EFD reference to the EFI. This
> -				 * removes the EFI from the AIL and frees it.
> -				 */
> -				spin_unlock(&ailp->ail_lock);
> -				xfs_efi_release(efip);
> -				spin_lock(&ailp->ail_lock);
> -				break;
> -			}
> -		}
> -		lip = xfs_trans_ail_cursor_next(ailp, &cur);
> -	}
> -
> -	xfs_trans_ail_cursor_done(&cur);
> -	spin_unlock(&ailp->ail_lock);
>  
> +	xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id);
>  	return 0;
>  }
>  
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 0ccc09c004f1..55477b9b9311 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -1779,6 +1779,38 @@ xlog_clear_stale_blocks(
>  	return 0;
>  }
>  
> +/*
> + * Release the recovered intent item in the AIL that matches the given intent
> + * type and intent id.
> + */
> +void
> +xlog_recover_release_intent(
> +	struct xlog		*log,
> +	unsigned short		intent_type,
> +	uint64_t		intent_id)
> +{
> +	struct xfs_ail_cursor	cur;
> +	struct xfs_log_item	*lip;
> +	struct xfs_ail		*ailp = log->l_ailp;
> +
> +	spin_lock(&ailp->ail_lock);
> +	for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL;
> +	     lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
> +		if (lip->li_type != intent_type)
> +			continue;
> +		if (!lip->li_ops->iop_match(lip, intent_id))
> +			continue;
> +
> +		spin_unlock(&ailp->ail_lock);
> +		lip->li_ops->iop_release(lip);
> +		spin_lock(&ailp->ail_lock);
> +		break;
> +	}
> +
> +	xfs_trans_ail_cursor_done(&cur);
> +	spin_unlock(&ailp->ail_lock);
> +}
> +
>  /******************************************************************************
>   *
>   *		Log recover routines
> @@ -2590,7 +2622,8 @@ xlog_finish_defer_ops(
>  /* Is this log item a deferred action intent? */
>  static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
>  {
> -	return lip->li_ops->iop_recover != NULL;
> +	return lip->li_ops->iop_recover != NULL &&
> +	       lip->li_ops->iop_match != NULL;
>  }
>  
>  /*
> diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
> index 5b72eebd8764..27126b136b5a 100644
> --- a/fs/xfs/xfs_refcount_item.c
> +++ b/fs/xfs/xfs_refcount_item.c
> @@ -591,12 +591,21 @@ xfs_cui_item_recover(
>  	return error;
>  }
>  
> +STATIC bool
> +xfs_cui_item_match(
> +	struct xfs_log_item	*lip,
> +	uint64_t		intent_id)
> +{
> +	return CUI_ITEM(lip)->cui_format.cui_id == intent_id;
> +}
> +
>  static const struct xfs_item_ops xfs_cui_item_ops = {
>  	.iop_size	= xfs_cui_item_size,
>  	.iop_format	= xfs_cui_item_format,
>  	.iop_unpin	= xfs_cui_item_unpin,
>  	.iop_release	= xfs_cui_item_release,
>  	.iop_recover	= xfs_cui_item_recover,
> +	.iop_match	= xfs_cui_item_match,
>  };
>  
>  /*
> @@ -684,45 +693,14 @@ xlog_recover_refcount_done_commit_pass2(
>  	xfs_lsn_t			lsn)
>  {
>  	struct xfs_cud_log_format	*cud_formatp;
> -	struct xfs_cui_log_item		*cuip = NULL;
> -	struct xfs_log_item		*lip;
> -	uint64_t			cui_id;
> -	struct xfs_ail_cursor		cur;
> -	struct xfs_ail			*ailp = log->l_ailp;
>  
>  	cud_formatp = item->ri_buf[0].i_addr;
>  	if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
>  		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
>  		return -EFSCORRUPTED;
>  	}
> -	cui_id = cud_formatp->cud_cui_id;
> -
> -	/*
> -	 * Search for the CUI with the id in the CUD format structure in the
> -	 * AIL.
> -	 */
> -	spin_lock(&ailp->ail_lock);
> -	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
> -	while (lip != NULL) {
> -		if (lip->li_type == XFS_LI_CUI) {
> -			cuip = (struct xfs_cui_log_item *)lip;
> -			if (cuip->cui_format.cui_id == cui_id) {
> -				/*
> -				 * Drop the CUD reference to the CUI. This
> -				 * removes the CUI from the AIL and frees it.
> -				 */
> -				spin_unlock(&ailp->ail_lock);
> -				xfs_cui_release(cuip);
> -				spin_lock(&ailp->ail_lock);
> -				break;
> -			}
> -		}
> -		lip = xfs_trans_ail_cursor_next(ailp, &cur);
> -	}
> -
> -	xfs_trans_ail_cursor_done(&cur);
> -	spin_unlock(&ailp->ail_lock);
>  
> +	xlog_recover_release_intent(log, XFS_LI_CUI, cud_formatp->cud_cui_id);
>  	return 0;
>  }
>  
> diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
> index e763dd8ed0a6..3987f217415c 100644
> --- a/fs/xfs/xfs_rmap_item.c
> +++ b/fs/xfs/xfs_rmap_item.c
> @@ -606,12 +606,21 @@ xfs_rui_item_recover(
>  	return error;
>  }
>  
> +STATIC bool
> +xfs_rui_item_match(
> +	struct xfs_log_item	*lip,
> +	uint64_t		intent_id)
> +{
> +	return RUI_ITEM(lip)->rui_format.rui_id == intent_id;
> +}
> +
>  static const struct xfs_item_ops xfs_rui_item_ops = {
>  	.iop_size	= xfs_rui_item_size,
>  	.iop_format	= xfs_rui_item_format,
>  	.iop_unpin	= xfs_rui_item_unpin,
>  	.iop_release	= xfs_rui_item_release,
>  	.iop_recover	= xfs_rui_item_recover,
> +	.iop_match	= xfs_rui_item_match,
>  };
>  
>  /*
> @@ -675,42 +684,11 @@ xlog_recover_rmap_done_commit_pass2(
>  	xfs_lsn_t			lsn)
>  {
>  	struct xfs_rud_log_format	*rud_formatp;
> -	struct xfs_rui_log_item		*ruip = NULL;
> -	struct xfs_log_item		*lip;
> -	uint64_t			rui_id;
> -	struct xfs_ail_cursor		cur;
> -	struct xfs_ail			*ailp = log->l_ailp;
>  
>  	rud_formatp = item->ri_buf[0].i_addr;
>  	ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
> -	rui_id = rud_formatp->rud_rui_id;
> -
> -	/*
> -	 * Search for the RUI with the id in the RUD format structure in the
> -	 * AIL.
> -	 */
> -	spin_lock(&ailp->ail_lock);
> -	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
> -	while (lip != NULL) {
> -		if (lip->li_type == XFS_LI_RUI) {
> -			ruip = (struct xfs_rui_log_item *)lip;
> -			if (ruip->rui_format.rui_id == rui_id) {
> -				/*
> -				 * Drop the RUD reference to the RUI. This
> -				 * removes the RUI from the AIL and frees it.
> -				 */
> -				spin_unlock(&ailp->ail_lock);
> -				xfs_rui_release(ruip);
> -				spin_lock(&ailp->ail_lock);
> -				break;
> -			}
> -		}
> -		lip = xfs_trans_ail_cursor_next(ailp, &cur);
> -	}
> -
> -	xfs_trans_ail_cursor_done(&cur);
> -	spin_unlock(&ailp->ail_lock);
>  
> +	xlog_recover_release_intent(log, XFS_LI_RUI, rud_formatp->rud_rui_id);
>  	return 0;
>  }
>  
> diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
> index 3f6a79108991..3e8808bb07c5 100644
> --- a/fs/xfs/xfs_trans.h
> +++ b/fs/xfs/xfs_trans.h
> @@ -78,6 +78,7 @@ struct xfs_item_ops {
>  	xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
>  	void (*iop_error)(struct xfs_log_item *, xfs_buf_t *);
>  	int (*iop_recover)(struct xfs_log_item *lip, struct xfs_trans *tp);
> +	bool (*iop_match)(struct xfs_log_item *item, uint64_t id);
>  };
>  
>  /*
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 23/28] xfs: refactor intent item RECOVERED flag into the log item
  2020-05-05  1:12 ` [PATCH 23/28] xfs: refactor intent item RECOVERED flag into the log item Darrick J. Wong
@ 2020-05-06  4:45   ` Chandan Babu R
  2020-05-06 15:32   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-06  4:45 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:42:59 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Rename XFS_{EFI,BUI,RUI,CUI}_RECOVERED to XFS_LI_RECOVERED so that we
> track recovery status in the log item, then get rid of the now unused
> flags fields in each of those log item types.
>

The functionality is the same as it was before applying this patch.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_bmap_item.c     |   10 +++++-----
>  fs/xfs/xfs_bmap_item.h     |    6 ------
>  fs/xfs/xfs_extfree_item.c  |    8 ++++----
>  fs/xfs/xfs_extfree_item.h  |    6 ------
>  fs/xfs/xfs_refcount_item.c |    8 ++++----
>  fs/xfs/xfs_refcount_item.h |    6 ------
>  fs/xfs/xfs_rmap_item.c     |    8 ++++----
>  fs/xfs/xfs_rmap_item.h     |    6 ------
>  fs/xfs/xfs_trans.h         |    4 +++-
>  9 files changed, 20 insertions(+), 42 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
> index 090dc1c53c92..8dd157fc44fa 100644
> --- a/fs/xfs/xfs_bmap_item.c
> +++ b/fs/xfs/xfs_bmap_item.c
> @@ -441,11 +441,11 @@ xfs_bui_recover(
>  	struct xfs_bmbt_irec		irec;
>  	struct xfs_mount		*mp = parent_tp->t_mountp;
>  
> -	ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
> +	ASSERT(!test_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags));
>  
>  	/* Only one mapping operation per BUI... */
>  	if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
> -		set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
> +		set_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags);
>  		xfs_bui_release(buip);
>  		return -EFSCORRUPTED;
>  	}
> @@ -479,7 +479,7 @@ xfs_bui_recover(
>  		 * This will pull the BUI from the AIL and
>  		 * free the memory associated with it.
>  		 */
> -		set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
> +		set_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags);
>  		xfs_bui_release(buip);
>  		return -EFSCORRUPTED;
>  	}
> @@ -537,7 +537,7 @@ xfs_bui_recover(
>  		xfs_bmap_unmap_extent(tp, ip, &irec);
>  	}
>  
> -	set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
> +	set_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags);
>  	xfs_defer_move(parent_tp, tp);
>  	error = xfs_trans_commit(tp);
>  	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> @@ -568,7 +568,7 @@ xfs_bui_item_recover(
>  	/*
>  	 * Skip BUIs that we've already processed.
>  	 */
> -	if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags))
> +	if (test_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags))
>  		return 0;
>  
>  	spin_unlock(&ailp->ail_lock);
> diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
> index 44d06e62f8f9..b9be62f8bd52 100644
> --- a/fs/xfs/xfs_bmap_item.h
> +++ b/fs/xfs/xfs_bmap_item.h
> @@ -32,11 +32,6 @@ struct kmem_zone;
>   */
>  #define	XFS_BUI_MAX_FAST_EXTENTS	1
>  
> -/*
> - * Define BUI flag bits. Manipulated by set/clear/test_bit operators.
> - */
> -#define	XFS_BUI_RECOVERED		1
> -
>  /*
>   * This is the "bmap update intent" log item.  It is used to log the fact that
>   * some reverse mappings need to change.  It is used in conjunction with the
> @@ -49,7 +44,6 @@ struct xfs_bui_log_item {
>  	struct xfs_log_item		bui_item;
>  	atomic_t			bui_refcount;
>  	atomic_t			bui_next_extent;
> -	unsigned long			bui_flags;	/* misc flags */
>  	struct xfs_bui_log_format	bui_format;
>  };
>  
> diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
> index dc6ebb5fb8d3..635c99fdda85 100644
> --- a/fs/xfs/xfs_extfree_item.c
> +++ b/fs/xfs/xfs_extfree_item.c
> @@ -592,7 +592,7 @@ xfs_efi_recover(
>  	xfs_extent_t		*extp;
>  	xfs_fsblock_t		startblock_fsb;
>  
> -	ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
> +	ASSERT(!test_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags));
>  
>  	/*
>  	 * First check the validity of the extents described by the
> @@ -611,7 +611,7 @@ xfs_efi_recover(
>  			 * This will pull the EFI from the AIL and
>  			 * free the memory associated with it.
>  			 */
> -			set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
> +			set_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags);
>  			xfs_efi_release(efip);
>  			return -EFSCORRUPTED;
>  		}
> @@ -632,7 +632,7 @@ xfs_efi_recover(
>  
>  	}
>  
> -	set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
> +	set_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags);
>  	error = xfs_trans_commit(tp);
>  	return error;
>  
> @@ -655,7 +655,7 @@ xfs_efi_item_recover(
>  	 * Skip EFIs that we've already processed.
>  	 */
>  	efip = container_of(lip, struct xfs_efi_log_item, efi_item);
> -	if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
> +	if (test_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags))
>  		return 0;
>  
>  	spin_unlock(&ailp->ail_lock);
> diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
> index 4b2c2c5c5985..cd2860c875bf 100644
> --- a/fs/xfs/xfs_extfree_item.h
> +++ b/fs/xfs/xfs_extfree_item.h
> @@ -16,11 +16,6 @@ struct kmem_zone;
>   */
>  #define	XFS_EFI_MAX_FAST_EXTENTS	16
>  
> -/*
> - * Define EFI flag bits. Manipulated by set/clear/test_bit operators.
> - */
> -#define	XFS_EFI_RECOVERED	1
> -
>  /*
>   * This is the "extent free intention" log item.  It is used to log the fact
>   * that some extents need to be free.  It is used in conjunction with the
> @@ -54,7 +49,6 @@ struct xfs_efi_log_item {
>  	struct xfs_log_item	efi_item;
>  	atomic_t		efi_refcount;
>  	atomic_t		efi_next_extent;
> -	unsigned long		efi_flags;	/* misc flags */
>  	xfs_efi_log_format_t	efi_format;
>  };
>  
> diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
> index fdc18576a023..4b242b3b33a3 100644
> --- a/fs/xfs/xfs_refcount_item.c
> +++ b/fs/xfs/xfs_refcount_item.c
> @@ -441,7 +441,7 @@ xfs_cui_recover(
>  	bool				requeue_only = false;
>  	struct xfs_mount		*mp = parent_tp->t_mountp;
>  
> -	ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
> +	ASSERT(!test_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags));
>  
>  	/*
>  	 * First check the validity of the extents described by the
> @@ -472,7 +472,7 @@ xfs_cui_recover(
>  			 * This will pull the CUI from the AIL and
>  			 * free the memory associated with it.
>  			 */
> -			set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
> +			set_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags);
>  			xfs_cui_release(cuip);
>  			return -EFSCORRUPTED;
>  		}
> @@ -556,7 +556,7 @@ xfs_cui_recover(
>  	}
>  
>  	xfs_refcount_finish_one_cleanup(tp, rcur, error);
> -	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
> +	set_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags);
>  	xfs_defer_move(parent_tp, tp);
>  	error = xfs_trans_commit(tp);
>  	return error;
> @@ -581,7 +581,7 @@ xfs_cui_item_recover(
>  	/*
>  	 * Skip CUIs that we've already processed.
>  	 */
> -	if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
> +	if (test_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags))
>  		return 0;
>  
>  	spin_unlock(&ailp->ail_lock);
> diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
> index cfaa857673a6..f4f2e836540b 100644
> --- a/fs/xfs/xfs_refcount_item.h
> +++ b/fs/xfs/xfs_refcount_item.h
> @@ -32,11 +32,6 @@ struct kmem_zone;
>   */
>  #define	XFS_CUI_MAX_FAST_EXTENTS	16
>  
> -/*
> - * Define CUI flag bits. Manipulated by set/clear/test_bit operators.
> - */
> -#define	XFS_CUI_RECOVERED		1
> -
>  /*
>   * This is the "refcount update intent" log item.  It is used to log
>   * the fact that some reverse mappings need to change.  It is used in
> @@ -51,7 +46,6 @@ struct xfs_cui_log_item {
>  	struct xfs_log_item		cui_item;
>  	atomic_t			cui_refcount;
>  	atomic_t			cui_next_extent;
> -	unsigned long			cui_flags;	/* misc flags */
>  	struct xfs_cui_log_format	cui_format;
>  };
>  
> diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
> index f9cd3ff18736..625eaf954d74 100644
> --- a/fs/xfs/xfs_rmap_item.c
> +++ b/fs/xfs/xfs_rmap_item.c
> @@ -480,7 +480,7 @@ xfs_rui_recover(
>  	struct xfs_trans		*tp;
>  	struct xfs_btree_cur		*rcur = NULL;
>  
> -	ASSERT(!test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags));
> +	ASSERT(!test_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags));
>  
>  	/*
>  	 * First check the validity of the extents described by the
> @@ -515,7 +515,7 @@ xfs_rui_recover(
>  			 * This will pull the RUI from the AIL and
>  			 * free the memory associated with it.
>  			 */
> -			set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
> +			set_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags);
>  			xfs_rui_release(ruip);
>  			return -EFSCORRUPTED;
>  		}
> @@ -573,7 +573,7 @@ xfs_rui_recover(
>  	}
>  
>  	xfs_rmap_finish_one_cleanup(tp, rcur, error);
> -	set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
> +	set_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags);
>  	error = xfs_trans_commit(tp);
>  	return error;
>  
> @@ -596,7 +596,7 @@ xfs_rui_item_recover(
>  	/*
>  	 * Skip RUIs that we've already processed.
>  	 */
> -	if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
> +	if (test_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags))
>  		return 0;
>  
>  	spin_unlock(&ailp->ail_lock);
> diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
> index 48a77a6f5c94..31e6cdfff71f 100644
> --- a/fs/xfs/xfs_rmap_item.h
> +++ b/fs/xfs/xfs_rmap_item.h
> @@ -35,11 +35,6 @@ struct kmem_zone;
>   */
>  #define	XFS_RUI_MAX_FAST_EXTENTS	16
>  
> -/*
> - * Define RUI flag bits. Manipulated by set/clear/test_bit operators.
> - */
> -#define	XFS_RUI_RECOVERED		1
> -
>  /*
>   * This is the "rmap update intent" log item.  It is used to log the fact that
>   * some reverse mappings need to change.  It is used in conjunction with the
> @@ -52,7 +47,6 @@ struct xfs_rui_log_item {
>  	struct xfs_log_item		rui_item;
>  	atomic_t			rui_refcount;
>  	atomic_t			rui_next_extent;
> -	unsigned long			rui_flags;	/* misc flags */
>  	struct xfs_rui_log_format	rui_format;
>  };
>  
> diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
> index 3e8808bb07c5..8308bf6d7e40 100644
> --- a/fs/xfs/xfs_trans.h
> +++ b/fs/xfs/xfs_trans.h
> @@ -59,12 +59,14 @@ struct xfs_log_item {
>  #define	XFS_LI_ABORTED	1
>  #define	XFS_LI_FAILED	2
>  #define	XFS_LI_DIRTY	3	/* log item dirty in transaction */
> +#define	XFS_LI_RECOVERED 4	/* log intent item has been recovered */
>  
>  #define XFS_LI_FLAGS \
>  	{ (1 << XFS_LI_IN_AIL),		"IN_AIL" }, \
>  	{ (1 << XFS_LI_ABORTED),	"ABORTED" }, \
>  	{ (1 << XFS_LI_FAILED),		"FAILED" }, \
> -	{ (1 << XFS_LI_DIRTY),		"DIRTY" }
> +	{ (1 << XFS_LI_DIRTY),		"DIRTY" }, \
> +	{ (1 << XFS_LI_RECOVERED),	"RECOVERED" }
>  
>  struct xfs_item_ops {
>  	unsigned flags;
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 24/28] xfs: refactor intent item iop_recover calls
  2020-05-05  1:13 ` [PATCH 24/28] xfs: refactor intent item iop_recover calls Darrick J. Wong
@ 2020-05-06  5:14   ` Chandan Babu R
  2020-05-06 15:34   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-06  5:14 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:43:06 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Now that we've made the recovered item tests all the same, we can hoist
> the test and the ail locking code to the ->iop_recover caller and call
> the recovery function directly.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_bmap_item.c     |   48 ++++++++++++--------------------------------
>  fs/xfs/xfs_extfree_item.c  |   44 ++++++++++------------------------------
>  fs/xfs/xfs_log_recover.c   |    8 ++++++-
>  fs/xfs/xfs_refcount_item.c |   46 +++++++++++-------------------------------
>  fs/xfs/xfs_rmap_item.c     |   45 +++++++++++------------------------------
>  5 files changed, 54 insertions(+), 137 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
> index 8dd157fc44fa..8f0dc6d550d1 100644
> --- a/fs/xfs/xfs_bmap_item.c
> +++ b/fs/xfs/xfs_bmap_item.c
> @@ -421,25 +421,26 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
>   * We need to update some inode's bmbt.
>   */
>  STATIC int
> -xfs_bui_recover(
> -	struct xfs_trans		*parent_tp,
> -	struct xfs_bui_log_item		*buip)
> +xfs_bui_item_recover(
> +	struct xfs_log_item		*lip,
> +	struct xfs_trans		*parent_tp)
>  {
> -	int				error = 0;
> -	unsigned int			bui_type;
> +	struct xfs_bmbt_irec		irec;
> +	struct xfs_bui_log_item		*buip = BUI_ITEM(lip);
> +	struct xfs_trans		*tp;
> +	struct xfs_inode		*ip = NULL;
> +	struct xfs_mount		*mp = parent_tp->t_mountp;
>  	struct xfs_map_extent		*bmap;
> +	struct xfs_bud_log_item		*budp;
>  	xfs_fsblock_t			startblock_fsb;
>  	xfs_fsblock_t			inode_fsb;
>  	xfs_filblks_t			count;
> -	bool				op_ok;
> -	struct xfs_bud_log_item		*budp;
> +	xfs_exntst_t			state;
>  	enum xfs_bmap_intent_type	type;
> +	bool				op_ok;
> +	unsigned int			bui_type;
>  	int				whichfork;
> -	xfs_exntst_t			state;
> -	struct xfs_trans		*tp;
> -	struct xfs_inode		*ip = NULL;
> -	struct xfs_bmbt_irec		irec;
> -	struct xfs_mount		*mp = parent_tp->t_mountp;
> +	int				error = 0;
>  
>  	ASSERT(!test_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags));
>  
> @@ -555,29 +556,6 @@ xfs_bui_recover(
>  	return error;
>  }
>  
> -/* Recover the BUI if necessary. */
> -STATIC int
> -xfs_bui_item_recover(
> -	struct xfs_log_item		*lip,
> -	struct xfs_trans		*tp)
> -{
> -	struct xfs_ail			*ailp = lip->li_ailp;
> -	struct xfs_bui_log_item		*buip = BUI_ITEM(lip);
> -	int				error;
> -
> -	/*
> -	 * Skip BUIs that we've already processed.
> -	 */
> -	if (test_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags))
> -		return 0;
> -
> -	spin_unlock(&ailp->ail_lock);
> -	error = xfs_bui_recover(tp, buip);
> -	spin_lock(&ailp->ail_lock);
> -
> -	return error;
> -}
> -
>  STATIC bool
>  xfs_bui_item_match(
>  	struct xfs_log_item	*lip,
> diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
> index 635c99fdda85..ec8a79fe6cab 100644
> --- a/fs/xfs/xfs_extfree_item.c
> +++ b/fs/xfs/xfs_extfree_item.c
> @@ -581,16 +581,18 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
>   * the log.  We need to free the extents that it describes.
>   */
>  STATIC int
> -xfs_efi_recover(
> -	struct xfs_mount	*mp,
> -	struct xfs_efi_log_item	*efip)
> +xfs_efi_item_recover(
> +	struct xfs_log_item		*lip,
> +	struct xfs_trans		*parent_tp)
>  {
> -	struct xfs_efd_log_item	*efdp;
> -	struct xfs_trans	*tp;
> -	int			i;
> -	int			error = 0;
> -	xfs_extent_t		*extp;
> -	xfs_fsblock_t		startblock_fsb;
> +	struct xfs_efi_log_item		*efip = EFI_ITEM(lip);
> +	struct xfs_mount		*mp = parent_tp->t_mountp;
> +	struct xfs_efd_log_item		*efdp;
> +	struct xfs_trans		*tp;
> +	struct xfs_extent		*extp;
> +	xfs_fsblock_t			startblock_fsb;
> +	int				i;
> +	int				error = 0;
>  
>  	ASSERT(!test_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags));
>  
> @@ -641,30 +643,6 @@ xfs_efi_recover(
>  	return error;
>  }
>  
> -/* Recover the EFI if necessary. */
> -STATIC int
> -xfs_efi_item_recover(
> -	struct xfs_log_item		*lip,
> -	struct xfs_trans		*tp)
> -{
> -	struct xfs_ail			*ailp = lip->li_ailp;
> -	struct xfs_efi_log_item		*efip;
> -	int				error;
> -
> -	/*
> -	 * Skip EFIs that we've already processed.
> -	 */
> -	efip = container_of(lip, struct xfs_efi_log_item, efi_item);
> -	if (test_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags))
> -		return 0;
> -
> -	spin_unlock(&ailp->ail_lock);
> -	error = xfs_efi_recover(tp->t_mountp, efip);
> -	spin_lock(&ailp->ail_lock);
> -
> -	return error;
> -}
> -
>  STATIC bool
>  xfs_efi_item_match(
>  	struct xfs_log_item	*lip,
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index a2c03d87c374..8ff957da2845 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2667,7 +2667,7 @@ xlog_recover_process_intents(
>  	struct xfs_ail_cursor	cur;
>  	struct xfs_log_item	*lip;
>  	struct xfs_ail		*ailp;
> -	int			error;
> +	int			error = 0;

The value of the 'error' variable is set unconditionally from the return
value of xfs_trans_alloc_empty(), so it does not need to be initialized
explicitly. The same is true of the individual ->iop_recover() methods
(although those initializations were not introduced by this patch).

Apart from the above nit, the rest looks good to me.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

>  #if defined(DEBUG) || defined(XFS_WARN)
>  	xfs_lsn_t		last_lsn;
>  #endif
> @@ -2717,7 +2717,11 @@ xlog_recover_process_intents(
>  		 * this routine or else those subsequent intents will get
>  		 * replayed in the wrong order!
>  		 */
> -		error = lip->li_ops->iop_recover(lip, parent_tp);
> +		if (!test_bit(XFS_LI_RECOVERED, &lip->li_flags)) {
> +			spin_unlock(&ailp->ail_lock);
> +			error = lip->li_ops->iop_recover(lip, parent_tp);
> +			spin_lock(&ailp->ail_lock);
> +		}
>  		if (error)
>  			goto out;
>  		lip = xfs_trans_ail_cursor_next(ailp, &cur);
> diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
> index 4b242b3b33a3..fab821fce76b 100644
> --- a/fs/xfs/xfs_refcount_item.c
> +++ b/fs/xfs/xfs_refcount_item.c
> @@ -421,25 +421,26 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
>   * We need to update the refcountbt.
>   */
>  STATIC int
> -xfs_cui_recover(
> -	struct xfs_trans		*parent_tp,
> -	struct xfs_cui_log_item		*cuip)
> +xfs_cui_item_recover(
> +	struct xfs_log_item		*lip,
> +	struct xfs_trans		*parent_tp)
>  {
> -	int				i;
> -	int				error = 0;
> -	unsigned int			refc_type;
> +	struct xfs_bmbt_irec		irec;
> +	struct xfs_cui_log_item		*cuip = CUI_ITEM(lip);
>  	struct xfs_phys_extent		*refc;
> -	xfs_fsblock_t			startblock_fsb;
> -	bool				op_ok;
>  	struct xfs_cud_log_item		*cudp;
>  	struct xfs_trans		*tp;
>  	struct xfs_btree_cur		*rcur = NULL;
> -	enum xfs_refcount_intent_type	type;
> +	struct xfs_mount		*mp = parent_tp->t_mountp;
> +	xfs_fsblock_t			startblock_fsb;
>  	xfs_fsblock_t			new_fsb;
>  	xfs_extlen_t			new_len;
> -	struct xfs_bmbt_irec		irec;
> +	unsigned int			refc_type;
> +	bool				op_ok;
>  	bool				requeue_only = false;
> -	struct xfs_mount		*mp = parent_tp->t_mountp;
> +	enum xfs_refcount_intent_type	type;
> +	int				i;
> +	int				error = 0;
>  
>  	ASSERT(!test_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags));
>  
> @@ -568,29 +569,6 @@ xfs_cui_recover(
>  	return error;
>  }
>  
> -/* Recover the CUI if necessary. */
> -STATIC int
> -xfs_cui_item_recover(
> -	struct xfs_log_item		*lip,
> -	struct xfs_trans		*tp)
> -{
> -	struct xfs_ail			*ailp = lip->li_ailp;
> -	struct xfs_cui_log_item		*cuip = CUI_ITEM(lip);
> -	int				error;
> -
> -	/*
> -	 * Skip CUIs that we've already processed.
> -	 */
> -	if (test_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags))
> -		return 0;
> -
> -	spin_unlock(&ailp->ail_lock);
> -	error = xfs_cui_recover(tp, cuip);
> -	spin_lock(&ailp->ail_lock);
> -
> -	return error;
> -}
> -
>  STATIC bool
>  xfs_cui_item_match(
>  	struct xfs_log_item	*lip,
> diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
> index 625eaf954d74..c9233a220551 100644
> --- a/fs/xfs/xfs_rmap_item.c
> +++ b/fs/xfs/xfs_rmap_item.c
> @@ -464,21 +464,23 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
>   * We need to update the rmapbt.
>   */
>  STATIC int
> -xfs_rui_recover(
> -	struct xfs_mount		*mp,
> -	struct xfs_rui_log_item		*ruip)
> +xfs_rui_item_recover(
> +	struct xfs_log_item		*lip,
> +	struct xfs_trans		*parent_tp)
>  {
> -	int				i;
> -	int				error = 0;
> +	struct xfs_rui_log_item		*ruip = RUI_ITEM(lip);
>  	struct xfs_map_extent		*rmap;
> -	xfs_fsblock_t			startblock_fsb;
> -	bool				op_ok;
>  	struct xfs_rud_log_item		*rudp;
> -	enum xfs_rmap_intent_type	type;
> -	int				whichfork;
> -	xfs_exntst_t			state;
>  	struct xfs_trans		*tp;
>  	struct xfs_btree_cur		*rcur = NULL;
> +	struct xfs_mount		*mp = parent_tp->t_mountp;
> +	xfs_fsblock_t			startblock_fsb;
> +	enum xfs_rmap_intent_type	type;
> +	xfs_exntst_t			state;
> +	bool				op_ok;
> +	int				i;
> +	int				whichfork;
> +	int				error = 0;
>  
>  	ASSERT(!test_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags));
>  
> @@ -583,29 +585,6 @@ xfs_rui_recover(
>  	return error;
>  }
>  
> -/* Recover the RUI if necessary. */
> -STATIC int
> -xfs_rui_item_recover(
> -	struct xfs_log_item		*lip,
> -	struct xfs_trans		*tp)
> -{
> -	struct xfs_ail			*ailp = lip->li_ailp;
> -	struct xfs_rui_log_item		*ruip = RUI_ITEM(lip);
> -	int				error;
> -
> -	/*
> -	 * Skip RUIs that we've already processed.
> -	 */
> -	if (test_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags))
> -		return 0;
> -
> -	spin_unlock(&ailp->ail_lock);
> -	error = xfs_rui_recover(tp->t_mountp, ruip);
> -	spin_lock(&ailp->ail_lock);
> -
> -	return error;
> -}
> -
>  STATIC bool
>  xfs_rui_item_match(
>  	struct xfs_log_item	*lip,
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 25/28] xfs: hoist setting of XFS_LI_RECOVERED to caller
  2020-05-05  1:13 ` [PATCH 25/28] xfs: hoist setting of XFS_LI_RECOVERED to caller Darrick J. Wong
@ 2020-05-06  5:34   ` Chandan Babu R
  2020-05-06 15:35   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-06  5:34 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:43:12 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> The only purpose of XFS_LI_RECOVERED is to prevent log recovery from
> trying to replay recovered intents more than once.  Therefore, we can
> move the bit setting up to the ->iop_recover caller.
>

The functionality is the same as it was before applying this patch.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_bmap_item.c     |    5 -----
>  fs/xfs/xfs_extfree_item.c  |    4 ----
>  fs/xfs/xfs_log_recover.c   |    2 +-
>  fs/xfs/xfs_refcount_item.c |    4 ----
>  fs/xfs/xfs_rmap_item.c     |    4 ----
>  5 files changed, 1 insertion(+), 18 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
> index 8f0dc6d550d1..0793c317defb 100644
> --- a/fs/xfs/xfs_bmap_item.c
> +++ b/fs/xfs/xfs_bmap_item.c
> @@ -442,11 +442,8 @@ xfs_bui_item_recover(
>  	int				whichfork;
>  	int				error = 0;
>  
> -	ASSERT(!test_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags));
> -
>  	/* Only one mapping operation per BUI... */
>  	if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
> -		set_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags);
>  		xfs_bui_release(buip);
>  		return -EFSCORRUPTED;
>  	}
> @@ -480,7 +477,6 @@ xfs_bui_item_recover(
>  		 * This will pull the BUI from the AIL and
>  		 * free the memory associated with it.
>  		 */
> -		set_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags);
>  		xfs_bui_release(buip);
>  		return -EFSCORRUPTED;
>  	}
> @@ -538,7 +534,6 @@ xfs_bui_item_recover(
>  		xfs_bmap_unmap_extent(tp, ip, &irec);
>  	}
>  
> -	set_bit(XFS_LI_RECOVERED, &buip->bui_item.li_flags);
>  	xfs_defer_move(parent_tp, tp);
>  	error = xfs_trans_commit(tp);
>  	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
> index ec8a79fe6cab..b92678bede24 100644
> --- a/fs/xfs/xfs_extfree_item.c
> +++ b/fs/xfs/xfs_extfree_item.c
> @@ -594,8 +594,6 @@ xfs_efi_item_recover(
>  	int				i;
>  	int				error = 0;
>  
> -	ASSERT(!test_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags));
> -
>  	/*
>  	 * First check the validity of the extents described by the
>  	 * EFI.  If any are bad, then assume that all are bad and
> @@ -613,7 +611,6 @@ xfs_efi_item_recover(
>  			 * This will pull the EFI from the AIL and
>  			 * free the memory associated with it.
>  			 */
> -			set_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags);
>  			xfs_efi_release(efip);
>  			return -EFSCORRUPTED;
>  		}
> @@ -634,7 +631,6 @@ xfs_efi_item_recover(
>  
>  	}
>  
> -	set_bit(XFS_LI_RECOVERED, &efip->efi_item.li_flags);
>  	error = xfs_trans_commit(tp);
>  	return error;
>  
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 8ff957da2845..a49435db3be0 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2717,7 +2717,7 @@ xlog_recover_process_intents(
>  		 * this routine or else those subsequent intents will get
>  		 * replayed in the wrong order!
>  		 */
> -		if (!test_bit(XFS_LI_RECOVERED, &lip->li_flags)) {
> +		if (!test_and_set_bit(XFS_LI_RECOVERED, &lip->li_flags)) {
>  			spin_unlock(&ailp->ail_lock);
>  			error = lip->li_ops->iop_recover(lip, parent_tp);
>  			spin_lock(&ailp->ail_lock);
> diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
> index fab821fce76b..e6d355a09bb3 100644
> --- a/fs/xfs/xfs_refcount_item.c
> +++ b/fs/xfs/xfs_refcount_item.c
> @@ -442,8 +442,6 @@ xfs_cui_item_recover(
>  	int				i;
>  	int				error = 0;
>  
> -	ASSERT(!test_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags));
> -
>  	/*
>  	 * First check the validity of the extents described by the
>  	 * CUI.  If any are bad, then assume that all are bad and
> @@ -473,7 +471,6 @@ xfs_cui_item_recover(
>  			 * This will pull the CUI from the AIL and
>  			 * free the memory associated with it.
>  			 */
> -			set_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags);
>  			xfs_cui_release(cuip);
>  			return -EFSCORRUPTED;
>  		}
> @@ -557,7 +554,6 @@ xfs_cui_item_recover(
>  	}
>  
>  	xfs_refcount_finish_one_cleanup(tp, rcur, error);
> -	set_bit(XFS_LI_RECOVERED, &cuip->cui_item.li_flags);
>  	xfs_defer_move(parent_tp, tp);
>  	error = xfs_trans_commit(tp);
>  	return error;
> diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
> index c9233a220551..4a5e2b1cf75a 100644
> --- a/fs/xfs/xfs_rmap_item.c
> +++ b/fs/xfs/xfs_rmap_item.c
> @@ -482,8 +482,6 @@ xfs_rui_item_recover(
>  	int				whichfork;
>  	int				error = 0;
>  
> -	ASSERT(!test_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags));
> -
>  	/*
>  	 * First check the validity of the extents described by the
>  	 * RUI.  If any are bad, then assume that all are bad and
> @@ -517,7 +515,6 @@ xfs_rui_item_recover(
>  			 * This will pull the RUI from the AIL and
>  			 * free the memory associated with it.
>  			 */
> -			set_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags);
>  			xfs_rui_release(ruip);
>  			return -EFSCORRUPTED;
>  		}
> @@ -575,7 +572,6 @@ xfs_rui_item_recover(
>  	}
>  
>  	xfs_rmap_finish_one_cleanup(tp, rcur, error);
> -	set_bit(XFS_LI_RECOVERED, &ruip->rui_item.li_flags);
>  	error = xfs_trans_commit(tp);
>  	return error;
>  
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 26/28] xfs: move log recovery buffer cancellation code to xfs_buf_item_recover.c
  2020-05-05  1:13 ` [PATCH 26/28] xfs: move log recovery buffer cancellation code to xfs_buf_item_recover.c Darrick J. Wong
@ 2020-05-06  6:42   ` Chandan Babu R
  2020-05-06 15:35   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-06  6:42 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:43:18 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the helpers that handle incore buffer cancellation records to
> xfs_buf_item_recover.c since they're not directly related to the main
> log recovery machinery.  No functional changes.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_buf_item_recover.c |  104 +++++++++++++++++++++++++++++++++++++++++
>  fs/xfs/xfs_log_recover.c      |  102 ----------------------------------------
>  2 files changed, 104 insertions(+), 102 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
> index 4ca6d47d6c95..99ec6ebbc7f4 100644
> --- a/fs/xfs/xfs_buf_item_recover.c
> +++ b/fs/xfs/xfs_buf_item_recover.c
> @@ -23,6 +23,110 @@
>  #include "xfs_dir2.h"
>  #include "xfs_quota.h"
>  
> +/*
> + * This structure is used during recovery to record the buf log items which
> + * have been canceled and should not be replayed.
> + */
> +struct xfs_buf_cancel {
> +	xfs_daddr_t		bc_blkno;
> +	uint			bc_len;
> +	int			bc_refcount;
> +	struct list_head	bc_list;
> +};
> +
> +static struct xfs_buf_cancel *
> +xlog_find_buffer_cancelled(
> +	struct xlog		*log,
> +	xfs_daddr_t		blkno,
> +	uint			len)
> +{
> +	struct list_head	*bucket;
> +	struct xfs_buf_cancel	*bcp;
> +
> +	if (!log->l_buf_cancel_table)
> +		return NULL;
> +
> +	bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
> +	list_for_each_entry(bcp, bucket, bc_list) {
> +		if (bcp->bc_blkno == blkno && bcp->bc_len == len)
> +			return bcp;
> +	}
> +
> +	return NULL;
> +}
> +
> +bool
> +xlog_add_buffer_cancelled(
> +	struct xlog		*log,
> +	xfs_daddr_t		blkno,
> +	uint			len)

The users of xlog_add_buffer_cancelled() are within xfs_buf_item_recover.c and
hence this can be made static and the corresponding prototype declaration in
xfs_log_recover.h can be removed.

Other than that trivial issue, everything else looks good to me.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>


> +{
> +	struct xfs_buf_cancel	*bcp;
> +
> +	/*
> +	 * If we find an existing cancel record, this indicates that the buffer
> +	 * was cancelled multiple times.  To ensure that during pass 2 we keep
> +	 * the record in the table until we reach its last occurrence in the
> +	 * log, a reference count is kept to tell how many times we expect to
> +	 * see this record during the second pass.
> +	 */
> +	bcp = xlog_find_buffer_cancelled(log, blkno, len);
> +	if (bcp) {
> +		bcp->bc_refcount++;
> +		return false;
> +	}
> +
> +	bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), 0);
> +	bcp->bc_blkno = blkno;
> +	bcp->bc_len = len;
> +	bcp->bc_refcount = 1;
> +	list_add_tail(&bcp->bc_list, XLOG_BUF_CANCEL_BUCKET(log, blkno));
> +	return true;
> +}
> +
> +/*
> + * Check if there is and entry for blkno, len in the buffer cancel record table.
> + */
> +bool
> +xlog_is_buffer_cancelled(
> +	struct xlog		*log,
> +	xfs_daddr_t		blkno,
> +	uint			len)
> +{
> +	return xlog_find_buffer_cancelled(log, blkno, len) != NULL;
> +}
> +
> +/*
> + * Check if there is and entry for blkno, len in the buffer cancel record table,
> + * and decremented the reference count on it if there is one.
> + *
> + * Remove the cancel record once the refcount hits zero, so that if the same
> + * buffer is re-used again after its last cancellation we actually replay the
> + * changes made at that point.
> + */
> +bool
> +xlog_put_buffer_cancelled(
> +	struct xlog		*log,
> +	xfs_daddr_t		blkno,
> +	uint			len)
> +{
> +	struct xfs_buf_cancel	*bcp;
> +
> +	bcp = xlog_find_buffer_cancelled(log, blkno, len);
> +	if (!bcp) {
> +		ASSERT(0);
> +		return false;
> +	}
> +
> +	if (--bcp->bc_refcount == 0) {
> +		list_del(&bcp->bc_list);
> +		kmem_free(bcp);
> +	}
> +	return true;
> +}
> +
> +/* log buffer item recovery */
> +
>  STATIC enum xlog_recover_reorder
>  xlog_recover_buf_reorder(
>  	struct xlog_recover_item	*item)
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index a49435db3be0..0c8a1f4bf4ad 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -55,17 +55,6 @@ STATIC int
>  xlog_do_recovery_pass(
>          struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *);
>  
> -/*
> - * This structure is used during recovery to record the buf log items which
> - * have been canceled and should not be replayed.
> - */
> -struct xfs_buf_cancel {
> -	xfs_daddr_t		bc_blkno;
> -	uint			bc_len;
> -	int			bc_refcount;
> -	struct list_head	bc_list;
> -};
> -
>  /*
>   * Sector aligned buffer routines for buffer create/read/write/access
>   */
> @@ -1981,97 +1970,6 @@ xlog_recover_reorder_trans(
>  	return error;
>  }
>  
> -static struct xfs_buf_cancel *
> -xlog_find_buffer_cancelled(
> -	struct xlog		*log,
> -	xfs_daddr_t		blkno,
> -	uint			len)
> -{
> -	struct list_head	*bucket;
> -	struct xfs_buf_cancel	*bcp;
> -
> -	if (!log->l_buf_cancel_table)
> -		return NULL;
> -
> -	bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
> -	list_for_each_entry(bcp, bucket, bc_list) {
> -		if (bcp->bc_blkno == blkno && bcp->bc_len == len)
> -			return bcp;
> -	}
> -
> -	return NULL;
> -}
> -
> -bool
> -xlog_add_buffer_cancelled(
> -	struct xlog		*log,
> -	xfs_daddr_t		blkno,
> -	uint			len)
> -{
> -	struct xfs_buf_cancel	*bcp;
> -
> -	/*
> -	 * If we find an existing cancel record, this indicates that the buffer
> -	 * was cancelled multiple times.  To ensure that during pass 2 we keep
> -	 * the record in the table until we reach its last occurrence in the
> -	 * log, a reference count is kept to tell how many times we expect to
> -	 * see this record during the second pass.
> -	 */
> -	bcp = xlog_find_buffer_cancelled(log, blkno, len);
> -	if (bcp) {
> -		bcp->bc_refcount++;
> -		return false;
> -	}
> -
> -	bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), 0);
> -	bcp->bc_blkno = blkno;
> -	bcp->bc_len = len;
> -	bcp->bc_refcount = 1;
> -	list_add_tail(&bcp->bc_list, XLOG_BUF_CANCEL_BUCKET(log, blkno));
> -	return true;
> -}
> -
> -/*
> - * Check if there is and entry for blkno, len in the buffer cancel record table.
> - */
> -bool
> -xlog_is_buffer_cancelled(
> -	struct xlog		*log,
> -	xfs_daddr_t		blkno,
> -	uint			len)
> -{
> -	return xlog_find_buffer_cancelled(log, blkno, len) != NULL;
> -}
> -
> -/*
> - * Check if there is and entry for blkno, len in the buffer cancel record table,
> - * and decremented the reference count on it if there is one.
> - *
> - * Remove the cancel record once the refcount hits zero, so that if the same
> - * buffer is re-used again after its last cancellation we actually replay the
> - * changes made at that point.
> - */
> -bool
> -xlog_put_buffer_cancelled(
> -	struct xlog		*log,
> -	xfs_daddr_t		blkno,
> -	uint			len)
> -{
> -	struct xfs_buf_cancel	*bcp;
> -
> -	bcp = xlog_find_buffer_cancelled(log, blkno, len);
> -	if (!bcp) {
> -		ASSERT(0);
> -		return false;
> -	}
> -
> -	if (--bcp->bc_refcount == 0) {
> -		list_del(&bcp->bc_list);
> -		kmem_free(bcp);
> -	}
> -	return true;
> -}
> -
>  void
>  xlog_buf_readahead(
>  	struct xlog		*log,
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 27/28] xfs: remove unnecessary includes from xfs_log_recover.c
  2020-05-05  1:13 ` [PATCH 27/28] xfs: remove unnecessary includes from xfs_log_recover.c Darrick J. Wong
@ 2020-05-06  7:21   ` Chandan Babu R
  0 siblings, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-06  7:21 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs

On Tuesday 5 May 2020 6:43:24 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Remove unnecessary includes from the log recovery code.
> 
> Suggested-by: Christoph Hellwig <hch@infradead.org>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_log_recover.c |    8 --------
>  1 file changed, 8 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 0c8a1f4bf4ad..a9cc546535e0 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -18,21 +18,13 @@
>  #include "xfs_log.h"
>  #include "xfs_log_priv.h"
>  #include "xfs_log_recover.h"
> -#include "xfs_inode_item.h"
> -#include "xfs_extfree_item.h"
>  #include "xfs_trans_priv.h"
>  #include "xfs_alloc.h"
>  #include "xfs_ialloc.h"
> -#include "xfs_quota.h"
>  #include "xfs_trace.h"
>  #include "xfs_icache.h"

Inclusion of xfs_icache.h can be removed as well. I have verified this by
removing the above statement and successfully building the kernel.

The rest looks good to me.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> -#include "xfs_bmap_btree.h"
>  #include "xfs_error.h"
> -#include "xfs_dir2.h"
> -#include "xfs_rmap_item.h"
>  #include "xfs_buf_item.h"
> -#include "xfs_refcount_item.h"
> -#include "xfs_bmap_item.h"
>  
>  #define BLK_AVG(blk1, blk2)	((blk1+blk2) >> 1)
>  
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 28/28] xfs: use parallel processing to clear unlinked metadata
  2020-05-05  1:13 ` [PATCH 28/28] xfs: use parallel processing to clear unlinked metadata Darrick J. Wong
@ 2020-05-06  7:57   ` Chandan Babu R
  2020-05-06 15:36   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Chandan Babu R @ 2020-05-06  7:57 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tuesday 5 May 2020 6:43:30 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Run all the unlinked metadata clearing work in parallel so that we can
> take advantage of higher-performance storage devices.
>

The changes look good to me.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_unlink_recover.c |   42 +++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 39 insertions(+), 3 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_unlink_recover.c b/fs/xfs/xfs_unlink_recover.c
> index fe7fa3d623f2..92ea81969e02 100644
> --- a/fs/xfs/xfs_unlink_recover.c
> +++ b/fs/xfs/xfs_unlink_recover.c
> @@ -21,6 +21,7 @@
>  #include "xfs_trans_priv.h"
>  #include "xfs_ialloc.h"
>  #include "xfs_icache.h"
> +#include "xfs_pwork.h"
>  
>  /*
>   * This routine performs a transaction to null out a bad inode pointer
> @@ -195,19 +196,54 @@ xlog_recover_process_iunlinked(
>  	return 0;
>  }
>  
> +struct xlog_recover_unlinked {
> +	struct xfs_pwork	pwork;
> +	xfs_agnumber_t		agno;
> +};
> +
> +static int
> +xlog_recover_process_unlinked_ag(
> +	struct xfs_mount		*mp,
> +	struct xfs_pwork		*pwork)
> +{
> +	struct xlog_recover_unlinked	*ru;
> +	int				error = 0;
> +
> +	ru = container_of(pwork, struct xlog_recover_unlinked, pwork);
> +	if (xfs_pwork_want_abort(pwork))
> +		goto out;
> +
> +	error = xlog_recover_process_iunlinked(mp, ru->agno);
> +out:
> +	kmem_free(ru);
> +	return error;
> +}
> +
>  int
>  xlog_recover_process_unlinked(
>  	struct xlog		*log)
>  {
>  	struct xfs_mount	*mp = log->l_mp;
> +	struct xfs_pwork_ctl	pctl;
> +	struct xlog_recover_unlinked *ru;
> +	unsigned int		nr_threads;
>  	xfs_agnumber_t		agno;
>  	int			error;
>  
> +	nr_threads = xfs_pwork_guess_datadev_parallelism(mp);
> +	error = xfs_pwork_init(mp, &pctl, xlog_recover_process_unlinked_ag,
> +			"xlog_recover", nr_threads);
> +	if (error)
> +		return error;
> +
>  	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> -		error = xlog_recover_process_iunlinked(mp, agno);
> -		if (error)
> +		if (xfs_pwork_ctl_want_abort(&pctl))
>  			break;
> +
> +		ru = kmem_zalloc(sizeof(struct xlog_recover_unlinked), 0);
> +		ru->agno = agno;
> +		xfs_pwork_queue(&pctl, &ru->pwork);
>  	}
>  
> -	return error;
> +	return xfs_pwork_destroy(&pctl);
>  }
> 
> 


-- 
chandan




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 01/28] xfs: convert xfs_log_recover_item_t to struct xfs_log_recover_item
  2020-05-05  1:10 ` [PATCH 01/28] xfs: convert xfs_log_recover_item_t to struct xfs_log_recover_item Darrick J. Wong
  2020-05-05  3:33   ` Chandan Babu R
@ 2020-05-06 14:59   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 14:59 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:10:39PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Remove the old typedefs.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 02/28] xfs: refactor log recovery item sorting into a generic dispatch structure
  2020-05-05  1:10 ` [PATCH 02/28] xfs: refactor log recovery item sorting into a generic dispatch structure Darrick J. Wong
  2020-05-05  4:11   ` Chandan Babu R
@ 2020-05-06 15:03   ` Christoph Hellwig
  2020-05-06 18:36     ` Darrick J. Wong
  1 sibling, 1 reply; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:03 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:10:45PM -0700, Darrick J. Wong wrote:
> +const struct xlog_recover_item_ops xlog_bmap_intent_item_ops = {
> +	.item_type		= XFS_LI_BUI,
> +};
> +
> +const struct xlog_recover_item_ops xlog_bmap_done_item_ops = {
> +	.item_type		= XFS_LI_BUD,
> +};

Pretty much everything else in this file seems to use bui/bud names.
The same also applies to the four other intent/done pairs and their
shortnames.  Not really a major thing, but it might be worth fixing
to fit the flow.

> +STATIC enum xlog_recover_reorder
> +xlog_recover_buf_reorder(
> +	struct xlog_recover_item	*item)
> +{
> +	struct xfs_buf_log_format	*buf_f = item->ri_buf[0].i_addr;
> +
> +	if (buf_f->blf_flags & XFS_BLF_CANCEL)
> +		return XLOG_REORDER_CANCEL_LIST;
> +	if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
> +		return XLOG_REORDER_INODE_BUFFER_LIST;
> +	return XLOG_REORDER_BUFFER_LIST;
> +}

While you split this out, a comment explaining the reordering would
be nice here.

Otherwise this looks great:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 03/28] xfs: refactor log recovery item dispatch for pass2 readhead functions
  2020-05-05  1:10 ` [PATCH 03/28] xfs: refactor log recovery item dispatch for pass2 readhead functions Darrick J. Wong
  2020-05-05  4:32   ` Chandan Babu R
@ 2020-05-06 15:04   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:04 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:10:51PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the pass2 readhead code into the per-item source code files and use
> the dispatch function to call them.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 04/28] xfs: refactor log recovery item dispatch for pass1 commit functions
  2020-05-05  1:10 ` [PATCH 04/28] xfs: refactor log recovery item dispatch for pass1 commit functions Darrick J. Wong
  2020-05-05  4:40   ` Chandan Babu R
@ 2020-05-06 15:07   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:07 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

>  	list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) {
> +		trace_xfs_log_recover_item_recover(log, trans, item, pass);
> +
> +		if (!item->ri_ops) {
> +			xfs_warn(log->l_mp, "%s: invalid item type (%d)",
> +				__func__, ITEM_TYPE(item));
> +			ASSERT(0);
> +			return -EFSCORRUPTED;
> +		}

Given that we check for ri_ops during the reorder phase this can't
happen.  I think we should remove this check.

Otherwise looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 05/28] xfs: refactor log recovery buffer item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 05/28] xfs: refactor log recovery buffer item dispatch for pass2 " Darrick J. Wong
  2020-05-05  5:03   ` Chandan Babu R
@ 2020-05-06 15:09   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:09 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:11:03PM -0700, Darrick J. Wong wrote:
> +	if (item->ri_ops && item->ri_ops->commit_pass2)
> +		return item->ri_ops->commit_pass2(log, buffer_list, item,
> +				trans->r_lsn);
> +

I don't think ri_ops can ever be NULL here, so the check should be
removed.

Otherwise looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 06/28] xfs: refactor log recovery inode item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 06/28] xfs: refactor log recovery inode " Darrick J. Wong
  2020-05-05  5:09   ` Chandan Babu R
@ 2020-05-06 15:10   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:10 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:11:10PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the log inode item pass2 commit code into the per-item source code
> files and use the dispatch function to call it.  We do these one at a
> time because there's a lot of code to move.  No functional changes.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 07/28] xfs: refactor log recovery dquot item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 07/28] xfs: refactor log recovery dquot " Darrick J. Wong
  2020-05-05  5:13   ` Chandan Babu R
@ 2020-05-06 15:11   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:11 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:11:16PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the log dquot item pass2 commit code into the per-item source code
> files and use the dispatch function to call it.  We do these one at a
> time because there's a lot of code to move.  No functional changes.

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 08/28] xfs: refactor log recovery icreate item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 08/28] xfs: refactor log recovery icreate " Darrick J. Wong
  2020-05-05  6:10   ` Chandan Babu R
@ 2020-05-06 15:11   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:11 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:11:22PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the log icreate item pass2 commit code into the per-item source code
> files and use the dispatch function to call it.  We do these one at a
> time because there's a lot of code to move.  No functional changes.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 09/28] xfs: refactor log recovery EFI item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 09/28] xfs: refactor log recovery EFI " Darrick J. Wong
  2020-05-05  6:46   ` Chandan Babu R
@ 2020-05-06 15:12   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:12 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 10/28] xfs: refactor log recovery RUI item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 10/28] xfs: refactor log recovery RUI " Darrick J. Wong
  2020-05-05  7:02   ` Chandan Babu R
@ 2020-05-06 15:12   ` Christoph Hellwig
  2020-05-06 15:13   ` Christoph Hellwig
  2 siblings, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:12 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:11:35PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the rmap update intent and intent-done pass2 commit code into the
> per-item source code files and use dispatch functions to call them.  We
> do these one at a time because there's a lot of code to move.  No
> functional changes.

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 10/28] xfs: refactor log recovery RUI item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 10/28] xfs: refactor log recovery RUI " Darrick J. Wong
  2020-05-05  7:02   ` Chandan Babu R
  2020-05-06 15:12   ` Christoph Hellwig
@ 2020-05-06 15:13   ` Christoph Hellwig
  2 siblings, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:13 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 11/28] xfs: refactor log recovery CUI item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 11/28] xfs: refactor log recovery CUI " Darrick J. Wong
  2020-05-05  7:06   ` Chandan Babu R
@ 2020-05-06 15:13   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:13 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:11:41PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the refcount update intent and intent-done pass2 commit code into
> the per-item source code files and use dispatch functions to call them.
> We do these one at a time because there's a lot of code to move.  No
> functional changes.

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 12/28] xfs: refactor log recovery BUI item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 12/28] xfs: refactor log recovery BUI " Darrick J. Wong
  2020-05-05  7:14   ` Chandan Babu R
@ 2020-05-06 15:14   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:14 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:11:47PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the bmap update intent and intent-done pass2 commit code into the
> per-item source code files and use dispatch functions to call them.  We
> do these one at a time because there's a lot of code to move.  No
> functional changes.

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 13/28] xfs: remove log recovery quotaoff item dispatch for pass2 commit functions
  2020-05-05  1:11 ` [PATCH 13/28] xfs: remove log recovery quotaoff " Darrick J. Wong
  2020-05-05  7:32   ` Chandan Babu R
@ 2020-05-06 15:16   ` Christoph Hellwig
  2020-05-06 16:48     ` Darrick J. Wong
  1 sibling, 1 reply; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:16 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

> diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
> index 07ff943972a3..a07c1c8344d8 100644
> --- a/fs/xfs/xfs_dquot_item_recover.c
> +++ b/fs/xfs/xfs_dquot_item_recover.c
> @@ -197,4 +197,5 @@ xlog_recover_quotaoff_commit_pass1(
>  const struct xlog_recover_item_ops xlog_quotaoff_item_ops = {
>  	.item_type		= XFS_LI_QUOTAOFF,
>  	.commit_pass1		= xlog_recover_quotaoff_commit_pass1,
> +	.commit_pass2		= NULL, /* nothing to do in pass2 */

No need to initialize 0 or NULL fields in static structures.

Otherwise looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 14/28] xfs: refactor recovered EFI log item playback
  2020-05-05  1:12 ` [PATCH 14/28] xfs: refactor recovered EFI log item playback Darrick J. Wong
  2020-05-05  9:03   ` Chandan Babu R
@ 2020-05-06 15:18   ` Christoph Hellwig
  2020-05-06 18:59     ` Darrick J. Wong
  1 sibling, 1 reply; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:18 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

> +static const struct xfs_item_ops xfs_efi_item_ops = {
> +	.iop_size	= xfs_efi_item_size,
> +	.iop_format	= xfs_efi_item_format,
> +	.iop_unpin	= xfs_efi_item_unpin,
> +	.iop_release	= xfs_efi_item_release,
> +	.iop_recover	= xfs_efi_item_recover,
> +};
> +
> +

I guess we can drop the second empty line here.

>  		switch (lip->li_type) {
>  		case XFS_LI_EFI:
> -			error = xlog_recover_process_efi(log->l_mp, ailp, lip);
> +			error = lip->li_ops->iop_recover(lip, parent_tp);
>  			break;
>  		case XFS_LI_RUI:
>  			error = xlog_recover_process_rui(log->l_mp, ailp, lip);
> @@ -2893,7 +2853,9 @@ xlog_recover_cancel_intents(
>  
>  		switch (lip->li_type) {
>  		case XFS_LI_EFI:
> -			xlog_recover_cancel_efi(log->l_mp, ailp, lip);
> +			spin_unlock(&ailp->ail_lock);
> +			lip->li_ops->iop_release(lip);
> +			spin_lock(&ailp->ail_lock);

This looks a little weird, as I'd expect the default statement
to handle the "generic" case.  But then again this is all transitional,
so except for the minor nitpick above:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 15/28] xfs: refactor recovered RUI log item playback
  2020-05-05  1:12 ` [PATCH 15/28] xfs: refactor recovered RUI " Darrick J. Wong
  2020-05-05  9:10   ` Chandan Babu R
@ 2020-05-06 15:18   ` Christoph Hellwig
  2020-05-06 15:19   ` Christoph Hellwig
  2 siblings, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:18 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:12:07PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the code that processes the log items created from the recovered
> log items into the per-item source code files and use dispatch functions
> to call them.  No functional changes.

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 15/28] xfs: refactor recovered RUI log item playback
  2020-05-05  1:12 ` [PATCH 15/28] xfs: refactor recovered RUI " Darrick J. Wong
  2020-05-05  9:10   ` Chandan Babu R
  2020-05-06 15:18   ` Christoph Hellwig
@ 2020-05-06 15:19   ` Christoph Hellwig
  2 siblings, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:19 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 16/28] xfs: refactor recovered CUI log item playback
  2020-05-05  1:12 ` [PATCH 16/28] xfs: refactor recovered CUI " Darrick J. Wong
  2020-05-05  9:29   ` Chandan Babu R
@ 2020-05-06 15:19   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:19 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 17/28] xfs: refactor recovered BUI log item playback
  2020-05-05  1:12 ` [PATCH 17/28] xfs: refactor recovered BUI " Darrick J. Wong
  2020-05-05  9:49   ` Chandan Babu R
@ 2020-05-06 15:21   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:21 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

The xlog_item_is_intent change needs to be documented, or even better
split into a separate patch.

Otherwise looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 18/28] xfs: refactor unlinked inode recovery
  2020-05-05  1:12 ` [PATCH 18/28] xfs: refactor unlinked inode recovery Darrick J. Wong
  2020-05-05 13:05   ` Chandan Babu R
@ 2020-05-06 15:26   ` Christoph Hellwig
  2020-05-06 16:51     ` Darrick J. Wong
  1 sibling, 1 reply; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:26 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:12:29PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the code that processes unlinked inodes into a separate file in
> preparation for centralizing the log recovery bits that have to walk
> every AG.  No functional changes.

Is this really worth another tiny source file?

At least the interface seems very right.

> +out_error:
> +	xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
> +	return;
> +}

No need for a return at the end of a void function.

> +	struct xfs_mount	*mp;
> +	struct xfs_agi		*agi;
> +	struct xfs_buf		*agibp;
> +	xfs_agnumber_t		agno;
> +	xfs_agino_t		agino;
> +	int			bucket;
> +	int			error;
> +
> +	mp = log->l_mp;

Please initialize mp on the line where it is declared.

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 19/28] xfs: refactor xlog_recover_process_unlinked
  2020-05-05  1:12 ` [PATCH 19/28] xfs: refactor xlog_recover_process_unlinked Darrick J. Wong
  2020-05-05 13:19   ` Chandan Babu R
@ 2020-05-06 15:27   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:27 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:12:35PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Hoist the unlinked inode processing logic out of the AG loop and into
> its own function.  No functional changes.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_unlink_recover.c |   91 +++++++++++++++++++++++++------------------
>  1 file changed, 52 insertions(+), 39 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_unlink_recover.c b/fs/xfs/xfs_unlink_recover.c
> index 2a19d096e88d..413b34085640 100644
> --- a/fs/xfs/xfs_unlink_recover.c
> +++ b/fs/xfs/xfs_unlink_recover.c
> @@ -145,54 +145,67 @@ xlog_recover_process_one_iunlink(
>   * scheduled on this CPU to ensure other scheduled work can run without undue
>   * latency.
>   */
> -void
> -xlog_recover_process_unlinked(
> -	struct xlog		*log)
> +STATIC int
> +xlog_recover_process_iunlinked(

xlog_recover_process_ag_iunlinked?


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 20/28] xfs: report iunlink recovery failure upwards
  2020-05-05  1:12 ` [PATCH 20/28] xfs: report iunlink recovery failure upwards Darrick J. Wong
  2020-05-05 13:43   ` Chandan Babu R
@ 2020-05-06 15:27   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:27 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:12:41PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> If we fail to recover unlinked inodes due to corruption or whatnot, we
> should report this upwards and fail the mount instead of continuing on
> like nothing's wrong.  Eventually the user will trip over the busted
> AGI anyway.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 21/28] xfs: refactor releasing finished intents during log recovery
  2020-05-05  1:12 ` [PATCH 21/28] xfs: refactor releasing finished intents during log recovery Darrick J. Wong
  2020-05-06  4:06   ` Chandan Babu R
@ 2020-05-06 15:29   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:29 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:12:47PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Replace the open-coded AIL item walking with a proper helper when we're
> trying to release an intent item that has been finished.

The changelog should probably mention the addition of the new iop_match
method.  

Otherwise looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 22/28] xfs: refactor adding recovered intent items to the log
  2020-05-05  1:12 ` [PATCH 22/28] xfs: refactor adding recovered intent items to the log Darrick J. Wong
@ 2020-05-06 15:31   ` Christoph Hellwig
  2020-05-06 19:28     ` Darrick J. Wong
  0 siblings, 1 reply; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:31 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:12:53PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> During recovery, every intent that we recover from the log has to be
> added to the AIL.  Replace the open-coded addition with a helper.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>

Second thoughts: given that the helper is totally generic, maybe
name it xfs_trans_ail_insert and keep it in the generic AIL code?

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 23/28] xfs: refactor intent item RECOVERED flag into the log item
  2020-05-05  1:12 ` [PATCH 23/28] xfs: refactor intent item RECOVERED flag into the log item Darrick J. Wong
  2020-05-06  4:45   ` Chandan Babu R
@ 2020-05-06 15:32   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:32 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:12:59PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Rename XFS_{EFI,BUI,RUI,CUI}_RECOVERED to XFS_LI_RECOVERED so that we
> track recovery status in the log item, then get rid of the now unused
> flags fields in each of those log item types.

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 24/28] xfs: refactor intent item iop_recover calls
  2020-05-05  1:13 ` [PATCH 24/28] xfs: refactor intent item iop_recover calls Darrick J. Wong
  2020-05-06  5:14   ` Chandan Babu R
@ 2020-05-06 15:34   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:34 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:13:06PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Now that we've made the recovered item tests all the same, we can hoist
> the test and the ail locking code to the ->iop_recover caller and call
> the recovery function directly.

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 25/28] xfs: hoist setting of XFS_LI_RECOVERED to caller
  2020-05-05  1:13 ` [PATCH 25/28] xfs: hoist setting of XFS_LI_RECOVERED to caller Darrick J. Wong
  2020-05-06  5:34   ` Chandan Babu R
@ 2020-05-06 15:35   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:35 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:13:12PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> The only purpose of XFS_LI_RECOVERED is to prevent log recovery from
> trying to replay recovered intents more than once.  Therefore, we can
> move the bit setting up to the ->iop_recover caller.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 26/28] xfs: move log recovery buffer cancellation code to xfs_buf_item_recover.c
  2020-05-05  1:13 ` [PATCH 26/28] xfs: move log recovery buffer cancellation code to xfs_buf_item_recover.c Darrick J. Wong
  2020-05-06  6:42   ` Chandan Babu R
@ 2020-05-06 15:35   ` Christoph Hellwig
  1 sibling, 0 replies; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:35 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:13:18PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Move the helpers that handle incore buffer cancellation records to
> xfs_buf_item_recover.c since they're not directly related to the main
> log recovery machinery.  No functional changes.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 28/28] xfs: use parallel processing to clear unlinked metadata
  2020-05-05  1:13 ` [PATCH 28/28] xfs: use parallel processing to clear unlinked metadata Darrick J. Wong
  2020-05-06  7:57   ` Chandan Babu R
@ 2020-05-06 15:36   ` Christoph Hellwig
  2020-05-06 16:54     ` Darrick J. Wong
  1 sibling, 1 reply; 94+ messages in thread
From: Christoph Hellwig @ 2020-05-06 15:36 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Mon, May 04, 2020 at 06:13:30PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Run all the unlinked metadata clearing work in parallel so that we can
> take advantage of higher-performance storage devices.

Can you keep this out of the series (and maybe the whole iunlink move)?
The series already is huge, no need to add performance work to the huge
refactoring bucket.

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 13/28] xfs: remove log recovery quotaoff item dispatch for pass2 commit functions
  2020-05-06 15:16   ` Christoph Hellwig
@ 2020-05-06 16:48     ` Darrick J. Wong
  0 siblings, 0 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-06 16:48 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, May 06, 2020 at 08:16:11AM -0700, Christoph Hellwig wrote:
> > diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
> > index 07ff943972a3..a07c1c8344d8 100644
> > --- a/fs/xfs/xfs_dquot_item_recover.c
> > +++ b/fs/xfs/xfs_dquot_item_recover.c
> > @@ -197,4 +197,5 @@ xlog_recover_quotaoff_commit_pass1(
> >  const struct xlog_recover_item_ops xlog_quotaoff_item_ops = {
> >  	.item_type		= XFS_LI_QUOTAOFF,
> >  	.commit_pass1		= xlog_recover_quotaoff_commit_pass1,
> > +	.commit_pass2		= NULL, /* nothing to do in pass2 */
> 
> No need to initialize 0 or NULL fields in static structures.

Ok, I'll trim this line to only have the comment, to make it explicit
that quotaoff does no work for commit_pass2.

--D

> Otherwise looks good:
> 
> Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 18/28] xfs: refactor unlinked inode recovery
  2020-05-06 15:26   ` Christoph Hellwig
@ 2020-05-06 16:51     ` Darrick J. Wong
  0 siblings, 0 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-06 16:51 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, May 06, 2020 at 08:26:09AM -0700, Christoph Hellwig wrote:
> On Mon, May 04, 2020 at 06:12:29PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Move the code that processes unlinked inodes into a separate file in
> > preparation for centralizing the log recovery bits that have to walk
> > every AG.  No functional changes.
> 
> Is this really worth another tiny source file?

Later I plan to move into this file the code that cleans out stale COW
staging extents, since it should only be necessary to do that as part of
log recovery.

> At least the interface seems very right.
> 
> > +out_error:
> > +	xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
> > +	return;
> > +}
> 
> No need for a return at the end of a void function.
> 
> > +	struct xfs_mount	*mp;
> > +	struct xfs_agi		*agi;
> > +	struct xfs_buf		*agibp;
> > +	xfs_agnumber_t		agno;
> > +	xfs_agino_t		agino;
> > +	int			bucket;
> > +	int			error;
> > +
> > +	mp = log->l_mp;
> 
> Please initialize mp on the line where it is declared.

Ok, will fix.

--D

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 28/28] xfs: use parallel processing to clear unlinked metadata
  2020-05-06 15:36   ` Christoph Hellwig
@ 2020-05-06 16:54     ` Darrick J. Wong
  0 siblings, 0 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-06 16:54 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, May 06, 2020 at 08:36:32AM -0700, Christoph Hellwig wrote:
> On Mon, May 04, 2020 at 06:13:30PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Run all the unlinked metadata clearing work in parallel so that we can
> > take advantage of higher-performance storage devices.
> 
> Can you keep this out of the series (and maybe the whole iunlink move)?
> The series already is huge, no need to add performance work to the huge
> refactoring bucket.

Ok.  I'll make the unlinks clearing patches a separate series.

--D

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 02/28] xfs: refactor log recovery item sorting into a generic dispatch structure
  2020-05-06 15:03   ` Christoph Hellwig
@ 2020-05-06 18:36     ` Darrick J. Wong
  0 siblings, 0 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-06 18:36 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, May 06, 2020 at 08:03:24AM -0700, Christoph Hellwig wrote:
> On Mon, May 04, 2020 at 06:10:45PM -0700, Darrick J. Wong wrote:
> > +const struct xlog_recover_item_ops xlog_bmap_intent_item_ops = {
> > +	.item_type		= XFS_LI_BUI,
> > +};
> > +
> > +const struct xlog_recover_item_ops xlog_bmap_done_item_ops = {
> > +	.item_type		= XFS_LI_BUD,
> > +};
> 
> Pretty much everything else in this file seems to use bui/bud names.
> The same also applies to the four other intent/done pairs and their
> shortnames.  Not really a major thing, but it might be worth fixing
> to fit the flow.

Ok.

> > +STATIC enum xlog_recover_reorder
> > +xlog_recover_buf_reorder(
> > +	struct xlog_recover_item	*item)
> > +{
> > +	struct xfs_buf_log_format	*buf_f = item->ri_buf[0].i_addr;
> > +
> > +	if (buf_f->blf_flags & XFS_BLF_CANCEL)
> > +		return XLOG_REORDER_CANCEL_LIST;
> > +	if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
> > +		return XLOG_REORDER_INODE_BUFFER_LIST;
> > +	return XLOG_REORDER_BUFFER_LIST;
> > +}
> 
> While you split this out a comment explaining the reordering would
> be nice here.

Ok.

/*
 * Sort buffer items for log recovery.  Most buffer items should end up
 * on the buffer list and are recovered first, with the following
 * exceptions:
 *
 * 1. XFS_BLF_CANCEL buffers must be processed last because some log
 *    items might depend on the incore cancellation record, and
 *    replaying a cancelled buffer item can remove the incore record.
 *
 * 2. XFS_BLF_INODE_BUF buffers are handled after most regular items so
 *    that we replay di_next_unlinked only after flushing the inode
 *    'free' state to the inode buffer.
 *
 * See xlog_recover_reorder_trans for more details.
 */

--D

> Otherwise this looks great:
> 
> Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 14/28] xfs: refactor recovered EFI log item playback
  2020-05-06 15:18   ` Christoph Hellwig
@ 2020-05-06 18:59     ` Darrick J. Wong
  0 siblings, 0 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-06 18:59 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, May 06, 2020 at 08:18:12AM -0700, Christoph Hellwig wrote:
> > +static const struct xfs_item_ops xfs_efi_item_ops = {
> > +	.iop_size	= xfs_efi_item_size,
> > +	.iop_format	= xfs_efi_item_format,
> > +	.iop_unpin	= xfs_efi_item_unpin,
> > +	.iop_release	= xfs_efi_item_release,
> > +	.iop_recover	= xfs_efi_item_recover,
> > +};
> > +
> > +
> 
> I guess we can drop the second empty line here.
> 
> >  		switch (lip->li_type) {
> >  		case XFS_LI_EFI:
> > -			error = xlog_recover_process_efi(log->l_mp, ailp, lip);
> > +			error = lip->li_ops->iop_recover(lip, parent_tp);
> >  			break;
> >  		case XFS_LI_RUI:
> >  			error = xlog_recover_process_rui(log->l_mp, ailp, lip);
> > @@ -2893,7 +2853,9 @@ xlog_recover_cancel_intents(
> >  
> >  		switch (lip->li_type) {
> >  		case XFS_LI_EFI:
> > -			xlog_recover_cancel_efi(log->l_mp, ailp, lip);
> > +			spin_unlock(&ailp->ail_lock);
> > +			lip->li_ops->iop_release(lip);
> > +			spin_lock(&ailp->ail_lock);
> 
> This looks a little weird, as I'd expect the default statement
> to handle the "generic" case.  But then again this is all transitional,
> so except for minor nitpick above:

Hmm, that does make more sense; I'll change it.

--D

> Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 19/28] xfs: refactor xlog_recover_process_unlinked
  2020-05-05 13:19   ` Chandan Babu R
  2020-05-05 13:30     ` Chandan Babu R
@ 2020-05-06 19:11     ` Darrick J. Wong
  1 sibling, 0 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-06 19:11 UTC (permalink / raw)
  To: Chandan Babu R; +Cc: linux-xfs

On Tue, May 05, 2020 at 06:49:17PM +0530, Chandan Babu R wrote:
> On Tuesday 5 May 2020 6:42:35 AM IST Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Hoist the unlinked inode processing logic out of the AG loop and into
> > its own function.  No functional changes.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/xfs_unlink_recover.c |   91 +++++++++++++++++++++++++------------------
> >  1 file changed, 52 insertions(+), 39 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/xfs_unlink_recover.c b/fs/xfs/xfs_unlink_recover.c
> > index 2a19d096e88d..413b34085640 100644
> > --- a/fs/xfs/xfs_unlink_recover.c
> > +++ b/fs/xfs/xfs_unlink_recover.c
> > @@ -145,54 +145,67 @@ xlog_recover_process_one_iunlink(
> >   * scheduled on this CPU to ensure other scheduled work can run without undue
> >   * latency.
> >   */
> > -void
> > -xlog_recover_process_unlinked(
> > -	struct xlog		*log)
> > +STATIC int
> > +xlog_recover_process_iunlinked(
> > +	struct xfs_mount	*mp,
> > +	xfs_agnumber_t		agno)
> >  {
> > -	struct xfs_mount	*mp;
> >  	struct xfs_agi		*agi;
> >  	struct xfs_buf		*agibp;
> > -	xfs_agnumber_t		agno;
> >  	xfs_agino_t		agino;
> >  	int			bucket;
> >  	int			error;
> >  
> > -	mp = log->l_mp;
> > -
> > -	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> > -		/*
> > -		 * Find the agi for this ag.
> > -		 */
> > -		error = xfs_read_agi(mp, NULL, agno, &agibp);
> > -		if (error) {
> > -			/*
> > -			 * AGI is b0rked. Don't process it.
> > -			 *
> > -			 * We should probably mark the filesystem as corrupt
> > -			 * after we've recovered all the ag's we can....
> > -			 */
> > -			continue;
> > -		}
> > +	/*
> > +	 * Find the agi for this ag.
> > +	 */
> > +	error = xfs_read_agi(mp, NULL, agno, &agibp);
> > +	if (error) {
> >  		/*
> > -		 * Unlock the buffer so that it can be acquired in the normal
> > -		 * course of the transaction to truncate and free each inode.
> > -		 * Because we are not racing with anyone else here for the AGI
> > -		 * buffer, we don't even need to hold it locked to read the
> > -		 * initial unlinked bucket entries out of the buffer. We keep
> > -		 * buffer reference though, so that it stays pinned in memory
> > -		 * while we need the buffer.
> > +		 * AGI is b0rked. Don't process it.
> > +		 *
> > +		 * We should probably mark the filesystem as corrupt
> > +		 * after we've recovered all the ag's we can....
> >  		 */
> > -		agi = agibp->b_addr;
> > -		xfs_buf_unlock(agibp);
> > -
> > -		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
> > -			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
> > -			while (agino != NULLAGINO) {
> > -				agino = xlog_recover_process_one_iunlink(mp,
> > -							agno, agino, bucket);
> > -				cond_resched();
> > -			}
> > +		return error;
> 
> 
> This causes a change in behaviour i.e. an error return from here would cause
> xlog_recover_process_unlinked() to break "loop on all AGs". Before this
> change, XFS would continue to process all the remaining AGs as described by
> the above comment.

Hm, you're right.  I'll make this function return void and then mess
with the return values and whatnot later.

--D

> 
> > +	}
> > +
> > +	/*
> > +	 * Unlock the buffer so that it can be acquired in the normal
> > +	 * course of the transaction to truncate and free each inode.
> > +	 * Because we are not racing with anyone else here for the AGI
> > +	 * buffer, we don't even need to hold it locked to read the
> > +	 * initial unlinked bucket entries out of the buffer. We keep
> > +	 * buffer reference though, so that it stays pinned in memory
> > +	 * while we need the buffer.
> > +	 */
> > +	agi = agibp->b_addr;
> > +	xfs_buf_unlock(agibp);
> > +
> > +	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
> > +		agino = be32_to_cpu(agi->agi_unlinked[bucket]);
> > +		while (agino != NULLAGINO) {
> > +			agino = xlog_recover_process_one_iunlink(mp,
> > +						agno, agino, bucket);
> > +			cond_resched();
> >  		}
> > -		xfs_buf_rele(agibp);
> > +	}
> > +	xfs_buf_rele(agibp);
> > +
> > +	return 0;
> > +}
> > +
> > +void
> > +xlog_recover_process_unlinked(
> > +	struct xlog		*log)
> > +{
> > +	struct xfs_mount	*mp = log->l_mp;
> > +	xfs_agnumber_t		agno;
> > +	int			error;
> > +
> > +	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> > +		error = xlog_recover_process_iunlinked(mp, agno);
> > +		if (error)
> > +			break;
> >  	}
> >  }
> > 
> > 
> 
> 
> -- 
> chandan
> 
> 
> 

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 22/28] xfs: refactor adding recovered intent items to the log
  2020-05-06 15:31   ` Christoph Hellwig
@ 2020-05-06 19:28     ` Darrick J. Wong
  0 siblings, 0 replies; 94+ messages in thread
From: Darrick J. Wong @ 2020-05-06 19:28 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Wed, May 06, 2020 at 08:31:10AM -0700, Christoph Hellwig wrote:
> On Mon, May 04, 2020 at 06:12:53PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > During recovery, every intent that we recover from the log has to be
> > added to the AIL.  Replace the open-coded addition with a helper.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > Reviewed-by: Christoph Hellwig <hch@lst.de>
> 
> Second thoughts: given that the helper is totally generic, maybe
> name it xfs_trans_ail_insert and keep it in the generic AIL code?

Ok, renamed to xfs_trans_ail_insert and moved to xfs_trans_ail.c.

--D

^ permalink raw reply	[flat|nested] 94+ messages in thread

end of thread, other threads:[~2020-05-06 19:28 UTC | newest]

Thread overview: 94+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-05  1:10 [PATCH v3 00/28] xfs: refactor log recovery Darrick J. Wong
2020-05-05  1:10 ` [PATCH 01/28] xfs: convert xfs_log_recover_item_t to struct xfs_log_recover_item Darrick J. Wong
2020-05-05  3:33   ` Chandan Babu R
2020-05-06 14:59   ` Christoph Hellwig
2020-05-05  1:10 ` [PATCH 02/28] xfs: refactor log recovery item sorting into a generic dispatch structure Darrick J. Wong
2020-05-05  4:11   ` Chandan Babu R
2020-05-06 15:03   ` Christoph Hellwig
2020-05-06 18:36     ` Darrick J. Wong
2020-05-05  1:10 ` [PATCH 03/28] xfs: refactor log recovery item dispatch for pass2 readhead functions Darrick J. Wong
2020-05-05  4:32   ` Chandan Babu R
2020-05-06 15:04   ` Christoph Hellwig
2020-05-05  1:10 ` [PATCH 04/28] xfs: refactor log recovery item dispatch for pass1 commit functions Darrick J. Wong
2020-05-05  4:40   ` Chandan Babu R
2020-05-06 15:07   ` Christoph Hellwig
2020-05-05  1:11 ` [PATCH 05/28] xfs: refactor log recovery buffer item dispatch for pass2 " Darrick J. Wong
2020-05-05  5:03   ` Chandan Babu R
2020-05-06 15:09   ` Christoph Hellwig
2020-05-05  1:11 ` [PATCH 06/28] xfs: refactor log recovery inode " Darrick J. Wong
2020-05-05  5:09   ` Chandan Babu R
2020-05-06 15:10   ` Christoph Hellwig
2020-05-05  1:11 ` [PATCH 07/28] xfs: refactor log recovery dquot " Darrick J. Wong
2020-05-05  5:13   ` Chandan Babu R
2020-05-06 15:11   ` Christoph Hellwig
2020-05-05  1:11 ` [PATCH 08/28] xfs: refactor log recovery icreate " Darrick J. Wong
2020-05-05  6:10   ` Chandan Babu R
2020-05-06 15:11   ` Christoph Hellwig
2020-05-05  1:11 ` [PATCH 09/28] xfs: refactor log recovery EFI " Darrick J. Wong
2020-05-05  6:46   ` Chandan Babu R
2020-05-06 15:12   ` Christoph Hellwig
2020-05-05  1:11 ` [PATCH 10/28] xfs: refactor log recovery RUI " Darrick J. Wong
2020-05-05  7:02   ` Chandan Babu R
2020-05-06 15:12   ` Christoph Hellwig
2020-05-06 15:13   ` Christoph Hellwig
2020-05-05  1:11 ` [PATCH 11/28] xfs: refactor log recovery CUI " Darrick J. Wong
2020-05-05  7:06   ` Chandan Babu R
2020-05-06 15:13   ` Christoph Hellwig
2020-05-05  1:11 ` [PATCH 12/28] xfs: refactor log recovery BUI " Darrick J. Wong
2020-05-05  7:14   ` Chandan Babu R
2020-05-06 15:14   ` Christoph Hellwig
2020-05-05  1:11 ` [PATCH 13/28] xfs: remove log recovery quotaoff " Darrick J. Wong
2020-05-05  7:32   ` Chandan Babu R
2020-05-06 15:16   ` Christoph Hellwig
2020-05-06 16:48     ` Darrick J. Wong
2020-05-05  1:12 ` [PATCH 14/28] xfs: refactor recovered EFI log item playback Darrick J. Wong
2020-05-05  9:03   ` Chandan Babu R
2020-05-06 15:18   ` Christoph Hellwig
2020-05-06 18:59     ` Darrick J. Wong
2020-05-05  1:12 ` [PATCH 15/28] xfs: refactor recovered RUI " Darrick J. Wong
2020-05-05  9:10   ` Chandan Babu R
2020-05-06 15:18   ` Christoph Hellwig
2020-05-06 15:19   ` Christoph Hellwig
2020-05-05  1:12 ` [PATCH 16/28] xfs: refactor recovered CUI " Darrick J. Wong
2020-05-05  9:29   ` Chandan Babu R
2020-05-05  9:29     ` Chandan Babu R
2020-05-06 15:19   ` Christoph Hellwig
2020-05-05  1:12 ` [PATCH 17/28] xfs: refactor recovered BUI " Darrick J. Wong
2020-05-05  9:49   ` Chandan Babu R
2020-05-06 15:21   ` Christoph Hellwig
2020-05-05  1:12 ` [PATCH 18/28] xfs: refactor unlinked inode recovery Darrick J. Wong
2020-05-05 13:05   ` Chandan Babu R
2020-05-06 15:26   ` Christoph Hellwig
2020-05-06 16:51     ` Darrick J. Wong
2020-05-05  1:12 ` [PATCH 19/28] xfs: refactor xlog_recover_process_unlinked Darrick J. Wong
2020-05-05 13:19   ` Chandan Babu R
2020-05-05 13:30     ` Chandan Babu R
2020-05-06 19:11     ` Darrick J. Wong
2020-05-06 15:27   ` Christoph Hellwig
2020-05-05  1:12 ` [PATCH 20/28] xfs: report iunlink recovery failure upwards Darrick J. Wong
2020-05-05 13:43   ` Chandan Babu R
2020-05-06 15:27   ` Christoph Hellwig
2020-05-05  1:12 ` [PATCH 21/28] xfs: refactor releasing finished intents during log recovery Darrick J. Wong
2020-05-06  4:06   ` Chandan Babu R
2020-05-06 15:29   ` Christoph Hellwig
2020-05-05  1:12 ` [PATCH 22/28] xfs: refactor adding recovered intent items to the log Darrick J. Wong
2020-05-06 15:31   ` Christoph Hellwig
2020-05-06 19:28     ` Darrick J. Wong
2020-05-05  1:12 ` [PATCH 23/28] xfs: refactor intent item RECOVERED flag into the log item Darrick J. Wong
2020-05-06  4:45   ` Chandan Babu R
2020-05-06 15:32   ` Christoph Hellwig
2020-05-05  1:13 ` [PATCH 24/28] xfs: refactor intent item iop_recover calls Darrick J. Wong
2020-05-06  5:14   ` Chandan Babu R
2020-05-06 15:34   ` Christoph Hellwig
2020-05-05  1:13 ` [PATCH 25/28] xfs: hoist setting of XFS_LI_RECOVERED to caller Darrick J. Wong
2020-05-06  5:34   ` Chandan Babu R
2020-05-06 15:35   ` Christoph Hellwig
2020-05-05  1:13 ` [PATCH 26/28] xfs: move log recovery buffer cancellation code to xfs_buf_item_recover.c Darrick J. Wong
2020-05-06  6:42   ` Chandan Babu R
2020-05-06 15:35   ` Christoph Hellwig
2020-05-05  1:13 ` [PATCH 27/28] xfs: remove unnecessary includes from xfs_log_recover.c Darrick J. Wong
2020-05-06  7:21   ` Chandan Babu R
2020-05-05  1:13 ` [PATCH 28/28] xfs: use parallel processing to clear unlinked metadata Darrick J. Wong
2020-05-06  7:57   ` Chandan Babu R
2020-05-06 15:36   ` Christoph Hellwig
2020-05-06 16:54     ` Darrick J. Wong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.