linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Pingfan Liu <kernelfans@gmail.com>
To: linux-xfs@vger.kernel.org
Cc: Pingfan Liu <kernelfans@gmail.com>,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	Dave Chinner <dchinner@redhat.com>,
	Eric Sandeen <esandeen@redhat.com>,
	Hari Bathini <hbathini@linux.ibm.com>,
	linuxppc-dev@lists.ozlabs.org
Subject: [PATCH] xfs: introduce "metasync" api to sync metadata to fsblock
Date: Sun, 13 Oct 2019 22:37:00 +0800	[thread overview]
Message-ID: <1570977420-3944-1-git-send-email-kernelfans@gmail.com> (raw)

When using fadump (fireware assist dump) mode on powerpc, a mismatch
between grub xfs driver and kernel xfs driver has been obsevered.  Note:
fadump boots up in the following sequence: fireware -> grub reads kernel
and initramfs -> kernel boots.

The process to reproduce this mismatch:
  - On powerpc, boot kernel with fadump=on and edit /etc/kdump.conf.
  - Replacing "path /var/crash" with "path /var/crashnew", then, "kdumpctl
    restart" to rebuild the initramfs. Detail about the rebuilding looks
    like: mkdumprd /boot/initramfs-`uname -r`.img.tmp;
          mv /boot/initramfs-`uname -r`.img.tmp /boot/initramfs-`uname -r`.img
          sync
  - "echo c >/proc/sysrq-trigger".

The result:
The dump image will not be saved under /var/crashnew/* as expected, but
still saved under /var/crash.

The root cause:
As Eric pointed out that on xfs, 'sync' ensures the consistency by writing
back metadata to xlog, but not necessary to fsblock. This raises issue if
grub can not replay the xlog before accessing the xfs files. Since the
above dir entry of initramfs should be saved as inline data with xfs_inode,
so xfs_fs_sync_fs() does not guarantee it written to fsblock.

umount can be used to write metadata fsblock, but the filesystem can not be
umounted if still in use.

There are two ways to fix this mismatch, either grub or xfs. It may be
easier to do this in xfs side by introducing an interface to flush metadata
to fsblock explicitly.

With this patch, metadata can be written to fsblock by:
  # update AIL
  sync
  # new introduced interface to flush metadata to fsblock
  mount -o remount,metasync mountpoint

Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
Cc: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Eric Sandeen <esandeen@redhat.com>
Cc: Hari Bathini <hbathini@linux.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
To: linux-xfs@vger.kernel.org
---
 fs/xfs/xfs_mount.h      |  1 +
 fs/xfs/xfs_super.c      | 15 ++++++++++++++-
 fs/xfs/xfs_trans.h      |  2 ++
 fs/xfs/xfs_trans_ail.c  | 26 +++++++++++++++++++++++++-
 fs/xfs/xfs_trans_priv.h |  1 +
 5 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index fdb60e0..85f32e6 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -243,6 +243,7 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_FILESTREAMS	(1ULL << 24)	/* enable the filestreams
 						   allocator */
 #define XFS_MOUNT_NOATTR2	(1ULL << 25)	/* disable use of attr2 format */
+#define XFS_MOUNT_METASYNC	(1ull << 26)	/* write meta to fsblock */
 
 #define XFS_MOUNT_DAX		(1ULL << 62)	/* TEST ONLY! */
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 8d1df9f..41df810 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -59,7 +59,7 @@ enum {
 	Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
 	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
 	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
-	Opt_discard, Opt_nodiscard, Opt_dax, Opt_err,
+	Opt_discard, Opt_nodiscard, Opt_dax, Opt_metasync, Opt_err
 };
 
 static const match_table_t tokens = {
@@ -106,6 +106,7 @@ static const match_table_t tokens = {
 	{Opt_discard,	"discard"},	/* Discard unused blocks */
 	{Opt_nodiscard,	"nodiscard"},	/* Do not discard unused blocks */
 	{Opt_dax,	"dax"},		/* Enable direct access to bdev pages */
+	{Opt_metasync,	"metasync"},	/* one shot to write meta to fsblock */
 	{Opt_err,	NULL},
 };
 
@@ -338,6 +339,9 @@ xfs_parseargs(
 			mp->m_flags |= XFS_MOUNT_DAX;
 			break;
 #endif
+		case Opt_metasync:
+			mp->m_flags |= XFS_MOUNT_METASYNC;
+			break;
 		default:
 			xfs_warn(mp, "unknown mount option [%s].", p);
 			return -EINVAL;
@@ -1259,6 +1263,9 @@ xfs_fs_remount(
 			mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
 			mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
 			break;
+		case Opt_metasync:
+			mp->m_flags |= XFS_MOUNT_METASYNC;
+			break;
 		default:
 			/*
 			 * Logically we would return an error here to prevent
@@ -1286,6 +1293,12 @@ xfs_fs_remount(
 		}
 	}
 
+	if (mp->m_flags & XFS_MOUNT_METASYNC) {
+		xfs_ail_push_sync(mp->m_ail);
+		/* one shot flag */
+		mp->m_flags &= ~XFS_MOUNT_METASYNC;
+	}
+
 	/* ro -> rw */
 	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) {
 		if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 64d7f17..fcdb902 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -242,6 +242,8 @@ void		xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *,
 void		xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
 					struct xfs_buf *src_bp);
 
+void		xfs_ail_push_sync(struct xfs_ail *ailp);
+
 extern kmem_zone_t	*xfs_trans_zone;
 
 #endif	/* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 6ccfd75..b8d8df1 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -488,7 +488,11 @@ xfsaild_push(
 	xfs_trans_ail_cursor_done(&cur);
 	spin_unlock(&ailp->ail_lock);
 
-	if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list))
+	if (unlikely(mp->m_flags & XFS_MOUNT_METASYNC)) {
+		xfs_buf_delwri_submit(&ailp->ail_buf_list);
+		ailp->ail_log_flush++;
+		wake_up_all(&ailp->pushed_que);
+	} else if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list))
 		ailp->ail_log_flush++;
 
 	if (!count || XFS_LSN_CMP(lsn, target) >= 0) {
@@ -641,6 +645,25 @@ xfs_ail_push(
 	wake_up_process(ailp->ail_task);
 }
 
+void
+xfs_ail_push_sync(
+	struct xfs_ail		*ailp)
+{
+	xfs_lsn_t		sync_lsn;
+	DEFINE_WAIT(wait);
+
+	sync_lsn = xfs_ail_max_lsn(ailp);
+	for (;;) {
+		xfs_ail_push(ailp, sync_lsn);
+		prepare_to_wait(&ailp->pushed_que, &wait, TASK_INTERRUPTIBLE);
+		if (XFS_LSN_CMP(READ_ONCE(ailp->ail_target_prev),
+			sync_lsn) >= 0)
+			break;
+		schedule();
+	}
+	finish_wait(&ailp->pushed_que, &wait);
+}
+
 /*
  * Push out all items in the AIL immediately
  */
@@ -834,6 +857,7 @@ xfs_trans_ail_init(
 	spin_lock_init(&ailp->ail_lock);
 	INIT_LIST_HEAD(&ailp->ail_buf_list);
 	init_waitqueue_head(&ailp->ail_empty);
+	init_waitqueue_head(&ailp->pushed_que);
 
 	ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
 			ailp->ail_mount->m_fsname);
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 2e073c1..9fe3cc6 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -61,6 +61,7 @@ struct xfs_ail {
 	int			ail_log_flush;
 	struct list_head	ail_buf_list;
 	wait_queue_head_t	ail_empty;
+	wait_queue_head_t	pushed_que;
 };
 
 /*
-- 
2.7.5


             reply	other threads:[~2019-10-13 14:37 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-13 14:37 Pingfan Liu [this message]
2019-10-13 16:34 ` [PATCH] xfs: introduce "metasync" api to sync metadata to fsblock Darrick J. Wong
2019-10-14  8:33   ` Pingfan Liu
2019-10-14  9:43     ` Jan Kara
2019-10-14 13:23       ` Eric Sandeen
2019-10-14 20:03         ` Jan Kara
2019-10-14 20:09           ` Eric Sandeen
2019-10-15  8:01             ` Christoph Hellwig
2019-10-15 13:10               ` Theodore Y. Ts'o
2019-10-15 16:18                 ` Darrick J. Wong
2019-10-15  2:20           ` Pingfan Liu
2019-10-15  2:12         ` Pingfan Liu
2019-10-14  8:40 ` Christoph Hellwig
2019-10-15  1:56   ` Pingfan Liu
2019-10-15  8:01     ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1570977420-3944-1-git-send-email-kernelfans@gmail.com \
    --to=kernelfans@gmail.com \
    --cc=darrick.wong@oracle.com \
    --cc=dchinner@redhat.com \
    --cc=esandeen@redhat.com \
    --cc=hbathini@linux.ibm.com \
    --cc=linux-xfs@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).