All of lore.kernel.org
 help / color / mirror / Atom feed
From: Brian Foster <bfoster@redhat.com>
To: linux-xfs@vger.kernel.org
Subject: [PATCH v2 2/3] xfs: transaction subsystem quiesce mechanism
Date: Tue,  6 Apr 2021 10:42:37 -0400	[thread overview]
Message-ID: <20210406144238.814558-3-bfoster@redhat.com> (raw)
In-Reply-To: <20210406144238.814558-1-bfoster@redhat.com>

The updated quotaoff logging algorithm depends on a runtime quiesce
of the transaction subsystem to guarantee that all transactions
started after a certain point detect quota subsystem changes.
Implement this mechanism using an internal lock, similar to the
external filesystem freeze mechanism. This is also somewhat
analogous to the old percpu transaction counter mechanism, but we
don't actually need a counter.

Signed-off-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/xfs_aops.c  |  2 ++
 fs/xfs/xfs_mount.h |  3 +++
 fs/xfs/xfs_super.c |  8 ++++++++
 fs/xfs/xfs_trans.c |  4 ++--
 fs/xfs/xfs_trans.h | 20 ++++++++++++++++++++
 5 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 1cc7c36d98e9..dce52943e5a7 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -58,6 +58,7 @@ xfs_setfilesize_trans_alloc(
 	 * we released it.
 	 */
 	__sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
+	percpu_rwsem_release(&mp->m_trans_rwsem, true, _THIS_IP_);
 	/*
 	 * We hand off the transaction to the completion thread now, so
 	 * clear the flag here.
@@ -127,6 +128,7 @@ xfs_setfilesize_ioend(
 	 */
 	xfs_trans_set_context(tp);
 	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
+	percpu_rwsem_acquire(&ip->i_mount->m_trans_rwsem, true, _THIS_IP_);
 
 	/* we abort the update if there was an IO error */
 	if (error) {
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 81829d19596e..27a2a53abb4f 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -171,6 +171,9 @@ typedef struct xfs_mount {
 	 */
 	struct percpu_counter	m_delalloc_blks;
 
+	/* lock for transaction quiesce (used by quotaoff) */
+	struct percpu_rw_semaphore	m_trans_rwsem;
+
 	struct radix_tree_root	m_perag_tree;	/* per-ag accounting info */
 	spinlock_t		m_perag_lock;	/* lock for m_perag_tree */
 	uint64_t		m_resblks;	/* total reserved blocks */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 8d079c5e7099..64feab042dea 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1001,8 +1001,15 @@ xfs_init_percpu_counters(
 	if (error)
 		goto free_fdblocks;
 
+	/* not a counter, but close enough... */
+	error = percpu_init_rwsem(&mp->m_trans_rwsem);
+	if (error)
+		goto free_delalloc;
+
 	return 0;
 
+free_delalloc:
+	percpu_counter_destroy(&mp->m_delalloc_blks);
 free_fdblocks:
 	percpu_counter_destroy(&mp->m_fdblocks);
 free_ifree:
@@ -1025,6 +1032,7 @@ static void
 xfs_destroy_percpu_counters(
 	struct xfs_mount	*mp)
 {
+	percpu_free_rwsem(&mp->m_trans_rwsem);
 	percpu_counter_destroy(&mp->m_icount);
 	percpu_counter_destroy(&mp->m_ifree);
 	percpu_counter_destroy(&mp->m_fdblocks);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index bc25afc10245..c46943f0fc77 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -74,7 +74,7 @@ xfs_trans_free(
 	trace_xfs_trans_free(tp, _RET_IP_);
 	xfs_trans_clear_context(tp);
 	if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
-		sb_end_intwrite(tp->t_mountp->m_super);
+		xfs_trans_end(tp->t_mountp);
 	xfs_trans_free_dqinfo(tp);
 	kmem_cache_free(xfs_trans_zone, tp);
 }
@@ -265,7 +265,7 @@ xfs_trans_alloc(
 retry:
 	tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL);
 	if (!(flags & XFS_TRANS_NO_WRITECOUNT))
-		sb_start_intwrite(mp->m_super);
+		xfs_trans_start(mp);
 	xfs_trans_set_context(tp);
 
 	/*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 9dd745cf77c9..95da3e179150 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -226,6 +226,26 @@ xfs_trans_read_buf(
 				      flags, bpp, ops);
 }
 
+/*
+ * Context tracking helpers for external (i.e. fs freeze) and internal
+ * transaction quiesce.
+ */
+static inline void
+xfs_trans_start(
+	struct xfs_mount	*mp)
+{
+	sb_start_intwrite(mp->m_super);
+	percpu_down_read(&mp->m_trans_rwsem);
+}
+
+static inline void
+xfs_trans_end(
+	struct xfs_mount	*mp)
+{
+	percpu_up_read(&mp->m_trans_rwsem);
+	sb_end_intwrite(mp->m_super);
+}
+
 struct xfs_buf	*xfs_trans_getsb(struct xfs_trans *);
 
 void		xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
-- 
2.26.3


  parent reply	other threads:[~2021-04-06 14:42 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-06 14:42 [PATCH v2 0/3] xfs: rework quotaoff to avoid log deadlock Brian Foster
2021-04-06 14:42 ` [PATCH v2 1/3] xfs: skip dquot reservations if quota is inactive Brian Foster
2021-04-07  7:58   ` Christoph Hellwig
2021-04-07 15:51   ` Darrick J. Wong
2021-04-06 14:42 ` Brian Foster [this message]
2021-04-07  8:00   ` [PATCH v2 2/3] xfs: transaction subsystem quiesce mechanism Christoph Hellwig
2021-04-07 11:36     ` Brian Foster
2021-04-07 13:24       ` Christoph Hellwig
2021-04-07 15:50         ` Darrick J. Wong
2021-04-06 14:42 ` [PATCH v2 3/3] xfs: rework quotaoff logging to avoid log deadlock on active fs Brian Foster

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210406144238.814558-3-bfoster@redhat.com \
    --to=bfoster@redhat.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.