[PATCH RFC] xfs: automatic log item relogging experiment

* [PATCH RFC] xfs: automatic log item relogging experiment
@ 2019-10-24 17:28 Brian Foster
  2019-10-24 22:43 ` Dave Chinner
  0 siblings, 1 reply; 7+ messages in thread
From: Brian Foster @ 2019-10-24 17:28 UTC (permalink / raw)
  To: linux-xfs

An experimental mechanism to automatically relog specified log
items.  This is useful for long running operations, like quotaoff,
that otherwise risk deadlocking on log space usage.

Not-signed-off-by: Brian Foster <bfoster@redhat.com>
---

Hi all,

This is an experiment that came out of a discussion with Darrick[1] on
some design bits of the latest online btree repair work. Specifically,
it logs free intents in the same transaction as block allocation to
guard against inconsistency in the event of a crash during the repair
sequence. These intents happen pin the log tail for an indeterminate
amount of time. Darrick was looking for some form of auto relog
mechanism to facilitate this general approach. It occurred to us that
this is the same problem we've had with quotaoff for some time, so I
figured it might be worth prototyping something against that to try and
prove the concept.

Note that this is RFC because the code and interfaces are a complete
mess and this is broken in certain ways. This occasionally triggers log
reservation overrun shutdowns because transaction reservation checking
has not yet been added, the cancellation path is overkill, etc. IOW, the
purpose of this patch is purely to test a concept.

The concept is essentially to flag a log item for relogging on first
transaction commit such that once it commits to the AIL, the next
transaction that happens to commit with sufficient unused reservation
opportunistically relogs the item to the current CIL context. For the
log intent case, the transaction that commits the done item is required
to cancel the relog state of the original intent to prevent further
relogging.

In practice, this allows a log item to automatically roll through CIL
checkpoints and not pin the tail of the log while something like a
quotaoff is running for a potentially long period of time. This is
applied to quotaoff and focused testing shows that it avoids the
associated deadlock.

Thoughts, reviews, flames appreciated.

[1] https://lore.kernel.org/linux-xfs/20191018143856.GA25763@bfoster/

 fs/xfs/xfs_log_cil.c     | 69 ++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_log_priv.h    |  6 ++++
 fs/xfs/xfs_qm_syscalls.c | 13 ++++++++
 fs/xfs/xfs_trace.h       |  2 ++
 fs/xfs/xfs_trans.c       |  4 +++
 fs/xfs/xfs_trans.h       |  4 ++-
 6 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index a1204424a938..b2d8b2d54df6 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -75,6 +75,33 @@ xlog_cil_iovec_space(
 			sizeof(uint64_t));
 }
 
+static void
+xlog_cil_relog_items(
+	struct xlog		*log,
+	struct xfs_trans	*tp)
+{
+	struct xfs_cil		*cil = log->l_cilp;
+	struct xfs_log_item	*lip;
+
+	ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
+
+	if (list_empty(&cil->xc_relog))
+		return;
+
+	/* XXX: need to check trans reservation, process multiple items, etc. */
+	spin_lock(&cil->xc_relog_lock);
+	lip = list_first_entry_or_null(&cil->xc_relog, struct xfs_log_item, li_cil);
+	if (lip)
+		list_del_init(&lip->li_cil);
+	spin_unlock(&cil->xc_relog_lock);
+
+	if (lip) {
+		xfs_trans_add_item(tp, lip);
+		set_bit(XFS_LI_DIRTY, &lip->li_flags);
+		trace_xfs_cil_relog(lip);
+	}
+}
+
 /*
  * Allocate or pin log vector buffers for CIL insertion.
  *
@@ -1001,6 +1028,8 @@ xfs_log_commit_cil(
 	struct xfs_log_item	*lip, *next;
 	xfs_lsn_t		xc_commit_lsn;
 
+	xlog_cil_relog_items(log, tp);
+
 	/*
 	 * Do all necessary memory allocation before we lock the CIL.
 	 * This ensures the allocation does not deadlock with a CIL
@@ -1196,6 +1225,8 @@ xlog_cil_init(
 	spin_lock_init(&cil->xc_push_lock);
 	init_rwsem(&cil->xc_ctx_lock);
 	init_waitqueue_head(&cil->xc_commit_wait);
+	INIT_LIST_HEAD(&cil->xc_relog);
+	spin_lock_init(&cil->xc_relog_lock);
 
 	INIT_LIST_HEAD(&ctx->committing);
 	INIT_LIST_HEAD(&ctx->busy_extents);
@@ -1223,3 +1254,41 @@ xlog_cil_destroy(
 	kmem_free(log->l_cilp);
 }
 
+void
+xfs_cil_relog_item(
+	struct xlog		*log,
+	struct xfs_log_item	*lip)
+{
+	struct xfs_cil		*cil = log->l_cilp;
+
+	ASSERT(test_bit(XFS_LI_RELOG, &lip->li_flags));
+	ASSERT(list_empty(&lip->li_cil));
+
+	spin_lock(&cil->xc_relog_lock);
+	list_add_tail(&lip->li_cil, &cil->xc_relog);
+	spin_unlock(&cil->xc_relog_lock);
+
+	trace_xfs_cil_relog_queue(lip);
+}
+
+bool
+xfs_cil_relog_steal(
+	struct xlog		*log,
+	struct xfs_log_item	*lip)
+{
+	struct xfs_cil		*cil = log->l_cilp;
+	struct xfs_log_item	*pos, *ppos;
+	bool			ret = false;
+
+	spin_lock(&cil->xc_relog_lock);
+	list_for_each_entry_safe(pos, ppos, &cil->xc_relog, li_cil) {
+		if (pos == lip) {
+			list_del_init(&pos->li_cil);
+			ret = true;
+			break;
+		}
+	}
+	spin_unlock(&cil->xc_relog_lock);
+
+	return ret;
+}
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 4f19375f6592..f75a0a9f6984 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -10,6 +10,7 @@ struct xfs_buf;
 struct xlog;
 struct xlog_ticket;
 struct xfs_mount;
+struct xfs_log_item;
 
 /*
  * Flags for log structure
@@ -275,6 +276,9 @@ struct xfs_cil {
 	wait_queue_head_t	xc_commit_wait;
 	xfs_lsn_t		xc_current_sequence;
 	struct work_struct	xc_push_work;
+
+	struct list_head	xc_relog;
+	spinlock_t		xc_relog_lock;
 } ____cacheline_aligned_in_smp;
 
 /*
@@ -511,6 +515,8 @@ int	xlog_cil_init(struct xlog *log);
 void	xlog_cil_init_post_recovery(struct xlog *log);
 void	xlog_cil_destroy(struct xlog *log);
 bool	xlog_cil_empty(struct xlog *log);
+void	xfs_cil_relog_item(struct xlog *log, struct xfs_log_item *lip);
+bool	xfs_cil_relog_steal(struct xlog *log, struct xfs_log_item *lip);
 
 /*
  * CIL force routines
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index da7ad0383037..5e529190029d 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -18,6 +18,8 @@
 #include "xfs_quota.h"
 #include "xfs_qm.h"
 #include "xfs_icache.h"
+#include "xfs_log_priv.h"
+#include "xfs_trans_priv.h"
 
 STATIC int	xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
 STATIC int	xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
@@ -556,6 +558,16 @@ xfs_qm_log_quotaoff_end(
 					flags & XFS_ALL_QUOTA_ACCT);
 	xfs_trans_log_quotaoff_item(tp, qoffi);
 
+	/*
+	 * XXX: partly open coded relog of the start item to ensure no relogging
+	 * after this point.
+	 */
+	clear_bit(XFS_LI_RELOG, &startqoff->qql_item.li_flags);
+	if (xfs_cil_relog_steal(mp->m_log, &startqoff->qql_item)) {
+		xfs_trans_add_item(tp, &startqoff->qql_item);
+		xfs_trans_log_quotaoff_item(tp, startqoff);
+	}
+
 	/*
 	 * We have to make sure that the transaction is secure on disk before we
 	 * return and actually stop quota accounting. So, make it synchronous.
@@ -583,6 +595,7 @@ xfs_qm_log_quotaoff(
 		goto out;
 
 	qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
+	set_bit(XFS_LI_RELOG, &qoffi->qql_item.li_flags);
 	xfs_trans_log_quotaoff_item(tp, qoffi);
 
 	spin_lock(&mp->m_sb_lock);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 926f4d10dc02..6fe31a00e362 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1063,6 +1063,8 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_push);
 DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned);
 DEFINE_LOG_ITEM_EVENT(xfs_ail_locked);
 DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing);
+DEFINE_LOG_ITEM_EVENT(xfs_cil_relog);
+DEFINE_LOG_ITEM_EVENT(xfs_cil_relog_queue);
 
 DECLARE_EVENT_CLASS(xfs_ail_class,
 	TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index f4795fdb7389..95c74c6de4e2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -19,6 +19,7 @@
 #include "xfs_trace.h"
 #include "xfs_error.h"
 #include "xfs_defer.h"
+#include "xfs_log_priv.h"
 
 kmem_zone_t	*xfs_trans_zone;
 
@@ -863,6 +864,9 @@ xfs_trans_committed_bulk(
 		if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
 			continue;
 
+		if (test_bit(XFS_LI_RELOG, &lip->li_flags))
+			xfs_cil_relog_item(lip->li_mountp->m_log, lip);
+
 		/*
 		 * if we are aborting the operation, no point in inserting the
 		 * object into the AIL as we are in a shutdown situation.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 64d7f171ebd3..596102405acf 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -59,12 +59,14 @@ struct xfs_log_item {
 #define	XFS_LI_ABORTED	1
 #define	XFS_LI_FAILED	2
 #define	XFS_LI_DIRTY	3	/* log item dirty in transaction */
+#define XFS_LI_RELOG	4	/* relog item on checkpoint commit */
 
 #define XFS_LI_FLAGS \
 	{ (1 << XFS_LI_IN_AIL),		"IN_AIL" }, \
 	{ (1 << XFS_LI_ABORTED),	"ABORTED" }, \
 	{ (1 << XFS_LI_FAILED),		"FAILED" }, \
-	{ (1 << XFS_LI_DIRTY),		"DIRTY" }
+	{ (1 << XFS_LI_DIRTY),		"DIRTY" }, \
+	{ (1 << XFS_LI_RELOG),		"RELOG" }
 
 struct xfs_item_ops {
 	unsigned flags;
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread