All of lore.kernel.org
 help / color / mirror / Atom feed
From: Brian Foster <bfoster@redhat.com>
To: linux-xfs@vger.kernel.org
Subject: [PATCH RFC 08/10] xfs: buffer relogging support prototype
Date: Wed,  1 Jul 2020 12:51:14 -0400	[thread overview]
Message-ID: <20200701165116.47344-9-bfoster@redhat.com> (raw)
In-Reply-To: <20200701165116.47344-1-bfoster@redhat.com>

Implement buffer relogging support. There is currently no use case
for buffer relogging. This is for testing and experimental purposes
and serves as an example to demonstrate the ability to relog
arbitrary items in the future, if necessary.

Add helpers to manage relogged buffers, update the buffer log item
push handler to support relogged BLIs and add a log item relog
callback to properly join buffers to the relog transaction. Note
that buffers associated with higher level log items (i.e., inodes
and dquots) are skipped.

Signed-off-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/xfs_buf.c       |  4 +++
 fs/xfs/xfs_buf_item.c  | 60 ++++++++++++++++++++++++++++++++++----
 fs/xfs/xfs_trans.h     |  5 +++-
 fs/xfs/xfs_trans_buf.c | 66 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 128 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 20b748f7e186..eec482204336 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -16,6 +16,8 @@
 #include "xfs_log.h"
 #include "xfs_errortag.h"
 #include "xfs_error.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
 
 static kmem_zone_t *xfs_buf_zone;
 
@@ -1500,6 +1502,8 @@ __xfs_buf_submit(
 	trace_xfs_buf_submit(bp, _RET_IP_);
 
 	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
+	ASSERT(!bp->b_log_item ||
+	       !test_bit(XFS_LI_RELOG, &bp->b_log_item->bli_item.li_flags));
 
 	/* on shutdown we stale and complete the buffer immediately */
 	if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 9e75e8d6042e..eb827a31b47f 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -16,7 +16,7 @@
 #include "xfs_trans_priv.h"
 #include "xfs_trace.h"
 #include "xfs_log.h"
-
+#include "xfs_log_priv.h"
 
 kmem_zone_t	*xfs_buf_item_zone;
 
@@ -141,7 +141,6 @@ xfs_buf_item_size(
 	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
 	int			i;
 
-	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 	if (bip->bli_flags & XFS_BLI_STALE) {
 		/*
 		 * The buffer is stale, so all we need to log
@@ -157,7 +156,7 @@ xfs_buf_item_size(
 		return;
 	}
 
-	ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
+	ASSERT(bip->bli_flags & XFS_BLI_DIRTY);
 
 	if (bip->bli_flags & XFS_BLI_ORDERED) {
 		/*
@@ -418,6 +417,10 @@ xfs_buf_item_unpin(
 
 	trace_xfs_buf_item_unpin(bip);
 
+	/* cancel relogging on abort before we drop the bli reference */
+	if (remove)
+		xfs_trans_relog_buf_cancel(NULL, bp);
+
 	freed = atomic_dec_and_test(&bip->bli_refcount);
 
 	if (atomic_dec_and_test(&bp->b_pin_count))
@@ -462,6 +465,13 @@ xfs_buf_item_unpin(
 			list_del_init(&bp->b_li_list);
 			bp->b_iodone = NULL;
 		} else {
+			/* racy */
+			ASSERT(!test_bit(XFS_LI_RELOG_QUEUED, &lip->li_flags));
+			if (test_bit(XFS_LI_RELOG, &lip->li_flags)) {
+				atomic_dec(&bp->b_pin_count);
+				xfs_trans_relog_item_cancel(NULL, lip, true);
+			}
+
 			xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
 			xfs_buf_item_relse(bp);
 			ASSERT(bp->b_log_item == NULL);
@@ -488,8 +498,6 @@ xfs_buf_item_push(
 	struct xfs_buf		*bp = bip->bli_buf;
 	uint			rval = XFS_ITEM_SUCCESS;
 
-	if (xfs_buf_ispinned(bp))
-		return XFS_ITEM_PINNED;
 	if (!xfs_buf_trylock(bp)) {
 		/*
 		 * If we have just raced with a buffer being pinned and it has
@@ -503,6 +511,15 @@ xfs_buf_item_push(
 		return XFS_ITEM_LOCKED;
 	}
 
+	/* relog bufs are pinned so check relog state first */
+	if (xfs_item_needs_relog(lip))
+		return XFS_ITEM_RELOG;
+
+	if (xfs_buf_ispinned(bp)) {
+		xfs_buf_unlock(bp);
+		return XFS_ITEM_PINNED;
+	}
+
 	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
 
 	trace_xfs_buf_item_push(bip);
@@ -532,6 +549,7 @@ xfs_buf_item_put(
 	struct xfs_buf_log_item	*bip)
 {
 	struct xfs_log_item	*lip = &bip->bli_item;
+	struct xfs_buf		*bp = bip->bli_buf;
 	bool			aborted;
 	bool			dirty;
 
@@ -557,8 +575,10 @@ xfs_buf_item_put(
 	 * transaction that invalidated a dirty bli and cleared the dirty
 	 * state.
 	 */
-	if (aborted)
+	if (aborted) {
+		xfs_trans_relog_buf_cancel(NULL, bp);
 		xfs_trans_ail_delete(lip, 0);
+	}
 	xfs_buf_item_relse(bip->bli_buf);
 	return true;
 }
@@ -668,6 +688,28 @@ xfs_buf_item_committed(
 	return lsn;
 }
 
+STATIC void
+xfs_buf_item_relog(
+	struct xfs_log_item	*lip,
+	struct xfs_trans	*tp)
+{
+	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
+	int			res;
+
+	/*
+	 * Relog callback: take a buffer reference on behalf of the relog
+	 * transaction, then join the buffer to it and mark it dirty.
+	 */
+	xfs_buf_hold(bip->bli_buf);
+	xfs_trans_bjoin(tp, bip->bli_buf);
+	xfs_trans_dirty_buf(tp, bip->bli_buf);
+	/* add this item's relog reservation to the transaction and ticket */
+	res = xfs_relog_calc_res(lip);
+	tp->t_ticket->t_curr_res += res;
+	tp->t_ticket->t_unit_res += res;
+	tp->t_log_res += res;
+}
+
 static const struct xfs_item_ops xfs_buf_item_ops = {
 	.iop_size	= xfs_buf_item_size,
 	.iop_format	= xfs_buf_item_format,
@@ -677,6 +719,7 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
 	.iop_committing	= xfs_buf_item_committing,
 	.iop_committed	= xfs_buf_item_committed,
 	.iop_push	= xfs_buf_item_push,
+	.iop_relog	= xfs_buf_item_relog,
 };
 
 STATIC void
@@ -930,6 +973,11 @@ STATIC void
 xfs_buf_item_free(
 	struct xfs_buf_log_item	*bip)
 {
+	ASSERT(!test_bit(XFS_LI_RELOG, &bip->bli_item.li_flags));
+#ifdef DEBUG
+	ASSERT(!atomic64_read(&bip->bli_item.li_relog_res));
+#endif
+
 	xfs_buf_item_free_format(bip);
 	kmem_free(bip->bli_item.li_lv_shadow);
 	kmem_cache_free(xfs_buf_item_zone, bip);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7f409b0d456a..0262a883969f 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -243,7 +243,7 @@ xfs_relog_calc_res(
 	 * xfs_log_calc_unit_res().
 	 */
 	lip->li_ops->iop_size(lip, &niovecs, &nbytes);
-	ASSERT(niovecs == 1);
+	ASSERT(niovecs == 1 || lip->li_type == XFS_LI_BUF);
 
 	nbytes += niovecs * sizeof(xlog_op_header_t);
 	nbytes = xfs_log_calc_unit_res(lip->li_mountp, nbytes);
@@ -262,6 +262,9 @@ void		xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
 bool		xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
+bool		xfs_trans_relog_buf(struct xfs_trans *, struct xfs_buf *);
+void		xfs_trans_relog_buf_cancel(struct xfs_trans *,
+					   struct xfs_buf *);
 void		xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
 void		xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 08174ffa2118..b5b552a4bcfb 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -588,6 +588,8 @@ xfs_trans_binval(
 		return;
 	}
 
+	/* return relog res before we reset dirty state */
+	xfs_trans_relog_buf_cancel(tp, bp);
 	xfs_buf_stale(bp);
 
 	bip->bli_flags |= XFS_BLI_STALE;
@@ -787,3 +789,67 @@ xfs_trans_dquot_buf(
 
 	xfs_trans_buf_set_type(tp, bp, type);
 }
+
+/*
+ * Enable automatic relogging on a locked buffer. This pins the dirty buffer
+ * in-core until relogging is cancelled. Returns false for ineligible buffers.
+ */
+bool
+xfs_trans_relog_buf(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*bp)
+{
+	struct xfs_buf_log_item	*bip = bp->b_log_item;
+	enum xfs_blft		blft;
+
+	ASSERT(xfs_buf_islocked(bp));
+
+	if (bip->bli_flags & (XFS_BLI_ORDERED|XFS_BLI_STALE))
+		return false;
+	/*
+	 * Skip delwri queued buffers since we're about to pin the buffer for
+	 * an indeterminate amount of time and we don't want the responsibility
+	 * of failing it if an abort happens to remove it from the AIL.
+	 */
+	if (bp->b_flags & _XBF_DELWRI_Q)
+		return false;
+
+	/*
+	 * Skip buffers backing higher level log items (inodes and dquots).
+	 * Those items must be relogged directly to move in the log.
+	 */
+	blft = xfs_blft_from_flags(&bip->__bli_format);
+	switch (blft) {
+	case XFS_BLFT_DINO_BUF:
+	case XFS_BLFT_UDQUOT_BUF:
+	case XFS_BLFT_PDQUOT_BUF:
+	case XFS_BLFT_GDQUOT_BUF:
+		return false;
+	default:
+		break;
+	}
+
+	/*
+	 * Relog expects a worst case reservation from ->iop_size. Hack that in
+	 * here by logging the entire buffer in this transaction. Also grab a
+	 * buffer pin, held until relog cancel, to prevent it being written out.
+	 */
+	xfs_buf_item_log(bip, 0, BBTOB(bp->b_length) - 1);
+	atomic_inc(&bp->b_pin_count);
+	xfs_trans_relog_item(tp, &bip->bli_item);
+	return true;
+}
+
+void
+xfs_trans_relog_buf_cancel(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*bp)
+{
+	struct xfs_buf_log_item	*bip = bp->b_log_item;
+
+	if (!test_bit(XFS_LI_RELOG, &bip->bli_item.li_flags))
+		return;
+	/* drop the pin held for relogging, then cancel the relog state */
+	atomic_dec(&bp->b_pin_count);
+	xfs_trans_relog_item_cancel(tp, &bip->bli_item, false);
+}
-- 
2.21.3


  parent reply	other threads:[~2020-07-01 16:51 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-01 16:51 [PATCH 00/10] xfs: automatic relogging Brian Foster
2020-07-01 16:51 ` [PATCH 01/10] xfs: automatic relogging item management Brian Foster
2020-07-01 16:51 ` [PATCH 02/10] xfs: create helper for ticket-less log res ungrant Brian Foster
2020-07-01 16:51 ` [PATCH 03/10] xfs: extra runtime reservation overhead for relog transactions Brian Foster
2020-07-01 16:51 ` [PATCH 04/10] xfs: relog log reservation stealing and accounting Brian Foster
2020-07-01 16:51 ` [PATCH 05/10] xfs: automatic log item relog mechanism Brian Foster
2020-07-03  6:08   ` Dave Chinner
2020-07-06 16:06     ` Brian Foster
2020-07-01 16:51 ` [PATCH 06/10] xfs: automatically relog the quotaoff start intent Brian Foster
2020-07-01 16:51 ` [PATCH 07/10] xfs: prevent fs freeze with outstanding relog items Brian Foster
2020-07-01 16:51 ` Brian Foster [this message]
2020-07-01 16:51 ` [PATCH RFC 09/10] xfs: create an error tag for random relog reservation Brian Foster
2020-07-01 16:51 ` [PATCH RFC 10/10] xfs: relog random buffers based on errortag Brian Foster
2020-07-02 11:51 ` [PATCH 00/10] xfs: automatic relogging Dave Chinner
2020-07-02 18:52   ` Brian Foster
2020-07-03  0:49     ` Dave Chinner
2020-07-06 16:03       ` Brian Foster
2020-07-06 17:42         ` Darrick J. Wong
2020-07-07 11:37           ` Brian Foster
2020-07-08 16:44             ` Darrick J. Wong
2020-07-09 12:15               ` Brian Foster
2020-07-09 16:32                 ` Darrick J. Wong
2020-07-20  3:58                 ` Dave Chinner
2020-08-26 12:17                   ` Brian Foster
2020-07-10  4:09         ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200701165116.47344-9-bfoster@redhat.com \
    --to=bfoster@redhat.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.