All of lore.kernel.org
 help / color / mirror / Atom feed
* tidy up the buffer cache implementation
@ 2020-07-09 15:04 Christoph Hellwig
  2020-07-09 15:04 ` [PATCH 01/13] xfs: refactor the buf ioend disposition code Christoph Hellwig
                   ` (12 more replies)
  0 siblings, 13 replies; 28+ messages in thread
From: Christoph Hellwig @ 2020-07-09 15:04 UTC (permalink / raw)
  To: linux-xfs

Hi all,

this series ties up some old and new bits in the XFS buffer
cache, and consolidates a fair amount of code.

Diffstat:
 libxfs/xfs_log_recover.h |    1 
 libxfs/xfs_trans_inode.c |    6 -
 xfs_buf.c                |  217 +++++++++++++++++++++++++++++++-------
 xfs_buf.h                |   18 ---
 xfs_buf_item.c           |  264 +----------------------------------------------
 xfs_buf_item.h           |    3 
 xfs_buf_item_recover.c   |    2 
 xfs_dquot.c              |   14 ++
 xfs_inode.c              |    6 -
 xfs_inode_item.c         |   12 +-
 xfs_inode_item.h         |    1 
 xfs_log_recover.c        |   37 ------
 xfs_trace.h              |    2 
 13 files changed, 227 insertions(+), 356 deletions(-)

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 01/13] xfs: refactor the buf ioend disposition code
  2020-07-09 15:04 tidy up the buffer cache implementation Christoph Hellwig
@ 2020-07-09 15:04 ` Christoph Hellwig
  2020-08-18 22:09   ` Darrick J. Wong
  2020-07-09 15:04 ` [PATCH 02/13] xfs: mark xfs_buf_ioend static Christoph Hellwig
                   ` (11 subsequent siblings)
  12 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2020-07-09 15:04 UTC (permalink / raw)
  To: linux-xfs

Handle the no-error case in xfs_buf_iodone_error as well, and to clarify
the code rename the function, use the actual enum type as return value
and then switch on it in the callers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf_item.c | 115 +++++++++++++++++++++++-------------------
 1 file changed, 62 insertions(+), 53 deletions(-)

diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index e9428c30862a98..19896884189973 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1046,24 +1046,27 @@ xfs_buf_ioerror_permanent(
  *
  * Multi-state return value:
  *
- * XBF_IOERROR_FINISH: clear IO error retry state and run callback completions
- * XBF_IOERROR_DONE: resubmitted immediately, do not run any completions
- * XBF_IOERROR_FAIL: transient error, run failure callback completions and then
+ * XBF_IOEND_FINISH: run callback completions
+ * XBF_IOEND_DONE: resubmitted immediately, do not run any completions
+ * XBF_IOEND_FAIL: transient error, run failure callback completions and then
  *    release the buffer
  */
-enum {
-	XBF_IOERROR_FINISH,
-	XBF_IOERROR_DONE,
-	XBF_IOERROR_FAIL,
+enum xfs_buf_ioend_disposition {
+	XBF_IOEND_FINISH,
+	XBF_IOEND_DONE,
+	XBF_IOEND_FAIL,
 };
 
-static int
-xfs_buf_iodone_error(
+static enum xfs_buf_ioend_disposition
+xfs_buf_ioend_disposition(
 	struct xfs_buf		*bp)
 {
 	struct xfs_mount	*mp = bp->b_mount;
 	struct xfs_error_cfg	*cfg;
 
+	if (likely(!bp->b_error))
+		return XBF_IOEND_FINISH;
+
 	if (xfs_buf_ioerror_fail_without_retry(bp))
 		goto out_stale;
 
@@ -1073,7 +1076,7 @@ xfs_buf_iodone_error(
 	if (xfs_buf_ioerror_retry(bp, cfg)) {
 		xfs_buf_ioerror(bp, 0);
 		xfs_buf_submit(bp);
-		return XBF_IOERROR_DONE;
+		return XBF_IOEND_DONE;
 	}
 
 	/*
@@ -1086,13 +1089,13 @@ xfs_buf_iodone_error(
 	}
 
 	/* Still considered a transient error. Caller will schedule retries. */
-	return XBF_IOERROR_FAIL;
+	return XBF_IOEND_FAIL;
 
 out_stale:
 	xfs_buf_stale(bp);
 	bp->b_flags |= XBF_DONE;
 	trace_xfs_buf_error_relse(bp, _RET_IP_);
-	return XBF_IOERROR_FINISH;
+	return XBF_IOEND_FINISH;
 }
 
 static void
@@ -1128,6 +1131,19 @@ xfs_buf_clear_ioerror_retry_state(
 	bp->b_first_retry_time = 0;
 }
 
+static void
+xfs_buf_inode_io_fail(
+	struct xfs_buf		*bp)
+{
+	struct xfs_log_item	*lip;
+
+	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
+		set_bit(XFS_LI_FAILED, &lip->li_flags);
+
+	xfs_buf_ioerror(bp, 0);
+	xfs_buf_relse(bp);
+}
+
 /*
  * Inode buffer iodone callback function.
  */
@@ -1135,30 +1151,36 @@ void
 xfs_buf_inode_iodone(
 	struct xfs_buf		*bp)
 {
-	if (bp->b_error) {
-		struct xfs_log_item *lip;
-		int ret = xfs_buf_iodone_error(bp);
-
-		if (ret == XBF_IOERROR_FINISH)
-			goto finish_iodone;
-		if (ret == XBF_IOERROR_DONE)
-			return;
-		ASSERT(ret == XBF_IOERROR_FAIL);
-		list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
-			set_bit(XFS_LI_FAILED, &lip->li_flags);
-		}
-		xfs_buf_ioerror(bp, 0);
-		xfs_buf_relse(bp);
+	switch (xfs_buf_ioend_disposition(bp)) {
+	case XBF_IOEND_DONE:
+		return;
+	case XBF_IOEND_FAIL:
+		xfs_buf_inode_io_fail(bp);
 		return;
+	default:
+		break;
 	}
 
-finish_iodone:
 	xfs_buf_clear_ioerror_retry_state(bp);
 	xfs_buf_item_done(bp);
 	xfs_iflush_done(bp);
 	xfs_buf_ioend_finish(bp);
 }
 
+static void
+xfs_buf_dquot_io_fail(
+	struct xfs_buf		*bp)
+{
+	struct xfs_log_item	*lip;
+
+	spin_lock(&bp->b_mount->m_ail->ail_lock);
+	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
+		xfs_set_li_failed(lip, bp);
+	spin_unlock(&bp->b_mount->m_ail->ail_lock);
+	xfs_buf_ioerror(bp, 0);
+	xfs_buf_relse(bp);
+}
+
 /*
  * Dquot buffer iodone callback function.
  */
@@ -1166,26 +1188,16 @@ void
 xfs_buf_dquot_iodone(
 	struct xfs_buf		*bp)
 {
-	if (bp->b_error) {
-		struct xfs_log_item *lip;
-		int ret = xfs_buf_iodone_error(bp);
-
-		if (ret == XBF_IOERROR_FINISH)
-			goto finish_iodone;
-		if (ret == XBF_IOERROR_DONE)
-			return;
-		ASSERT(ret == XBF_IOERROR_FAIL);
-		spin_lock(&bp->b_mount->m_ail->ail_lock);
-		list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
-			xfs_set_li_failed(lip, bp);
-		}
-		spin_unlock(&bp->b_mount->m_ail->ail_lock);
-		xfs_buf_ioerror(bp, 0);
-		xfs_buf_relse(bp);
+	switch (xfs_buf_ioend_disposition(bp)) {
+	case XBF_IOEND_DONE:
+		return;
+	case XBF_IOEND_FAIL:
+		xfs_buf_dquot_io_fail(bp);
 		return;
+	default:
+		break;
 	}
 
-finish_iodone:
 	xfs_buf_clear_ioerror_retry_state(bp);
 	/* a newly allocated dquot buffer might have a log item attached */
 	xfs_buf_item_done(bp);
@@ -1203,21 +1215,18 @@ void
 xfs_buf_iodone(
 	struct xfs_buf		*bp)
 {
-	if (bp->b_error) {
-		int ret = xfs_buf_iodone_error(bp);
-
-		if (ret == XBF_IOERROR_FINISH)
-			goto finish_iodone;
-		if (ret == XBF_IOERROR_DONE)
-			return;
-		ASSERT(ret == XBF_IOERROR_FAIL);
+	switch (xfs_buf_ioend_disposition(bp)) {
+	case XBF_IOEND_DONE:
+		return;
+	case XBF_IOEND_FAIL:
 		ASSERT(list_empty(&bp->b_li_list));
 		xfs_buf_ioerror(bp, 0);
 		xfs_buf_relse(bp);
 		return;
+	default:
+		break;
 	}
 
-finish_iodone:
 	xfs_buf_clear_ioerror_retry_state(bp);
 	xfs_buf_item_done(bp);
 	xfs_buf_ioend_finish(bp);
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 02/13] xfs: mark xfs_buf_ioend static
  2020-07-09 15:04 tidy up the buffer cache implementation Christoph Hellwig
  2020-07-09 15:04 ` [PATCH 01/13] xfs: refactor the buf ioend disposition code Christoph Hellwig
@ 2020-07-09 15:04 ` Christoph Hellwig
  2020-08-18 22:10   ` Darrick J. Wong
  2020-07-09 15:04 ` [PATCH 03/13] xfs: refactor xfs_buf_ioend Christoph Hellwig
                   ` (10 subsequent siblings)
  12 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2020-07-09 15:04 UTC (permalink / raw)
  To: linux-xfs

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 2 +-
 fs/xfs/xfs_buf.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index dda0c94458797a..1bce6457a9b943 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1176,7 +1176,7 @@ xfs_buf_wait_unpin(
  *	Buffer Utility Routines
  */
 
-void
+static void
 xfs_buf_ioend(
 	struct xfs_buf	*bp)
 {
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 755b652e695ace..bea20d43a38191 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -269,7 +269,6 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
 
 /* Buffer Read and Write Routines */
 extern int xfs_bwrite(struct xfs_buf *bp);
-extern void xfs_buf_ioend(struct xfs_buf *bp);
 static inline void xfs_buf_ioend_finish(struct xfs_buf *bp)
 {
 	if (bp->b_flags & XBF_ASYNC)
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 03/13] xfs: refactor xfs_buf_ioend
  2020-07-09 15:04 tidy up the buffer cache implementation Christoph Hellwig
  2020-07-09 15:04 ` [PATCH 01/13] xfs: refactor the buf ioend disposition code Christoph Hellwig
  2020-07-09 15:04 ` [PATCH 02/13] xfs: mark xfs_buf_ioend static Christoph Hellwig
@ 2020-07-09 15:04 ` Christoph Hellwig
  2020-08-18 22:31   ` Darrick J. Wong
  2020-07-09 15:04 ` [PATCH 04/13] xfs: move the buffer retry logic to xfs_buf.c Christoph Hellwig
                   ` (9 subsequent siblings)
  12 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2020-07-09 15:04 UTC (permalink / raw)
  To: linux-xfs

Refactor the logic for the different I/O completions to prepare for
more code sharing.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c         | 41 +++++++++++++++++-----------------------
 fs/xfs/xfs_log_recover.c | 14 +++++++-------
 2 files changed, 24 insertions(+), 31 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 1bce6457a9b943..7c22d63f43f754 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1199,33 +1199,26 @@ xfs_buf_ioend(
 		if (!bp->b_error)
 			bp->b_flags |= XBF_DONE;
 		xfs_buf_ioend_finish(bp);
-		return;
-	}
-
-	if (!bp->b_error) {
-		bp->b_flags &= ~XBF_WRITE_FAIL;
-		bp->b_flags |= XBF_DONE;
-	}
-
-	/*
-	 * If this is a log recovery buffer, we aren't doing transactional IO
-	 * yet so we need to let it handle IO completions.
-	 */
-	if (bp->b_flags & _XBF_LOGRECOVERY) {
+	} else if (bp->b_flags & _XBF_LOGRECOVERY) {
+		/*
+		 * If this is a log recovery buffer, we aren't doing
+		 * transactional I/O yet so we need to let the log recovery code
+		 * handle I/O completions:
+		 */
 		xlog_recover_iodone(bp);
-		return;
-	}
-
-	if (bp->b_flags & _XBF_INODES) {
-		xfs_buf_inode_iodone(bp);
-		return;
-	}
+	} else {
+		if (!bp->b_error) {
+			bp->b_flags &= ~XBF_WRITE_FAIL;
+			bp->b_flags |= XBF_DONE;
+		}
 
-	if (bp->b_flags & _XBF_DQUOTS) {
-		xfs_buf_dquot_iodone(bp);
-		return;
+		if (bp->b_flags & _XBF_INODES)
+			xfs_buf_inode_iodone(bp);
+		else if (bp->b_flags & _XBF_DQUOTS)
+			xfs_buf_dquot_iodone(bp);
+		else
+			xfs_buf_iodone(bp);
 	}
-	xfs_buf_iodone(bp);
 }
 
 static void
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 52a65a74208ffe..cf6dabb98f2327 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -269,15 +269,15 @@ void
 xlog_recover_iodone(
 	struct xfs_buf	*bp)
 {
-	if (bp->b_error) {
+	if (!bp->b_error) {
+		bp->b_flags |= XBF_DONE;
+	} else if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
 		/*
-		 * We're not going to bother about retrying
-		 * this during recovery. One strike!
+		 * We're not going to bother about retrying this during
+		 * recovery. One strike!
 		 */
-		if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
-			xfs_buf_ioerror_alert(bp, __this_address);
-			xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
-		}
+		xfs_buf_ioerror_alert(bp, __this_address);
+		xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
 	}
 
 	/*
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 04/13] xfs: move the buffer retry logic to xfs_buf.c
  2020-07-09 15:04 tidy up the buffer cache implementation Christoph Hellwig
                   ` (2 preceding siblings ...)
  2020-07-09 15:04 ` [PATCH 03/13] xfs: refactor xfs_buf_ioend Christoph Hellwig
@ 2020-07-09 15:04 ` Christoph Hellwig
  2020-08-18 22:39   ` Darrick J. Wong
  2020-07-09 15:04 ` [PATCH 05/13] xfs: fold xfs_buf_ioend_finish into xfs_ioend Christoph Hellwig
                   ` (8 subsequent siblings)
  12 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2020-07-09 15:04 UTC (permalink / raw)
  To: linux-xfs

Move the buffer retry state machine logic to xfs_buf.c and call it once
from xfs_ioend instead of duplicating it three times for the three kinds
of buffers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_trans_inode.c |   6 +-
 fs/xfs/xfs_buf.c                | 173 ++++++++++++++++++++-
 fs/xfs/xfs_buf_item.c           | 260 +-------------------------------
 fs/xfs/xfs_buf_item.h           |   3 +
 fs/xfs/xfs_dquot.c              |  14 +-
 fs/xfs/xfs_inode.c              |   6 +-
 fs/xfs/xfs_inode_item.c         |  12 +-
 fs/xfs/xfs_inode_item.h         |   1 -
 fs/xfs/xfs_trace.h              |   2 +-
 9 files changed, 206 insertions(+), 271 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index e15129647e00c9..3071ab55c44518 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -177,9 +177,9 @@ xfs_trans_log_inode(
 
 	/*
 	 * Always OR in the bits from the ili_last_fields field.  This is to
-	 * coordinate with the xfs_iflush() and xfs_iflush_done() routines in
-	 * the eventual clearing of the ili_fields bits.  See the big comment in
-	 * xfs_iflush() for an explanation of this coordination mechanism.
+	 * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines
+	 * in the eventual clearing of the ili_fields bits.  See the big comment
+	 * in xfs_iflush() for an explanation of this coordination mechanism.
 	 */
 	iip->ili_fields |= (flags | iip->ili_last_fields | iversion_flags);
 	spin_unlock(&iip->ili_lock);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 7c22d63f43f754..443d11bdfcf502 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1173,8 +1173,145 @@ xfs_buf_wait_unpin(
 }
 
 /*
- *	Buffer Utility Routines
+ * Decide if we're going to retry the write after a failure, and prepare
+ * the buffer for retrying the write.
  */
+static bool
+xfs_buf_ioerror_fail_without_retry(
+	struct xfs_buf		*bp)
+{
+	struct xfs_mount	*mp = bp->b_mount;
+	static unsigned long	lasttime;
+	static struct xfs_buftarg *lasttarg;
+
+	/*
+	 * If we've already decided to shutdown the filesystem because of
+	 * I/O errors, there's no point in giving this a retry.
+	 */
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return true;
+
+	if (bp->b_target != lasttarg ||
+	    time_after(jiffies, (lasttime + 5*HZ))) {
+		lasttime = jiffies;
+		xfs_buf_ioerror_alert(bp, __this_address);
+	}
+	lasttarg = bp->b_target;
+
+	/* synchronous writes will have callers process the error */
+	if (!(bp->b_flags & XBF_ASYNC))
+		return true;
+	return false;
+}
+
+static bool
+xfs_buf_ioerror_retry(
+	struct xfs_buf		*bp,
+	struct xfs_error_cfg	*cfg)
+{
+	if ((bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) &&
+	    bp->b_last_error == bp->b_error)
+		return false;
+
+	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
+	bp->b_last_error = bp->b_error;
+	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
+	    !bp->b_first_retry_time)
+		bp->b_first_retry_time = jiffies;
+	return true;
+}
+
+/*
+ * Account for this latest trip around the retry handler, and decide if
+ * we've failed enough times to constitute a permanent failure.
+ */
+static bool
+xfs_buf_ioerror_permanent(
+	struct xfs_buf		*bp,
+	struct xfs_error_cfg	*cfg)
+{
+	struct xfs_mount	*mp = bp->b_mount;
+
+	if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
+	    ++bp->b_retries > cfg->max_retries)
+		return true;
+	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
+	    time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
+		return true;
+
+	/* At unmount we may treat errors differently */
+	if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
+		return true;
+
+	return false;
+}
+
+/*
+ * On a sync write or shutdown we just want to stale the buffer and let the
+ * caller handle the error in bp->b_error appropriately.
+ *
+ * If the write was asynchronous then no one will be looking for the error.  If
+ * this is the first failure of this type, clear the error state and write the
+ * buffer out again. This means we always retry an async write failure at least
+ * once, but we also need to set the buffer up to behave correctly now for
+ * repeated failures.
+ *
+ * If we get repeated async write failures, then we take action according to the
+ * error configuration we have been set up to use.
+ *
+ * Multi-state return value:
+ *
+ * XBF_IOEND_FINISH: run callback completions
+ * XBF_IOEND_DONE: resubmitted immediately, do not run any completions
+ * XBF_IOEND_FAIL: transient error, run failure callback completions and then
+ *    release the buffer
+ */
+enum xfs_buf_ioend_disposition {
+	XBF_IOEND_FINISH,
+	XBF_IOEND_DONE,
+	XBF_IOEND_FAIL,
+};
+
+static enum xfs_buf_ioend_disposition
+xfs_buf_ioend_disposition(
+	struct xfs_buf		*bp)
+{
+	struct xfs_mount	*mp = bp->b_mount;
+	struct xfs_error_cfg	*cfg;
+
+	if (likely(!bp->b_error))
+		return XBF_IOEND_FINISH;
+
+	if (xfs_buf_ioerror_fail_without_retry(bp))
+		goto out_stale;
+
+	trace_xfs_buf_iodone_async(bp, _RET_IP_);
+
+	cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
+	if (xfs_buf_ioerror_retry(bp, cfg)) {
+		xfs_buf_ioerror(bp, 0);
+		xfs_buf_submit(bp);
+		return XBF_IOEND_DONE;
+	}
+
+	/*
+	 * Permanent error - we need to trigger a shutdown if we haven't already
+	 * to indicate that inconsistency will result from this action.
+	 */
+	if (xfs_buf_ioerror_permanent(bp, cfg)) {
+		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+		goto out_stale;
+	}
+
+	/* Still considered a transient error. Caller will schedule retries. */
+	return XBF_IOEND_FAIL;
+
+out_stale:
+	xfs_buf_stale(bp);
+	bp->b_flags |= XBF_DONE;
+	trace_xfs_buf_error_relse(bp, _RET_IP_);
+	return XBF_IOEND_FINISH;
+}
 
 static void
 xfs_buf_ioend(
@@ -1212,12 +1349,42 @@ xfs_buf_ioend(
 			bp->b_flags |= XBF_DONE;
 		}
 
+		switch (xfs_buf_ioend_disposition(bp)) {
+		case XBF_IOEND_DONE:
+			return;
+		case XBF_IOEND_FAIL:
+			if (bp->b_flags & _XBF_INODES)
+				xfs_buf_inode_io_fail(bp);
+			else if (bp->b_flags & _XBF_DQUOTS)
+				xfs_buf_dquot_io_fail(bp);
+			else
+				ASSERT(list_empty(&bp->b_li_list));
+			xfs_buf_ioerror(bp, 0);
+			xfs_buf_relse(bp);
+			return;
+		default:
+			break;
+		}
+
+		/* clear the retry state */
+		bp->b_last_error = 0;
+		bp->b_retries = 0;
+		bp->b_first_retry_time = 0;
+
+		/*
+		 * Note that for things like remote attribute buffers, there may
+		 * not be a buffer log item here, so processing the buffer log
+		 * item must remain optional.
+		 */
+		if (bp->b_log_item)
+			xfs_buf_item_done(bp);
+
 		if (bp->b_flags & _XBF_INODES)
 			xfs_buf_inode_iodone(bp);
 		else if (bp->b_flags & _XBF_DQUOTS)
 			xfs_buf_dquot_iodone(bp);
-		else
-			xfs_buf_iodone(bp);
+
+		xfs_buf_ioend_finish(bp);
 	}
 }
 
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 19896884189973..ccc9d69683fae4 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -31,8 +31,6 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
 	return container_of(lip, struct xfs_buf_log_item, bli_item);
 }
 
-static void xfs_buf_item_done(struct xfs_buf *bp);
-
 /* Is this log iovec plausibly large enough to contain the buffer log format? */
 bool
 xfs_buf_log_check_iovec(
@@ -464,7 +462,7 @@ xfs_buf_item_unpin(
 		 */
 		if (bip->bli_flags & XFS_BLI_STALE_INODE) {
 			xfs_buf_item_done(bp);
-			xfs_iflush_done(bp);
+			xfs_buf_inode_iodone(bp);
 			ASSERT(list_empty(&bp->b_li_list));
 		} else {
 			xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
@@ -957,156 +955,12 @@ xfs_buf_item_relse(
 	xfs_buf_item_free(bip);
 }
 
-/*
- * Decide if we're going to retry the write after a failure, and prepare
- * the buffer for retrying the write.
- */
-static bool
-xfs_buf_ioerror_fail_without_retry(
-	struct xfs_buf		*bp)
-{
-	struct xfs_mount	*mp = bp->b_mount;
-	static ulong		lasttime;
-	static xfs_buftarg_t	*lasttarg;
-
-	/*
-	 * If we've already decided to shutdown the filesystem because of
-	 * I/O errors, there's no point in giving this a retry.
-	 */
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return true;
-
-	if (bp->b_target != lasttarg ||
-	    time_after(jiffies, (lasttime + 5*HZ))) {
-		lasttime = jiffies;
-		xfs_buf_ioerror_alert(bp, __this_address);
-	}
-	lasttarg = bp->b_target;
-
-	/* synchronous writes will have callers process the error */
-	if (!(bp->b_flags & XBF_ASYNC))
-		return true;
-	return false;
-}
-
-static bool
-xfs_buf_ioerror_retry(
-	struct xfs_buf		*bp,
-	struct xfs_error_cfg	*cfg)
-{
-	if ((bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) &&
-	    bp->b_last_error == bp->b_error)
-		return false;
-
-	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
-	bp->b_last_error = bp->b_error;
-	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
-	    !bp->b_first_retry_time)
-		bp->b_first_retry_time = jiffies;
-	return true;
-}
-
-/*
- * Account for this latest trip around the retry handler, and decide if
- * we've failed enough times to constitute a permanent failure.
- */
-static bool
-xfs_buf_ioerror_permanent(
-	struct xfs_buf		*bp,
-	struct xfs_error_cfg	*cfg)
-{
-	struct xfs_mount	*mp = bp->b_mount;
-
-	if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
-	    ++bp->b_retries > cfg->max_retries)
-		return true;
-	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
-	    time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
-		return true;
-
-	/* At unmount we may treat errors differently */
-	if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
-		return true;
-
-	return false;
-}
-
-/*
- * On a sync write or shutdown we just want to stale the buffer and let the
- * caller handle the error in bp->b_error appropriately.
- *
- * If the write was asynchronous then no one will be looking for the error.  If
- * this is the first failure of this type, clear the error state and write the
- * buffer out again. This means we always retry an async write failure at least
- * once, but we also need to set the buffer up to behave correctly now for
- * repeated failures.
- *
- * If we get repeated async write failures, then we take action according to the
- * error configuration we have been set up to use.
- *
- * Multi-state return value:
- *
- * XBF_IOEND_FINISH: run callback completions
- * XBF_IOEND_DONE: resubmitted immediately, do not run any completions
- * XBF_IOEND_FAIL: transient error, run failure callback completions and then
- *    release the buffer
- */
-enum xfs_buf_ioend_disposition {
-	XBF_IOEND_FINISH,
-	XBF_IOEND_DONE,
-	XBF_IOEND_FAIL,
-};
-
-static enum xfs_buf_ioend_disposition
-xfs_buf_ioend_disposition(
-	struct xfs_buf		*bp)
-{
-	struct xfs_mount	*mp = bp->b_mount;
-	struct xfs_error_cfg	*cfg;
-
-	if (likely(!bp->b_error))
-		return XBF_IOEND_FINISH;
-
-	if (xfs_buf_ioerror_fail_without_retry(bp))
-		goto out_stale;
-
-	trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
-
-	cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
-	if (xfs_buf_ioerror_retry(bp, cfg)) {
-		xfs_buf_ioerror(bp, 0);
-		xfs_buf_submit(bp);
-		return XBF_IOEND_DONE;
-	}
-
-	/*
-	 * Permanent error - we need to trigger a shutdown if we haven't already
-	 * to indicate that inconsistency will result from this action.
-	 */
-	if (xfs_buf_ioerror_permanent(bp, cfg)) {
-		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
-		goto out_stale;
-	}
-
-	/* Still considered a transient error. Caller will schedule retries. */
-	return XBF_IOEND_FAIL;
-
-out_stale:
-	xfs_buf_stale(bp);
-	bp->b_flags |= XBF_DONE;
-	trace_xfs_buf_error_relse(bp, _RET_IP_);
-	return XBF_IOEND_FINISH;
-}
-
-static void
+void
 xfs_buf_item_done(
 	struct xfs_buf		*bp)
 {
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
 
-	if (!bip)
-		return;
-
 	/*
 	 * If we are forcibly shutting down, this may well be off the AIL
 	 * already. That's because we simulate the log-committed callbacks to
@@ -1121,113 +975,3 @@ xfs_buf_item_done(
 	xfs_buf_item_free(bip);
 	xfs_buf_rele(bp);
 }
-
-static inline void
-xfs_buf_clear_ioerror_retry_state(
-	struct xfs_buf		*bp)
-{
-	bp->b_last_error = 0;
-	bp->b_retries = 0;
-	bp->b_first_retry_time = 0;
-}
-
-static void
-xfs_buf_inode_io_fail(
-	struct xfs_buf		*bp)
-{
-	struct xfs_log_item	*lip;
-
-	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
-		set_bit(XFS_LI_FAILED, &lip->li_flags);
-
-	xfs_buf_ioerror(bp, 0);
-	xfs_buf_relse(bp);
-}
-
-/*
- * Inode buffer iodone callback function.
- */
-void
-xfs_buf_inode_iodone(
-	struct xfs_buf		*bp)
-{
-	switch (xfs_buf_ioend_disposition(bp)) {
-	case XBF_IOEND_DONE:
-		return;
-	case XBF_IOEND_FAIL:
-		xfs_buf_inode_io_fail(bp);
-		return;
-	default:
-		break;
-	}
-
-	xfs_buf_clear_ioerror_retry_state(bp);
-	xfs_buf_item_done(bp);
-	xfs_iflush_done(bp);
-	xfs_buf_ioend_finish(bp);
-}
-
-static void
-xfs_buf_dquot_io_fail(
-	struct xfs_buf		*bp)
-{
-	struct xfs_log_item	*lip;
-
-	spin_lock(&bp->b_mount->m_ail->ail_lock);
-	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
-		xfs_set_li_failed(lip, bp);
-	spin_unlock(&bp->b_mount->m_ail->ail_lock);
-	xfs_buf_ioerror(bp, 0);
-	xfs_buf_relse(bp);
-}
-
-/*
- * Dquot buffer iodone callback function.
- */
-void
-xfs_buf_dquot_iodone(
-	struct xfs_buf		*bp)
-{
-	switch (xfs_buf_ioend_disposition(bp)) {
-	case XBF_IOEND_DONE:
-		return;
-	case XBF_IOEND_FAIL:
-		xfs_buf_dquot_io_fail(bp);
-		return;
-	default:
-		break;
-	}
-
-	xfs_buf_clear_ioerror_retry_state(bp);
-	/* a newly allocated dquot buffer might have a log item attached */
-	xfs_buf_item_done(bp);
-	xfs_dquot_done(bp);
-	xfs_buf_ioend_finish(bp);
-}
-
-/*
- * Dirty buffer iodone callback function.
- *
- * Note that for things like remote attribute buffers, there may not be a buffer
- * log item here, so processing the buffer log item must remain be optional.
- */
-void
-xfs_buf_iodone(
-	struct xfs_buf		*bp)
-{
-	switch (xfs_buf_ioend_disposition(bp)) {
-	case XBF_IOEND_DONE:
-		return;
-	case XBF_IOEND_FAIL:
-		ASSERT(list_empty(&bp->b_li_list));
-		xfs_buf_ioerror(bp, 0);
-		xfs_buf_relse(bp);
-		return;
-	default:
-		break;
-	}
-
-	xfs_buf_clear_ioerror_retry_state(bp);
-	xfs_buf_item_done(bp);
-	xfs_buf_ioend_finish(bp);
-}
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 23507cbb4c4132..55da71fb6e22fc 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -50,12 +50,15 @@ struct xfs_buf_log_item {
 };
 
 int	xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
+void	xfs_buf_item_done(struct xfs_buf *bp);
 void	xfs_buf_item_relse(struct xfs_buf *);
 bool	xfs_buf_item_put(struct xfs_buf_log_item *);
 void	xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
 bool	xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
 void	xfs_buf_inode_iodone(struct xfs_buf *);
+void	xfs_buf_inode_io_fail(struct xfs_buf *bp);
 void	xfs_buf_dquot_iodone(struct xfs_buf *);
+void	xfs_buf_dquot_io_fail(struct xfs_buf *bp);
 void	xfs_buf_iodone(struct xfs_buf *);
 bool	xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec);
 
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 76353c9a723ee0..2f309767d8c03b 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -1087,7 +1087,7 @@ xfs_qm_dqflush_done(
 }
 
 void
-xfs_dquot_done(
+xfs_buf_dquot_iodone(
 	struct xfs_buf		*bp)
 {
 	struct xfs_log_item	*lip, *n;
@@ -1098,6 +1098,18 @@ xfs_dquot_done(
 	}
 }
 
+void
+xfs_buf_dquot_io_fail(
+	struct xfs_buf		*bp)
+{
+	struct xfs_log_item	*lip;
+
+	spin_lock(&bp->b_mount->m_ail->ail_lock);
+	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
+		xfs_set_li_failed(lip, bp);
+	spin_unlock(&bp->b_mount->m_ail->ail_lock);
+}
+
 /*
  * Write a modified dquot to disk.
  * The dquot must be locked and the flush lock too taken by caller.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5c07bf491d9f5f..98240126914fc8 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3569,8 +3569,8 @@ xfs_iflush(
 	 *
 	 * What we do is move the bits to the ili_last_fields field.  When
 	 * logging the inode, these bits are moved back to the ili_fields field.
-	 * In the xfs_iflush_done() routine we clear ili_last_fields, since we
-	 * know that the information those bits represent is permanently on
+	 * In the xfs_buf_inode_iodone() routine we clear ili_last_fields, since
+	 * we know that the information those bits represent is permanently on
 	 * disk.  As long as the flush completes before the inode is logged
 	 * again, then both ili_fields and ili_last_fields will be cleared.
 	 */
@@ -3584,7 +3584,7 @@ xfs_iflush(
 
 	/*
 	 * Store the current LSN of the inode so that we can tell whether the
-	 * item has moved in the AIL from xfs_iflush_done().
+	 * item has moved in the AIL from xfs_buf_inode_iodone().
 	 */
 	xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
 				&iip->ili_item.li_lsn);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 3840117f8a5e2c..69c3a40a51db10 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -714,7 +714,7 @@ xfs_iflush_finish(
  * as completing the flush and unlocking the inode.
  */
 void
-xfs_iflush_done(
+xfs_buf_inode_iodone(
 	struct xfs_buf		*bp)
 {
 	struct xfs_log_item	*lip, *n;
@@ -753,6 +753,16 @@ xfs_iflush_done(
 		list_splice_tail(&flushed_inodes, &bp->b_li_list);
 }
 
+void
+xfs_buf_inode_io_fail(
+	struct xfs_buf		*bp)
+{
+	struct xfs_log_item	*lip;
+
+	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
+		set_bit(XFS_LI_FAILED, &lip->li_flags);
+}
+
 /*
  * This is the inode flushing abort routine.  It is called from xfs_iflush when
  * the filesystem is shutting down to clean up the inode state.  It is
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 048b5e7dee901f..7154d92338a393 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -43,7 +43,6 @@ static inline int xfs_inode_clean(struct xfs_inode *ip)
 
 extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
 extern void xfs_inode_item_destroy(struct xfs_inode *);
-extern void xfs_iflush_done(struct xfs_buf *);
 extern void xfs_iflush_abort(struct xfs_inode *);
 extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
 					 struct xfs_inode_log_format *);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 50c478374a31b4..90702c6e5bd7ec 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -337,7 +337,7 @@ DEFINE_BUF_EVENT(xfs_buf_delwri_split);
 DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf);
 DEFINE_BUF_EVENT(xfs_buf_get_uncached);
 DEFINE_BUF_EVENT(xfs_buf_item_relse);
-DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
+DEFINE_BUF_EVENT(xfs_buf_iodone_async);
 DEFINE_BUF_EVENT(xfs_buf_error_relse);
 DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
 DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 05/13] xfs: fold xfs_buf_ioend_finish into xfs_ioend
  2020-07-09 15:04 tidy up the buffer cache implementation Christoph Hellwig
                   ` (3 preceding siblings ...)
  2020-07-09 15:04 ` [PATCH 04/13] xfs: move the buffer retry logic to xfs_buf.c Christoph Hellwig
@ 2020-07-09 15:04 ` Christoph Hellwig
  2020-08-18 22:40   ` Darrick J. Wong
  2020-07-09 15:04 ` [PATCH 06/13] xfs: refactor xfs_buf_ioerror_fail_without_retry Christoph Hellwig
                   ` (7 subsequent siblings)
  12 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2020-07-09 15:04 UTC (permalink / raw)
  To: linux-xfs

No need to keep a separate helper for this logic.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c         | 8 +++++---
 fs/xfs/xfs_buf.h         | 7 -------
 fs/xfs/xfs_log_recover.c | 1 -
 3 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 443d11bdfcf502..e6a73e455caa1a 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1335,7 +1335,6 @@ xfs_buf_ioend(
 			bp->b_ops->verify_read(bp);
 		if (!bp->b_error)
 			bp->b_flags |= XBF_DONE;
-		xfs_buf_ioend_finish(bp);
 	} else if (bp->b_flags & _XBF_LOGRECOVERY) {
 		/*
 		 * If this is a log recovery buffer, we aren't doing
@@ -1383,9 +1382,12 @@ xfs_buf_ioend(
 			xfs_buf_inode_iodone(bp);
 		else if (bp->b_flags & _XBF_DQUOTS)
 			xfs_buf_dquot_iodone(bp);
-
-		xfs_buf_ioend_finish(bp);
 	}
+
+	if (bp->b_flags & XBF_ASYNC)
+		xfs_buf_relse(bp);
+	else
+		complete(&bp->b_iowait);
 }
 
 static void
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index bea20d43a38191..9eb4044597c985 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -269,13 +269,6 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
 
 /* Buffer Read and Write Routines */
 extern int xfs_bwrite(struct xfs_buf *bp);
-static inline void xfs_buf_ioend_finish(struct xfs_buf *bp)
-{
-	if (bp->b_flags & XBF_ASYNC)
-		xfs_buf_relse(bp);
-	else
-		complete(&bp->b_iowait);
-}
 
 extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
 		xfs_failaddr_t failaddr);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index cf6dabb98f2327..741a2c247bc585 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -288,7 +288,6 @@ xlog_recover_iodone(
 		xfs_buf_item_relse(bp);
 	ASSERT(bp->b_log_item == NULL);
 	bp->b_flags &= ~_XBF_LOGRECOVERY;
-	xfs_buf_ioend_finish(bp);
 }
 
 /*
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 06/13] xfs: refactor xfs_buf_ioerror_fail_without_retry
  2020-07-09 15:04 tidy up the buffer cache implementation Christoph Hellwig
                   ` (4 preceding siblings ...)
  2020-07-09 15:04 ` [PATCH 05/13] xfs: fold xfs_buf_ioend_finish into xfs_ioend Christoph Hellwig
@ 2020-07-09 15:04 ` Christoph Hellwig
  2020-08-18 22:41   ` Darrick J. Wong
  2020-07-09 15:04 ` [PATCH 07/13] xfs: remove xfs_buf_ioerror_retry Christoph Hellwig
                   ` (6 subsequent siblings)
  12 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2020-07-09 15:04 UTC (permalink / raw)
  To: linux-xfs

xfs_buf_ioerror_fail_without_retry is a somewhat weird function in
that it has two trivial checks that decide the return value, while
the rest implements a ratelimited warning.  Just lift the two checks
into the caller, and give the remainder a suitable name.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 35 +++++++++++++++--------------------
 1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index e6a73e455caa1a..2f2ce3faab0826 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1172,36 +1172,19 @@ xfs_buf_wait_unpin(
 	set_current_state(TASK_RUNNING);
 }
 
-/*
- * Decide if we're going to retry the write after a failure, and prepare
- * the buffer for retrying the write.
- */
-static bool
-xfs_buf_ioerror_fail_without_retry(
+static void
+xfs_buf_ioerror_alert_ratelimited(
 	struct xfs_buf		*bp)
 {
-	struct xfs_mount	*mp = bp->b_mount;
 	static unsigned long	lasttime;
 	static struct xfs_buftarg *lasttarg;
 
-	/*
-	 * If we've already decided to shutdown the filesystem because of
-	 * I/O errors, there's no point in giving this a retry.
-	 */
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return true;
-
 	if (bp->b_target != lasttarg ||
 	    time_after(jiffies, (lasttime + 5*HZ))) {
 		lasttime = jiffies;
 		xfs_buf_ioerror_alert(bp, __this_address);
 	}
 	lasttarg = bp->b_target;
-
-	/* synchronous writes will have callers process the error */
-	if (!(bp->b_flags & XBF_ASYNC))
-		return true;
-	return false;
 }
 
 static bool
@@ -1282,7 +1265,19 @@ xfs_buf_ioend_disposition(
 	if (likely(!bp->b_error))
 		return XBF_IOEND_FINISH;
 
-	if (xfs_buf_ioerror_fail_without_retry(bp))
+	/*
+	 * If we've already decided to shutdown the filesystem because of I/O
+	 * errors, there's no point in giving this a retry.
+	 */
+	if (XFS_FORCED_SHUTDOWN(mp))
+		goto out_stale;
+
+	xfs_buf_ioerror_alert_ratelimited(bp);
+
+	/*
+	 * Synchronous writes will have callers process the error.
+	 */
+	if (!(bp->b_flags & XBF_ASYNC))
 		goto out_stale;
 
 	trace_xfs_buf_iodone_async(bp, _RET_IP_);
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 07/13] xfs: remove xfs_buf_ioerror_retry
  2020-07-09 15:04 tidy up the buffer cache implementation Christoph Hellwig
                   ` (5 preceding siblings ...)
  2020-07-09 15:04 ` [PATCH 06/13] xfs: refactor xfs_buf_ioerror_fail_without_retry Christoph Hellwig
@ 2020-07-09 15:04 ` Christoph Hellwig
  2020-08-18 22:42   ` Darrick J. Wong
  2020-07-09 15:04 ` [PATCH 08/13] xfs: lift the XBF_IOEND_FAIL handling into xfs_buf_ioend_disposition Christoph Hellwig
                   ` (5 subsequent siblings)
  12 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2020-07-09 15:04 UTC (permalink / raw)
  To: linux-xfs

Merge xfs_buf_ioerror_retry into its only caller to make the resubmission
flow a little more obvious.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 33 ++++++++++++---------------------
 1 file changed, 12 insertions(+), 21 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 2f2ce3faab0826..e5592563dda6a1 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1187,23 +1187,6 @@ xfs_buf_ioerror_alert_ratelimited(
 	lasttarg = bp->b_target;
 }
 
-static bool
-xfs_buf_ioerror_retry(
-	struct xfs_buf		*bp,
-	struct xfs_error_cfg	*cfg)
-{
-	if ((bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) &&
-	    bp->b_last_error == bp->b_error)
-		return false;
-
-	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
-	bp->b_last_error = bp->b_error;
-	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
-	    !bp->b_first_retry_time)
-		bp->b_first_retry_time = jiffies;
-	return true;
-}
-
 /*
  * Account for this latest trip around the retry handler, and decide if
  * we've failed enough times to constitute a permanent failure.
@@ -1283,10 +1266,13 @@ xfs_buf_ioend_disposition(
 	trace_xfs_buf_iodone_async(bp, _RET_IP_);
 
 	cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
-	if (xfs_buf_ioerror_retry(bp, cfg)) {
-		xfs_buf_ioerror(bp, 0);
-		xfs_buf_submit(bp);
-		return XBF_IOEND_DONE;
+	if (bp->b_last_error != bp->b_error ||
+	    !(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL))) {
+		bp->b_last_error = bp->b_error;
+		if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
+		    !bp->b_first_retry_time)
+			bp->b_first_retry_time = jiffies;
+		goto resubmit;
 	}
 
 	/*
@@ -1301,6 +1287,11 @@ xfs_buf_ioend_disposition(
 	/* Still considered a transient error. Caller will schedule retries. */
 	return XBF_IOEND_FAIL;
 
+resubmit:
+	xfs_buf_ioerror(bp, 0);
+	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
+	xfs_buf_submit(bp);
+	return XBF_IOEND_DONE;
 out_stale:
 	xfs_buf_stale(bp);
 	bp->b_flags |= XBF_DONE;
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 08/13] xfs: lift the XBF_IOEND_FAIL handling into xfs_buf_ioend_disposition
  2020-07-09 15:04 tidy up the buffer cache implementation Christoph Hellwig
                   ` (6 preceding siblings ...)
  2020-07-09 15:04 ` [PATCH 07/13] xfs: remove xfs_buf_ioerror_retry Christoph Hellwig
@ 2020-07-09 15:04 ` Christoph Hellwig
  2020-08-18 22:48   ` Darrick J. Wong
  2020-07-09 15:04 ` [PATCH 09/13] xfs: simplify the xfs_buf_ioend_disposition calling convention Christoph Hellwig
                   ` (4 subsequent siblings)
  12 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2020-07-09 15:04 UTC (permalink / raw)
  To: linux-xfs

Keep all the error handling code together.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index e5592563dda6a1..e3e80615c5ed9e 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1285,6 +1285,14 @@ xfs_buf_ioend_disposition(
 	}
 
 	/* Still considered a transient error. Caller will schedule retries. */
+	if (bp->b_flags & _XBF_INODES)
+		xfs_buf_inode_io_fail(bp);
+	else if (bp->b_flags & _XBF_DQUOTS)
+		xfs_buf_dquot_io_fail(bp);
+	else
+		ASSERT(list_empty(&bp->b_li_list));
+	xfs_buf_ioerror(bp, 0);
+	xfs_buf_relse(bp);
 	return XBF_IOEND_FAIL;
 
 resubmit:
@@ -1338,14 +1346,6 @@ xfs_buf_ioend(
 		case XBF_IOEND_DONE:
 			return;
 		case XBF_IOEND_FAIL:
-			if (bp->b_flags & _XBF_INODES)
-				xfs_buf_inode_io_fail(bp);
-			else if (bp->b_flags & _XBF_DQUOTS)
-				xfs_buf_dquot_io_fail(bp);
-			else
-				ASSERT(list_empty(&bp->b_li_list));
-			xfs_buf_ioerror(bp, 0);
-			xfs_buf_relse(bp);
 			return;
 		default:
 			break;
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 09/13] xfs: simplify the xfs_buf_ioend_disposition calling convention
  2020-07-09 15:04 tidy up the buffer cache implementation Christoph Hellwig
                   ` (7 preceding siblings ...)
  2020-07-09 15:04 ` [PATCH 08/13] xfs: lift the XBF_IOEND_FAIL handling into xfs_buf_ioend_disposition Christoph Hellwig
@ 2020-07-09 15:04 ` Christoph Hellwig
  2020-08-18 22:52   ` Darrick J. Wong
  2020-07-09 15:04 ` [PATCH 10/13] xfs: use xfs_buf_item_relse in xfs_buf_item_done Christoph Hellwig
                   ` (3 subsequent siblings)
  12 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2020-07-09 15:04 UTC (permalink / raw)
  To: linux-xfs

Now that all the actual error handling is in a single place,
xfs_buf_ioend_disposition just needs to return true if took ownership of
the buffer, or false if not instead of the tristate.  Also move the
error check back in the caller to optimize for the fast path, and give
the function a better fitting name.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 34 ++++++----------------------------
 1 file changed, 6 insertions(+), 28 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index e3e80615c5ed9e..4a9034a9175504 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1224,30 +1224,14 @@ xfs_buf_ioerror_permanent(
  *
  * If we get repeated async write failures, then we take action according to the
  * error configuration we have been set up to use.
- *
- * Multi-state return value:
- *
- * XBF_IOEND_FINISH: run callback completions
- * XBF_IOEND_DONE: resubmitted immediately, do not run any completions
- * XBF_IOEND_FAIL: transient error, run failure callback completions and then
- *    release the buffer
  */
-enum xfs_buf_ioend_disposition {
-	XBF_IOEND_FINISH,
-	XBF_IOEND_DONE,
-	XBF_IOEND_FAIL,
-};
-
-static enum xfs_buf_ioend_disposition
-xfs_buf_ioend_disposition(
+static bool
+xfs_buf_ioend_handle_error(
 	struct xfs_buf		*bp)
 {
 	struct xfs_mount	*mp = bp->b_mount;
 	struct xfs_error_cfg	*cfg;
 
-	if (likely(!bp->b_error))
-		return XBF_IOEND_FINISH;
-
 	/*
 	 * If we've already decided to shutdown the filesystem because of I/O
 	 * errors, there's no point in giving this a retry.
@@ -1293,18 +1277,18 @@ xfs_buf_ioend_disposition(
 		ASSERT(list_empty(&bp->b_li_list));
 	xfs_buf_ioerror(bp, 0);
 	xfs_buf_relse(bp);
-	return XBF_IOEND_FAIL;
+	return true;
 
 resubmit:
 	xfs_buf_ioerror(bp, 0);
 	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
 	xfs_buf_submit(bp);
-	return XBF_IOEND_DONE;
+	return true;
 out_stale:
 	xfs_buf_stale(bp);
 	bp->b_flags |= XBF_DONE;
 	trace_xfs_buf_error_relse(bp, _RET_IP_);
-	return XBF_IOEND_FINISH;
+	return false;
 }
 
 static void
@@ -1342,14 +1326,8 @@ xfs_buf_ioend(
 			bp->b_flags |= XBF_DONE;
 		}
 
-		switch (xfs_buf_ioend_disposition(bp)) {
-		case XBF_IOEND_DONE:
+		if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp))
 			return;
-		case XBF_IOEND_FAIL:
-			return;
-		default:
-			break;
-		}
 
 		/* clear the retry state */
 		bp->b_last_error = 0;
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 10/13] xfs: use xfs_buf_item_relse in xfs_buf_item_done
  2020-07-09 15:04 tidy up the buffer cache implementation Christoph Hellwig
                   ` (8 preceding siblings ...)
  2020-07-09 15:04 ` [PATCH 09/13] xfs: simplify the xfs_buf_ioend_disposition calling convention Christoph Hellwig
@ 2020-07-09 15:04 ` Christoph Hellwig
  2020-08-18 22:52   ` Darrick J. Wong
  2020-07-09 15:04 ` [PATCH 11/13] xfs: clear the read/write flags later in xfs_buf_ioend Christoph Hellwig
                   ` (2 subsequent siblings)
  12 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2020-07-09 15:04 UTC (permalink / raw)
  To: linux-xfs

Reuse xfs_buf_item_relse instead of duplicating it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf_item.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index ccc9d69683fae4..ccfd747d32e410 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -959,8 +959,6 @@ void
 xfs_buf_item_done(
 	struct xfs_buf		*bp)
 {
-	struct xfs_buf_log_item	*bip = bp->b_log_item;
-
 	/*
 	 * If we are forcibly shutting down, this may well be off the AIL
 	 * already. That's because we simulate the log-committed callbacks to
@@ -970,8 +968,7 @@ xfs_buf_item_done(
 	 *
 	 * Either way, AIL is useless if we're forcing a shutdown.
 	 */
-	xfs_trans_ail_delete(&bip->bli_item, SHUTDOWN_CORRUPT_INCORE);
-	bp->b_log_item = NULL;
-	xfs_buf_item_free(bip);
-	xfs_buf_rele(bp);
+	xfs_trans_ail_delete(&bp->b_log_item->bli_item,
+			     SHUTDOWN_CORRUPT_INCORE);
+	xfs_buf_item_relse(bp);
 }
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 11/13] xfs: clear the read/write flags later in xfs_buf_ioend
  2020-07-09 15:04 tidy up the buffer cache implementation Christoph Hellwig
                   ` (9 preceding siblings ...)
  2020-07-09 15:04 ` [PATCH 10/13] xfs: use xfs_buf_item_relse in xfs_buf_item_done Christoph Hellwig
@ 2020-07-09 15:04 ` Christoph Hellwig
  2020-08-18 22:54   ` Darrick J. Wong
  2020-07-09 15:04 ` [PATCH 12/13] xfs: remove xlog_recover_iodone Christoph Hellwig
  2020-07-09 15:04 ` [PATCH 13/13] xfs: reuse _xfs_buf_read for re-reading the superblock Christoph Hellwig
  12 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2020-07-09 15:04 UTC (permalink / raw)
  To: linux-xfs

Clear the flags at the end of xfs_buf_ioend so that they can be used
during the completion.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 4a9034a9175504..8bbd28f39a927b 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1281,12 +1281,13 @@ xfs_buf_ioend_handle_error(
 
 resubmit:
 	xfs_buf_ioerror(bp, 0);
-	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
+	bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL);
 	xfs_buf_submit(bp);
 	return true;
 out_stale:
 	xfs_buf_stale(bp);
 	bp->b_flags |= XBF_DONE;
+	bp->b_flags &= ~XBF_WRITE;
 	trace_xfs_buf_error_relse(bp, _RET_IP_);
 	return false;
 }
@@ -1295,12 +1296,8 @@ static void
 xfs_buf_ioend(
 	struct xfs_buf	*bp)
 {
-	bool		read = bp->b_flags & XBF_READ;
-
 	trace_xfs_buf_iodone(bp, _RET_IP_);
 
-	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
-
 	/*
 	 * Pull in IO completion errors now. We are guaranteed to be running
 	 * single threaded, so we don't need the lock to read b_io_error.
@@ -1308,7 +1305,7 @@ xfs_buf_ioend(
 	if (!bp->b_error && bp->b_io_error)
 		xfs_buf_ioerror(bp, bp->b_io_error);
 
-	if (read) {
+	if (bp->b_flags & XBF_READ) {
 		if (!bp->b_error && bp->b_ops)
 			bp->b_ops->verify_read(bp);
 		if (!bp->b_error)
@@ -1348,6 +1345,8 @@ xfs_buf_ioend(
 			xfs_buf_dquot_iodone(bp);
 	}
 
+	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
+
 	if (bp->b_flags & XBF_ASYNC)
 		xfs_buf_relse(bp);
 	else
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 12/13] xfs: remove xlog_recover_iodone
  2020-07-09 15:04 tidy up the buffer cache implementation Christoph Hellwig
                   ` (10 preceding siblings ...)
  2020-07-09 15:04 ` [PATCH 11/13] xfs: clear the read/write flags later in xfs_buf_ioend Christoph Hellwig
@ 2020-07-09 15:04 ` Christoph Hellwig
  2020-08-18 22:54   ` Darrick J. Wong
  2020-07-09 15:04 ` [PATCH 13/13] xfs: reuse _xfs_buf_read for re-reading the superblock Christoph Hellwig
  12 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2020-07-09 15:04 UTC (permalink / raw)
  To: linux-xfs

The log recovery I/O completion handler does not substancially differ from
the normal one except for the fact that we:

 a) never retry failed writes
 b) can have log items that aren't on the AIL
 c) never have inode/dquot log items attached and thus don't need to
    handle them

Add conditionals for (a) and (b) to the ioend code, while (c) doesn't
need special handling anyway.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_log_recover.h |  1 -
 fs/xfs/xfs_buf.c                | 20 ++++++++++++--------
 fs/xfs/xfs_buf_item.c           |  4 ++++
 fs/xfs/xfs_buf_item_recover.c   |  2 +-
 fs/xfs/xfs_log_recover.c        | 25 -------------------------
 5 files changed, 17 insertions(+), 35 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 641132d0e39ddd..3cca2bfe714cb2 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -121,7 +121,6 @@ struct xlog_recover {
 void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len,
 		const struct xfs_buf_ops *ops);
 bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
-void xlog_recover_iodone(struct xfs_buf *bp);
 
 void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
 		uint64_t intent_id);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8bbd28f39a927b..1172d5fa06aad2 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1241,6 +1241,15 @@ xfs_buf_ioend_handle_error(
 
 	xfs_buf_ioerror_alert_ratelimited(bp);
 
+	/*
+	 * We're not going to bother about retrying this during recovery.
+	 * One strike!
+	 */
+	if (bp->b_flags & _XBF_LOGRECOVERY) {
+		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+		return false;
+	}
+
 	/*
 	 * Synchronous writes will have callers process the error.
 	 */
@@ -1310,13 +1319,6 @@ xfs_buf_ioend(
 			bp->b_ops->verify_read(bp);
 		if (!bp->b_error)
 			bp->b_flags |= XBF_DONE;
-	} else if (bp->b_flags & _XBF_LOGRECOVERY) {
-		/*
-		 * If this is a log recovery buffer, we aren't doing
-		 * transactional I/O yet so we need to let the log recovery code
-		 * handle I/O completions:
-		 */
-		xlog_recover_iodone(bp);
 	} else {
 		if (!bp->b_error) {
 			bp->b_flags &= ~XBF_WRITE_FAIL;
@@ -1343,9 +1345,11 @@ xfs_buf_ioend(
 			xfs_buf_inode_iodone(bp);
 		else if (bp->b_flags & _XBF_DQUOTS)
 			xfs_buf_dquot_iodone(bp);
+
 	}
 
-	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
+	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD |
+			 _XBF_LOGRECOVERY);
 
 	if (bp->b_flags & XBF_ASYNC)
 		xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index ccfd747d32e410..ab87c1294f7584 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -967,8 +967,12 @@ xfs_buf_item_done(
 	 * xfs_trans_ail_delete() takes care of these.
 	 *
 	 * Either way, AIL is useless if we're forcing a shutdown.
+	 *
+	 * Note that log recovery writes might have buffer items that are not on
+	 * the AIL during normal operations.
 	 */
 	xfs_trans_ail_delete(&bp->b_log_item->bli_item,
+			     (bp->b_flags & _XBF_LOGRECOVERY) ? 0 :
 			     SHUTDOWN_CORRUPT_INCORE);
 	xfs_buf_item_relse(bp);
 }
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 74c851f60eeeb1..1bbae3ab1322f7 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -414,7 +414,7 @@ xlog_recover_validate_buf_type(
 	 *
 	 * Write verifiers update the metadata LSN from log items attached to
 	 * the buffer. Therefore, initialize a bli purely to carry the LSN to
-	 * the verifier. We'll clean it up in our ->iodone() callback.
+	 * the verifier.
 	 */
 	if (bp->b_ops) {
 		struct xfs_buf_log_item	*bip;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 741a2c247bc585..b181f3253e6e74 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -265,31 +265,6 @@ xlog_header_check_mount(
 	return 0;
 }
 
-void
-xlog_recover_iodone(
-	struct xfs_buf	*bp)
-{
-	if (!bp->b_error) {
-		bp->b_flags |= XBF_DONE;
-	} else if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
-		/*
-		 * We're not going to bother about retrying this during
-		 * recovery. One strike!
-		 */
-		xfs_buf_ioerror_alert(bp, __this_address);
-		xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
-	}
-
-	/*
-	 * On v5 supers, a bli could be attached to update the metadata LSN.
-	 * Clean it up.
-	 */
-	if (bp->b_log_item)
-		xfs_buf_item_relse(bp);
-	ASSERT(bp->b_log_item == NULL);
-	bp->b_flags &= ~_XBF_LOGRECOVERY;
-}
-
 /*
  * This routine finds (to an approximation) the first block in the physical
  * log which contains the given cycle.  It uses a binary search algorithm.
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 13/13] xfs: reuse _xfs_buf_read for re-reading the superblock
  2020-07-09 15:04 tidy up the buffer cache implementation Christoph Hellwig
                   ` (11 preceding siblings ...)
  2020-07-09 15:04 ` [PATCH 12/13] xfs: remove xlog_recover_iodone Christoph Hellwig
@ 2020-07-09 15:04 ` Christoph Hellwig
  2020-08-18 23:02   ` Darrick J. Wong
  12 siblings, 1 reply; 28+ messages in thread
From: Christoph Hellwig @ 2020-07-09 15:04 UTC (permalink / raw)
  To: linux-xfs

Instead of poking deeply into buffer cache internals when re-reading the
superblock during log recovery just generalize _xfs_buf_read and use it
there.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c         | 28 +++++++++++++++++++---------
 fs/xfs/xfs_buf.h         | 10 ++--------
 fs/xfs/xfs_log_recover.c | 11 +++--------
 3 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 1172d5fa06aad2..34d88c8b50854a 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -52,6 +52,15 @@ static kmem_zone_t *xfs_buf_zone;
  *	  b_lock (trylock due to inversion)
  */
 
+static int __xfs_buf_submit(struct xfs_buf *bp, bool wait);
+
+static inline int
+xfs_buf_submit(
+	struct xfs_buf		*bp)
+{
+	return __xfs_buf_submit(bp, !(bp->b_flags & XBF_ASYNC));
+}
+
 static inline int
 xfs_buf_is_vmapped(
 	struct xfs_buf	*bp)
@@ -753,16 +762,18 @@ xfs_buf_get_map(
 	return 0;
 }
 
-STATIC int
+int
 _xfs_buf_read(
-	xfs_buf_t		*bp,
-	xfs_buf_flags_t		flags)
+	struct xfs_buf		*bp,
+	xfs_buf_flags_t		flags,
+	const struct xfs_buf_ops *ops)
 {
-	ASSERT(!(flags & XBF_WRITE));
 	ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
+	ASSERT(!(flags & XBF_WRITE));
 
-	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
-	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
+	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE);
+	bp->b_flags |= flags & (XBF_ASYNC | XBF_READ_AHEAD);
+	bp->b_ops = ops;
 
 	return xfs_buf_submit(bp);
 }
@@ -827,8 +838,7 @@ xfs_buf_read_map(
 	if (!(bp->b_flags & XBF_DONE)) {
 		/* Initiate the buffer read and wait. */
 		XFS_STATS_INC(target->bt_mount, xb_get_read);
-		bp->b_ops = ops;
-		error = _xfs_buf_read(bp, flags);
+		error = _xfs_buf_read(bp, flags, ops);
 
 		/* Readahead iodone already dropped the buffer, so exit. */
 		if (flags & XBF_ASYNC)
@@ -1637,7 +1647,7 @@ xfs_buf_iowait(
  * safe to reference the buffer after a call to this function unless the caller
  * holds an additional reference itself.
  */
-int
+static int
 __xfs_buf_submit(
 	struct xfs_buf	*bp,
 	bool		wait)
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 9eb4044597c985..db172599d32dc1 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -249,6 +249,8 @@ int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int flags,
 int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
 			  size_t numblks, int flags, struct xfs_buf **bpp,
 			  const struct xfs_buf_ops *ops);
+int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags,
+		const struct xfs_buf_ops *ops);
 void xfs_buf_hold(struct xfs_buf *bp);
 
 /* Releasing Buffers */
@@ -275,14 +277,6 @@ extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
 #define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address)
 extern void xfs_buf_ioerror_alert(struct xfs_buf *bp, xfs_failaddr_t fa);
 void xfs_buf_ioend_fail(struct xfs_buf *);
-
-extern int __xfs_buf_submit(struct xfs_buf *bp, bool);
-static inline int xfs_buf_submit(struct xfs_buf *bp)
-{
-	bool wait = bp->b_flags & XBF_ASYNC ? false : true;
-	return __xfs_buf_submit(bp, wait);
-}
-
 void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize);
 void __xfs_buf_mark_corrupt(struct xfs_buf *bp, xfs_failaddr_t fa);
 #define xfs_buf_mark_corrupt(bp) __xfs_buf_mark_corrupt((bp), __this_address)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b181f3253e6e74..04f76a75886744 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3305,16 +3305,11 @@ xlog_do_recover(
 	xlog_assign_tail_lsn(mp);
 
 	/*
-	 * Now that we've finished replaying all buffer and inode
-	 * updates, re-read in the superblock and reverify it.
+	 * Now that we've finished replaying all buffer and inode updates,
+	 * re-read in the superblock and reverify it.
 	 */
 	bp = xfs_getsb(mp);
-	bp->b_flags &= ~(XBF_DONE | XBF_ASYNC);
-	ASSERT(!(bp->b_flags & XBF_WRITE));
-	bp->b_flags |= XBF_READ;
-	bp->b_ops = &xfs_sb_buf_ops;
-
-	error = xfs_buf_submit(bp);
+	error = _xfs_buf_read(bp, XBF_READ, &xfs_sb_buf_ops);
 	if (error) {
 		if (!XFS_FORCED_SHUTDOWN(mp)) {
 			xfs_buf_ioerror_alert(bp, __this_address);
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 01/13] xfs: refactor the buf ioend disposition code
  2020-07-09 15:04 ` [PATCH 01/13] xfs: refactor the buf ioend disposition code Christoph Hellwig
@ 2020-08-18 22:09   ` Darrick J. Wong
  0 siblings, 0 replies; 28+ messages in thread
From: Darrick J. Wong @ 2020-08-18 22:09 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 09, 2020 at 05:04:41PM +0200, Christoph Hellwig wrote:
> Handle the no-error case in xfs_buf_iodone_error as well, and to clarify
> the code rename the function, use the actual enum type as return value
> and then switch on it in the callers.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Seems pretty straightforward to me, sorry I didn't get to this earlier...
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_buf_item.c | 115 +++++++++++++++++++++++-------------------
>  1 file changed, 62 insertions(+), 53 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
> index e9428c30862a98..19896884189973 100644
> --- a/fs/xfs/xfs_buf_item.c
> +++ b/fs/xfs/xfs_buf_item.c
> @@ -1046,24 +1046,27 @@ xfs_buf_ioerror_permanent(
>   *
>   * Multi-state return value:
>   *
> - * XBF_IOERROR_FINISH: clear IO error retry state and run callback completions
> - * XBF_IOERROR_DONE: resubmitted immediately, do not run any completions
> - * XBF_IOERROR_FAIL: transient error, run failure callback completions and then
> + * XBF_IOEND_FINISH: run callback completions
> + * XBF_IOEND_DONE: resubmitted immediately, do not run any completions
> + * XBF_IOEND_FAIL: transient error, run failure callback completions and then
>   *    release the buffer
>   */
> -enum {
> -	XBF_IOERROR_FINISH,
> -	XBF_IOERROR_DONE,
> -	XBF_IOERROR_FAIL,
> +enum xfs_buf_ioend_disposition {
> +	XBF_IOEND_FINISH,
> +	XBF_IOEND_DONE,
> +	XBF_IOEND_FAIL,
>  };
>  
> -static int
> -xfs_buf_iodone_error(
> +static enum xfs_buf_ioend_disposition
> +xfs_buf_ioend_disposition(
>  	struct xfs_buf		*bp)
>  {
>  	struct xfs_mount	*mp = bp->b_mount;
>  	struct xfs_error_cfg	*cfg;
>  
> +	if (likely(!bp->b_error))
> +		return XBF_IOEND_FINISH;
> +
>  	if (xfs_buf_ioerror_fail_without_retry(bp))
>  		goto out_stale;
>  
> @@ -1073,7 +1076,7 @@ xfs_buf_iodone_error(
>  	if (xfs_buf_ioerror_retry(bp, cfg)) {
>  		xfs_buf_ioerror(bp, 0);
>  		xfs_buf_submit(bp);
> -		return XBF_IOERROR_DONE;
> +		return XBF_IOEND_DONE;
>  	}
>  
>  	/*
> @@ -1086,13 +1089,13 @@ xfs_buf_iodone_error(
>  	}
>  
>  	/* Still considered a transient error. Caller will schedule retries. */
> -	return XBF_IOERROR_FAIL;
> +	return XBF_IOEND_FAIL;
>  
>  out_stale:
>  	xfs_buf_stale(bp);
>  	bp->b_flags |= XBF_DONE;
>  	trace_xfs_buf_error_relse(bp, _RET_IP_);
> -	return XBF_IOERROR_FINISH;
> +	return XBF_IOEND_FINISH;
>  }
>  
>  static void
> @@ -1128,6 +1131,19 @@ xfs_buf_clear_ioerror_retry_state(
>  	bp->b_first_retry_time = 0;
>  }
>  
> +static void
> +xfs_buf_inode_io_fail(
> +	struct xfs_buf		*bp)
> +{
> +	struct xfs_log_item	*lip;
> +
> +	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
> +		set_bit(XFS_LI_FAILED, &lip->li_flags);
> +
> +	xfs_buf_ioerror(bp, 0);
> +	xfs_buf_relse(bp);
> +}
> +
>  /*
>   * Inode buffer iodone callback function.
>   */
> @@ -1135,30 +1151,36 @@ void
>  xfs_buf_inode_iodone(
>  	struct xfs_buf		*bp)
>  {
> -	if (bp->b_error) {
> -		struct xfs_log_item *lip;
> -		int ret = xfs_buf_iodone_error(bp);
> -
> -		if (ret == XBF_IOERROR_FINISH)
> -			goto finish_iodone;
> -		if (ret == XBF_IOERROR_DONE)
> -			return;
> -		ASSERT(ret == XBF_IOERROR_FAIL);
> -		list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
> -			set_bit(XFS_LI_FAILED, &lip->li_flags);
> -		}
> -		xfs_buf_ioerror(bp, 0);
> -		xfs_buf_relse(bp);
> +	switch (xfs_buf_ioend_disposition(bp)) {
> +	case XBF_IOEND_DONE:
> +		return;
> +	case XBF_IOEND_FAIL:
> +		xfs_buf_inode_io_fail(bp);
>  		return;
> +	default:
> +		break;
>  	}
>  
> -finish_iodone:
>  	xfs_buf_clear_ioerror_retry_state(bp);
>  	xfs_buf_item_done(bp);
>  	xfs_iflush_done(bp);
>  	xfs_buf_ioend_finish(bp);
>  }
>  
> +static void
> +xfs_buf_dquot_io_fail(
> +	struct xfs_buf		*bp)
> +{
> +	struct xfs_log_item	*lip;
> +
> +	spin_lock(&bp->b_mount->m_ail->ail_lock);
> +	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
> +		xfs_set_li_failed(lip, bp);
> +	spin_unlock(&bp->b_mount->m_ail->ail_lock);
> +	xfs_buf_ioerror(bp, 0);
> +	xfs_buf_relse(bp);
> +}
> +
>  /*
>   * Dquot buffer iodone callback function.
>   */
> @@ -1166,26 +1188,16 @@ void
>  xfs_buf_dquot_iodone(
>  	struct xfs_buf		*bp)
>  {
> -	if (bp->b_error) {
> -		struct xfs_log_item *lip;
> -		int ret = xfs_buf_iodone_error(bp);
> -
> -		if (ret == XBF_IOERROR_FINISH)
> -			goto finish_iodone;
> -		if (ret == XBF_IOERROR_DONE)
> -			return;
> -		ASSERT(ret == XBF_IOERROR_FAIL);
> -		spin_lock(&bp->b_mount->m_ail->ail_lock);
> -		list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
> -			xfs_set_li_failed(lip, bp);
> -		}
> -		spin_unlock(&bp->b_mount->m_ail->ail_lock);
> -		xfs_buf_ioerror(bp, 0);
> -		xfs_buf_relse(bp);
> +	switch (xfs_buf_ioend_disposition(bp)) {
> +	case XBF_IOEND_DONE:
> +		return;
> +	case XBF_IOEND_FAIL:
> +		xfs_buf_dquot_io_fail(bp);
>  		return;
> +	default:
> +		break;
>  	}
>  
> -finish_iodone:
>  	xfs_buf_clear_ioerror_retry_state(bp);
>  	/* a newly allocated dquot buffer might have a log item attached */
>  	xfs_buf_item_done(bp);
> @@ -1203,21 +1215,18 @@ void
>  xfs_buf_iodone(
>  	struct xfs_buf		*bp)
>  {
> -	if (bp->b_error) {
> -		int ret = xfs_buf_iodone_error(bp);
> -
> -		if (ret == XBF_IOERROR_FINISH)
> -			goto finish_iodone;
> -		if (ret == XBF_IOERROR_DONE)
> -			return;
> -		ASSERT(ret == XBF_IOERROR_FAIL);
> +	switch (xfs_buf_ioend_disposition(bp)) {
> +	case XBF_IOEND_DONE:
> +		return;
> +	case XBF_IOEND_FAIL:
>  		ASSERT(list_empty(&bp->b_li_list));
>  		xfs_buf_ioerror(bp, 0);
>  		xfs_buf_relse(bp);
>  		return;
> +	default:
> +		break;
>  	}
>  
> -finish_iodone:
>  	xfs_buf_clear_ioerror_retry_state(bp);
>  	xfs_buf_item_done(bp);
>  	xfs_buf_ioend_finish(bp);
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 02/13] xfs: mark xfs_buf_ioend static
  2020-07-09 15:04 ` [PATCH 02/13] xfs: mark xfs_buf_ioend static Christoph Hellwig
@ 2020-08-18 22:10   ` Darrick J. Wong
  0 siblings, 0 replies; 28+ messages in thread
From: Darrick J. Wong @ 2020-08-18 22:10 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 09, 2020 at 05:04:42PM +0200, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks ok,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_buf.c | 2 +-
>  fs/xfs/xfs_buf.h | 1 -
>  2 files changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index dda0c94458797a..1bce6457a9b943 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -1176,7 +1176,7 @@ xfs_buf_wait_unpin(
>   *	Buffer Utility Routines
>   */
>  
> -void
> +static void
>  xfs_buf_ioend(
>  	struct xfs_buf	*bp)
>  {
> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index 755b652e695ace..bea20d43a38191 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -269,7 +269,6 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
>  
>  /* Buffer Read and Write Routines */
>  extern int xfs_bwrite(struct xfs_buf *bp);
> -extern void xfs_buf_ioend(struct xfs_buf *bp);
>  static inline void xfs_buf_ioend_finish(struct xfs_buf *bp)
>  {
>  	if (bp->b_flags & XBF_ASYNC)
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 03/13] xfs: refactor xfs_buf_ioend
  2020-07-09 15:04 ` [PATCH 03/13] xfs: refactor xfs_buf_ioend Christoph Hellwig
@ 2020-08-18 22:31   ` Darrick J. Wong
  0 siblings, 0 replies; 28+ messages in thread
From: Darrick J. Wong @ 2020-08-18 22:31 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 09, 2020 at 05:04:43PM +0200, Christoph Hellwig wrote:
> Refactor the logic for the different I/O completions to prepare for
> more code sharing.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

I think it's worth mentioning in the commit log that this patch leaves
the log recovery buffer completion code totally in charge of the buffer
state.  Not sure where exactly that part is going, though I guess your
eventual goal is to remove xlog_recover_iodone (and clean up the buffer
log item disposal)...?

If so,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_buf.c         | 41 +++++++++++++++++-----------------------
>  fs/xfs/xfs_log_recover.c | 14 +++++++-------
>  2 files changed, 24 insertions(+), 31 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 1bce6457a9b943..7c22d63f43f754 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -1199,33 +1199,26 @@ xfs_buf_ioend(
>  		if (!bp->b_error)
>  			bp->b_flags |= XBF_DONE;
>  		xfs_buf_ioend_finish(bp);
> -		return;
> -	}
> -
> -	if (!bp->b_error) {
> -		bp->b_flags &= ~XBF_WRITE_FAIL;
> -		bp->b_flags |= XBF_DONE;
> -	}
> -
> -	/*
> -	 * If this is a log recovery buffer, we aren't doing transactional IO
> -	 * yet so we need to let it handle IO completions.
> -	 */
> -	if (bp->b_flags & _XBF_LOGRECOVERY) {
> +	} else if (bp->b_flags & _XBF_LOGRECOVERY) {
> +		/*
> +		 * If this is a log recovery buffer, we aren't doing
> +		 * transactional I/O yet so we need to let the log recovery code
> +		 * handle I/O completions:
> +		 */
>  		xlog_recover_iodone(bp);
> -		return;
> -	}
> -
> -	if (bp->b_flags & _XBF_INODES) {
> -		xfs_buf_inode_iodone(bp);
> -		return;
> -	}
> +	} else {
> +		if (!bp->b_error) {
> +			bp->b_flags &= ~XBF_WRITE_FAIL;
> +			bp->b_flags |= XBF_DONE;
> +		}
>  
> -	if (bp->b_flags & _XBF_DQUOTS) {
> -		xfs_buf_dquot_iodone(bp);
> -		return;
> +		if (bp->b_flags & _XBF_INODES)
> +			xfs_buf_inode_iodone(bp);
> +		else if (bp->b_flags & _XBF_DQUOTS)
> +			xfs_buf_dquot_iodone(bp);
> +		else
> +			xfs_buf_iodone(bp);
>  	}
> -	xfs_buf_iodone(bp);
>  }
>  
>  static void
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 52a65a74208ffe..cf6dabb98f2327 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -269,15 +269,15 @@ void
>  xlog_recover_iodone(
>  	struct xfs_buf	*bp)
>  {
> -	if (bp->b_error) {
> +	if (!bp->b_error) {
> +		bp->b_flags |= XBF_DONE;
> +	} else if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
>  		/*
> -		 * We're not going to bother about retrying
> -		 * this during recovery. One strike!
> +		 * We're not going to bother about retrying this during
> +		 * recovery. One strike!
>  		 */
> -		if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
> -			xfs_buf_ioerror_alert(bp, __this_address);
> -			xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
> -		}
> +		xfs_buf_ioerror_alert(bp, __this_address);
> +		xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
>  	}
>  
>  	/*
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 04/13] xfs: move the buffer retry logic to xfs_buf.c
  2020-07-09 15:04 ` [PATCH 04/13] xfs: move the buffer retry logic to xfs_buf.c Christoph Hellwig
@ 2020-08-18 22:39   ` Darrick J. Wong
  0 siblings, 0 replies; 28+ messages in thread
From: Darrick J. Wong @ 2020-08-18 22:39 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 09, 2020 at 08:04:44AM -0700, Christoph Hellwig wrote:
> Move the buffer retry state machine logic to xfs_buf.c and call it once
> from xfs_ioend instead of duplicating it three times for the three kinds
> of buffers.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks like a pretty straightforward refactoring -- most of the buffer
completion code gets moved to xfs_buf.c, and the inode/dquot specific
pieces retreat to their respective files...

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/libxfs/xfs_trans_inode.c |   6 +-
>  fs/xfs/xfs_buf.c                | 173 ++++++++++++++++++++-
>  fs/xfs/xfs_buf_item.c           | 260 +-------------------------------
>  fs/xfs/xfs_buf_item.h           |   3 +
>  fs/xfs/xfs_dquot.c              |  14 +-
>  fs/xfs/xfs_inode.c              |   6 +-
>  fs/xfs/xfs_inode_item.c         |  12 +-
>  fs/xfs/xfs_inode_item.h         |   1 -
>  fs/xfs/xfs_trace.h              |   2 +-
>  9 files changed, 206 insertions(+), 271 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
> index e15129647e00c9..3071ab55c44518 100644
> --- a/fs/xfs/libxfs/xfs_trans_inode.c
> +++ b/fs/xfs/libxfs/xfs_trans_inode.c
> @@ -177,9 +177,9 @@ xfs_trans_log_inode(
>  
>  	/*
>  	 * Always OR in the bits from the ili_last_fields field.  This is to
> -	 * coordinate with the xfs_iflush() and xfs_iflush_done() routines in
> -	 * the eventual clearing of the ili_fields bits.  See the big comment in
> -	 * xfs_iflush() for an explanation of this coordination mechanism.
> +	 * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines
> +	 * in the eventual clearing of the ili_fields bits.  See the big comment
> +	 * in xfs_iflush() for an explanation of this coordination mechanism.
>  	 */
>  	iip->ili_fields |= (flags | iip->ili_last_fields | iversion_flags);
>  	spin_unlock(&iip->ili_lock);
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 7c22d63f43f754..443d11bdfcf502 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -1173,8 +1173,145 @@ xfs_buf_wait_unpin(
>  }
>  
>  /*
> - *	Buffer Utility Routines
> + * Decide if we're going to retry the write after a failure, and prepare
> + * the buffer for retrying the write.
>   */
> +static bool
> +xfs_buf_ioerror_fail_without_retry(
> +	struct xfs_buf		*bp)
> +{
> +	struct xfs_mount	*mp = bp->b_mount;
> +	static unsigned long	lasttime;
> +	static struct xfs_buftarg *lasttarg;
> +
> +	/*
> +	 * If we've already decided to shutdown the filesystem because of
> +	 * I/O errors, there's no point in giving this a retry.
> +	 */
> +	if (XFS_FORCED_SHUTDOWN(mp))
> +		return true;
> +
> +	if (bp->b_target != lasttarg ||
> +	    time_after(jiffies, (lasttime + 5*HZ))) {
> +		lasttime = jiffies;
> +		xfs_buf_ioerror_alert(bp, __this_address);
> +	}
> +	lasttarg = bp->b_target;
> +
> +	/* synchronous writes will have callers process the error */
> +	if (!(bp->b_flags & XBF_ASYNC))
> +		return true;
> +	return false;
> +}
> +
> +static bool
> +xfs_buf_ioerror_retry(
> +	struct xfs_buf		*bp,
> +	struct xfs_error_cfg	*cfg)
> +{
> +	if ((bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) &&
> +	    bp->b_last_error == bp->b_error)
> +		return false;
> +
> +	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
> +	bp->b_last_error = bp->b_error;
> +	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
> +	    !bp->b_first_retry_time)
> +		bp->b_first_retry_time = jiffies;
> +	return true;
> +}
> +
> +/*
> + * Account for this latest trip around the retry handler, and decide if
> + * we've failed enough times to constitute a permanent failure.
> + */
> +static bool
> +xfs_buf_ioerror_permanent(
> +	struct xfs_buf		*bp,
> +	struct xfs_error_cfg	*cfg)
> +{
> +	struct xfs_mount	*mp = bp->b_mount;
> +
> +	if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
> +	    ++bp->b_retries > cfg->max_retries)
> +		return true;
> +	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
> +	    time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
> +		return true;
> +
> +	/* At unmount we may treat errors differently */
> +	if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
> +		return true;
> +
> +	return false;
> +}
> +
> +/*
> + * On a sync write or shutdown we just want to stale the buffer and let the
> + * caller handle the error in bp->b_error appropriately.
> + *
> + * If the write was asynchronous then no one will be looking for the error.  If
> + * this is the first failure of this type, clear the error state and write the
> + * buffer out again. This means we always retry an async write failure at least
> + * once, but we also need to set the buffer up to behave correctly now for
> + * repeated failures.
> + *
> + * If we get repeated async write failures, then we take action according to the
> + * error configuration we have been set up to use.
> + *
> + * Multi-state return value:
> + *
> + * XBF_IOEND_FINISH: run callback completions
> + * XBF_IOEND_DONE: resubmitted immediately, do not run any completions
> + * XBF_IOEND_FAIL: transient error, run failure callback completions and then
> + *    release the buffer
> + */
> +enum xfs_buf_ioend_disposition {
> +	XBF_IOEND_FINISH,
> +	XBF_IOEND_DONE,
> +	XBF_IOEND_FAIL,
> +};
> +
> +static enum xfs_buf_ioend_disposition
> +xfs_buf_ioend_disposition(
> +	struct xfs_buf		*bp)
> +{
> +	struct xfs_mount	*mp = bp->b_mount;
> +	struct xfs_error_cfg	*cfg;
> +
> +	if (likely(!bp->b_error))
> +		return XBF_IOEND_FINISH;
> +
> +	if (xfs_buf_ioerror_fail_without_retry(bp))
> +		goto out_stale;
> +
> +	trace_xfs_buf_iodone_async(bp, _RET_IP_);
> +
> +	cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
> +	if (xfs_buf_ioerror_retry(bp, cfg)) {
> +		xfs_buf_ioerror(bp, 0);
> +		xfs_buf_submit(bp);
> +		return XBF_IOEND_DONE;
> +	}
> +
> +	/*
> +	 * Permanent error - we need to trigger a shutdown if we haven't already
> +	 * to indicate that inconsistency will result from this action.
> +	 */
> +	if (xfs_buf_ioerror_permanent(bp, cfg)) {
> +		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
> +		goto out_stale;
> +	}
> +
> +	/* Still considered a transient error. Caller will schedule retries. */
> +	return XBF_IOEND_FAIL;
> +
> +out_stale:
> +	xfs_buf_stale(bp);
> +	bp->b_flags |= XBF_DONE;
> +	trace_xfs_buf_error_relse(bp, _RET_IP_);
> +	return XBF_IOEND_FINISH;
> +}
>  
>  static void
>  xfs_buf_ioend(
> @@ -1212,12 +1349,42 @@ xfs_buf_ioend(
>  			bp->b_flags |= XBF_DONE;
>  		}
>  
> +		switch (xfs_buf_ioend_disposition(bp)) {
> +		case XBF_IOEND_DONE:
> +			return;
> +		case XBF_IOEND_FAIL:
> +			if (bp->b_flags & _XBF_INODES)
> +				xfs_buf_inode_io_fail(bp);
> +			else if (bp->b_flags & _XBF_DQUOTS)
> +				xfs_buf_dquot_io_fail(bp);
> +			else
> +				ASSERT(list_empty(&bp->b_li_list));
> +			xfs_buf_ioerror(bp, 0);
> +			xfs_buf_relse(bp);
> +			return;
> +		default:
> +			break;
> +		}
> +
> +		/* clear the retry state */
> +		bp->b_last_error = 0;
> +		bp->b_retries = 0;
> +		bp->b_first_retry_time = 0;
> +
> +		/*
> +		 * Note that for things like remote attribute buffers, there may
> +		 * not be a buffer log item here, so processing the buffer log
> +		 * item must remain optional.
> +		 */
> +		if (bp->b_log_item)
> +			xfs_buf_item_done(bp);
> +
>  		if (bp->b_flags & _XBF_INODES)
>  			xfs_buf_inode_iodone(bp);
>  		else if (bp->b_flags & _XBF_DQUOTS)
>  			xfs_buf_dquot_iodone(bp);
> -		else
> -			xfs_buf_iodone(bp);
> +
> +		xfs_buf_ioend_finish(bp);
>  	}
>  }
>  
> diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
> index 19896884189973..ccc9d69683fae4 100644
> --- a/fs/xfs/xfs_buf_item.c
> +++ b/fs/xfs/xfs_buf_item.c
> @@ -31,8 +31,6 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
>  	return container_of(lip, struct xfs_buf_log_item, bli_item);
>  }
>  
> -static void xfs_buf_item_done(struct xfs_buf *bp);
> -
>  /* Is this log iovec plausibly large enough to contain the buffer log format? */
>  bool
>  xfs_buf_log_check_iovec(
> @@ -464,7 +462,7 @@ xfs_buf_item_unpin(
>  		 */
>  		if (bip->bli_flags & XFS_BLI_STALE_INODE) {
>  			xfs_buf_item_done(bp);
> -			xfs_iflush_done(bp);
> +			xfs_buf_inode_iodone(bp);
>  			ASSERT(list_empty(&bp->b_li_list));
>  		} else {
>  			xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
> @@ -957,156 +955,12 @@ xfs_buf_item_relse(
>  	xfs_buf_item_free(bip);
>  }
>  
> -/*
> - * Decide if we're going to retry the write after a failure, and prepare
> - * the buffer for retrying the write.
> - */
> -static bool
> -xfs_buf_ioerror_fail_without_retry(
> -	struct xfs_buf		*bp)
> -{
> -	struct xfs_mount	*mp = bp->b_mount;
> -	static ulong		lasttime;
> -	static xfs_buftarg_t	*lasttarg;
> -
> -	/*
> -	 * If we've already decided to shutdown the filesystem because of
> -	 * I/O errors, there's no point in giving this a retry.
> -	 */
> -	if (XFS_FORCED_SHUTDOWN(mp))
> -		return true;
> -
> -	if (bp->b_target != lasttarg ||
> -	    time_after(jiffies, (lasttime + 5*HZ))) {
> -		lasttime = jiffies;
> -		xfs_buf_ioerror_alert(bp, __this_address);
> -	}
> -	lasttarg = bp->b_target;
> -
> -	/* synchronous writes will have callers process the error */
> -	if (!(bp->b_flags & XBF_ASYNC))
> -		return true;
> -	return false;
> -}
> -
> -static bool
> -xfs_buf_ioerror_retry(
> -	struct xfs_buf		*bp,
> -	struct xfs_error_cfg	*cfg)
> -{
> -	if ((bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) &&
> -	    bp->b_last_error == bp->b_error)
> -		return false;
> -
> -	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
> -	bp->b_last_error = bp->b_error;
> -	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
> -	    !bp->b_first_retry_time)
> -		bp->b_first_retry_time = jiffies;
> -	return true;
> -}
> -
> -/*
> - * Account for this latest trip around the retry handler, and decide if
> - * we've failed enough times to constitute a permanent failure.
> - */
> -static bool
> -xfs_buf_ioerror_permanent(
> -	struct xfs_buf		*bp,
> -	struct xfs_error_cfg	*cfg)
> -{
> -	struct xfs_mount	*mp = bp->b_mount;
> -
> -	if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
> -	    ++bp->b_retries > cfg->max_retries)
> -		return true;
> -	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
> -	    time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
> -		return true;
> -
> -	/* At unmount we may treat errors differently */
> -	if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
> -		return true;
> -
> -	return false;
> -}
> -
> -/*
> - * On a sync write or shutdown we just want to stale the buffer and let the
> - * caller handle the error in bp->b_error appropriately.
> - *
> - * If the write was asynchronous then no one will be looking for the error.  If
> - * this is the first failure of this type, clear the error state and write the
> - * buffer out again. This means we always retry an async write failure at least
> - * once, but we also need to set the buffer up to behave correctly now for
> - * repeated failures.
> - *
> - * If we get repeated async write failures, then we take action according to the
> - * error configuration we have been set up to use.
> - *
> - * Multi-state return value:
> - *
> - * XBF_IOEND_FINISH: run callback completions
> - * XBF_IOEND_DONE: resubmitted immediately, do not run any completions
> - * XBF_IOEND_FAIL: transient error, run failure callback completions and then
> - *    release the buffer
> - */
> -enum xfs_buf_ioend_disposition {
> -	XBF_IOEND_FINISH,
> -	XBF_IOEND_DONE,
> -	XBF_IOEND_FAIL,
> -};
> -
> -static enum xfs_buf_ioend_disposition
> -xfs_buf_ioend_disposition(
> -	struct xfs_buf		*bp)
> -{
> -	struct xfs_mount	*mp = bp->b_mount;
> -	struct xfs_error_cfg	*cfg;
> -
> -	if (likely(!bp->b_error))
> -		return XBF_IOEND_FINISH;
> -
> -	if (xfs_buf_ioerror_fail_without_retry(bp))
> -		goto out_stale;
> -
> -	trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
> -
> -	cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
> -	if (xfs_buf_ioerror_retry(bp, cfg)) {
> -		xfs_buf_ioerror(bp, 0);
> -		xfs_buf_submit(bp);
> -		return XBF_IOEND_DONE;
> -	}
> -
> -	/*
> -	 * Permanent error - we need to trigger a shutdown if we haven't already
> -	 * to indicate that inconsistency will result from this action.
> -	 */
> -	if (xfs_buf_ioerror_permanent(bp, cfg)) {
> -		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
> -		goto out_stale;
> -	}
> -
> -	/* Still considered a transient error. Caller will schedule retries. */
> -	return XBF_IOEND_FAIL;
> -
> -out_stale:
> -	xfs_buf_stale(bp);
> -	bp->b_flags |= XBF_DONE;
> -	trace_xfs_buf_error_relse(bp, _RET_IP_);
> -	return XBF_IOEND_FINISH;
> -}
> -
> -static void
> +void
>  xfs_buf_item_done(
>  	struct xfs_buf		*bp)
>  {
>  	struct xfs_buf_log_item	*bip = bp->b_log_item;
>  
> -	if (!bip)
> -		return;
> -
>  	/*
>  	 * If we are forcibly shutting down, this may well be off the AIL
>  	 * already. That's because we simulate the log-committed callbacks to
> @@ -1121,113 +975,3 @@ xfs_buf_item_done(
>  	xfs_buf_item_free(bip);
>  	xfs_buf_rele(bp);
>  }
> -
> -static inline void
> -xfs_buf_clear_ioerror_retry_state(
> -	struct xfs_buf		*bp)
> -{
> -	bp->b_last_error = 0;
> -	bp->b_retries = 0;
> -	bp->b_first_retry_time = 0;
> -}
> -
> -static void
> -xfs_buf_inode_io_fail(
> -	struct xfs_buf		*bp)
> -{
> -	struct xfs_log_item	*lip;
> -
> -	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
> -		set_bit(XFS_LI_FAILED, &lip->li_flags);
> -
> -	xfs_buf_ioerror(bp, 0);
> -	xfs_buf_relse(bp);
> -}
> -
> -/*
> - * Inode buffer iodone callback function.
> - */
> -void
> -xfs_buf_inode_iodone(
> -	struct xfs_buf		*bp)
> -{
> -	switch (xfs_buf_ioend_disposition(bp)) {
> -	case XBF_IOEND_DONE:
> -		return;
> -	case XBF_IOEND_FAIL:
> -		xfs_buf_inode_io_fail(bp);
> -		return;
> -	default:
> -		break;
> -	}
> -
> -	xfs_buf_clear_ioerror_retry_state(bp);
> -	xfs_buf_item_done(bp);
> -	xfs_iflush_done(bp);
> -	xfs_buf_ioend_finish(bp);
> -}
> -
> -static void
> -xfs_buf_dquot_io_fail(
> -	struct xfs_buf		*bp)
> -{
> -	struct xfs_log_item	*lip;
> -
> -	spin_lock(&bp->b_mount->m_ail->ail_lock);
> -	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
> -		xfs_set_li_failed(lip, bp);
> -	spin_unlock(&bp->b_mount->m_ail->ail_lock);
> -	xfs_buf_ioerror(bp, 0);
> -	xfs_buf_relse(bp);
> -}
> -
> -/*
> - * Dquot buffer iodone callback function.
> - */
> -void
> -xfs_buf_dquot_iodone(
> -	struct xfs_buf		*bp)
> -{
> -	switch (xfs_buf_ioend_disposition(bp)) {
> -	case XBF_IOEND_DONE:
> -		return;
> -	case XBF_IOEND_FAIL:
> -		xfs_buf_dquot_io_fail(bp);
> -		return;
> -	default:
> -		break;
> -	}
> -
> -	xfs_buf_clear_ioerror_retry_state(bp);
> -	/* a newly allocated dquot buffer might have a log item attached */
> -	xfs_buf_item_done(bp);
> -	xfs_dquot_done(bp);
> -	xfs_buf_ioend_finish(bp);
> -}
> -
> -/*
> - * Dirty buffer iodone callback function.
> - *
> - * Note that for things like remote attribute buffers, there may not be a buffer
> - * log item here, so processing the buffer log item must remain be optional.
> - */
> -void
> -xfs_buf_iodone(
> -	struct xfs_buf		*bp)
> -{
> -	switch (xfs_buf_ioend_disposition(bp)) {
> -	case XBF_IOEND_DONE:
> -		return;
> -	case XBF_IOEND_FAIL:
> -		ASSERT(list_empty(&bp->b_li_list));
> -		xfs_buf_ioerror(bp, 0);
> -		xfs_buf_relse(bp);
> -		return;
> -	default:
> -		break;
> -	}
> -
> -	xfs_buf_clear_ioerror_retry_state(bp);
> -	xfs_buf_item_done(bp);
> -	xfs_buf_ioend_finish(bp);
> -}
> diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
> index 23507cbb4c4132..55da71fb6e22fc 100644
> --- a/fs/xfs/xfs_buf_item.h
> +++ b/fs/xfs/xfs_buf_item.h
> @@ -50,12 +50,15 @@ struct xfs_buf_log_item {
>  };
>  
>  int	xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
> +void	xfs_buf_item_done(struct xfs_buf *bp);
>  void	xfs_buf_item_relse(struct xfs_buf *);
>  bool	xfs_buf_item_put(struct xfs_buf_log_item *);
>  void	xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
>  bool	xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
>  void	xfs_buf_inode_iodone(struct xfs_buf *);
> +void	xfs_buf_inode_io_fail(struct xfs_buf *bp);
>  void	xfs_buf_dquot_iodone(struct xfs_buf *);
> +void	xfs_buf_dquot_io_fail(struct xfs_buf *bp);
>  void	xfs_buf_iodone(struct xfs_buf *);
>  bool	xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec);
>  
> diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
> index 76353c9a723ee0..2f309767d8c03b 100644
> --- a/fs/xfs/xfs_dquot.c
> +++ b/fs/xfs/xfs_dquot.c
> @@ -1087,7 +1087,7 @@ xfs_qm_dqflush_done(
>  }
>  
>  void
> -xfs_dquot_done(
> +xfs_buf_dquot_iodone(
>  	struct xfs_buf		*bp)
>  {
>  	struct xfs_log_item	*lip, *n;
> @@ -1098,6 +1098,18 @@ xfs_dquot_done(
>  	}
>  }
>  
> +void
> +xfs_buf_dquot_io_fail(
> +	struct xfs_buf		*bp)
> +{
> +	struct xfs_log_item	*lip;
> +
> +	spin_lock(&bp->b_mount->m_ail->ail_lock);
> +	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
> +		xfs_set_li_failed(lip, bp);
> +	spin_unlock(&bp->b_mount->m_ail->ail_lock);
> +}
> +
>  /*
>   * Write a modified dquot to disk.
>   * The dquot must be locked and the flush lock too taken by caller.
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 5c07bf491d9f5f..98240126914fc8 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -3569,8 +3569,8 @@ xfs_iflush(
>  	 *
>  	 * What we do is move the bits to the ili_last_fields field.  When
>  	 * logging the inode, these bits are moved back to the ili_fields field.
> -	 * In the xfs_iflush_done() routine we clear ili_last_fields, since we
> -	 * know that the information those bits represent is permanently on
> +	 * In the xfs_buf_inode_iodone() routine we clear ili_last_fields, since
> +	 * we know that the information those bits represent is permanently on
>  	 * disk.  As long as the flush completes before the inode is logged
>  	 * again, then both ili_fields and ili_last_fields will be cleared.
>  	 */
> @@ -3584,7 +3584,7 @@ xfs_iflush(
>  
>  	/*
>  	 * Store the current LSN of the inode so that we can tell whether the
> -	 * item has moved in the AIL from xfs_iflush_done().
> +	 * item has moved in the AIL from xfs_buf_inode_iodone().
>  	 */
>  	xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
>  				&iip->ili_item.li_lsn);
> diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
> index 3840117f8a5e2c..69c3a40a51db10 100644
> --- a/fs/xfs/xfs_inode_item.c
> +++ b/fs/xfs/xfs_inode_item.c
> @@ -714,7 +714,7 @@ xfs_iflush_finish(
>   * as completing the flush and unlocking the inode.
>   */
>  void
> -xfs_iflush_done(
> +xfs_buf_inode_iodone(
>  	struct xfs_buf		*bp)
>  {
>  	struct xfs_log_item	*lip, *n;
> @@ -753,6 +753,16 @@ xfs_iflush_done(
>  		list_splice_tail(&flushed_inodes, &bp->b_li_list);
>  }
>  
> +void
> +xfs_buf_inode_io_fail(
> +	struct xfs_buf		*bp)
> +{
> +	struct xfs_log_item	*lip;
> +
> +	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
> +		set_bit(XFS_LI_FAILED, &lip->li_flags);
> +}
> +
>  /*
>   * This is the inode flushing abort routine.  It is called from xfs_iflush when
>   * the filesystem is shutting down to clean up the inode state.  It is
> diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
> index 048b5e7dee901f..7154d92338a393 100644
> --- a/fs/xfs/xfs_inode_item.h
> +++ b/fs/xfs/xfs_inode_item.h
> @@ -43,7 +43,6 @@ static inline int xfs_inode_clean(struct xfs_inode *ip)
>  
>  extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
>  extern void xfs_inode_item_destroy(struct xfs_inode *);
> -extern void xfs_iflush_done(struct xfs_buf *);
>  extern void xfs_iflush_abort(struct xfs_inode *);
>  extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
>  					 struct xfs_inode_log_format *);
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 50c478374a31b4..90702c6e5bd7ec 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -337,7 +337,7 @@ DEFINE_BUF_EVENT(xfs_buf_delwri_split);
>  DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf);
>  DEFINE_BUF_EVENT(xfs_buf_get_uncached);
>  DEFINE_BUF_EVENT(xfs_buf_item_relse);
> -DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
> +DEFINE_BUF_EVENT(xfs_buf_iodone_async);
>  DEFINE_BUF_EVENT(xfs_buf_error_relse);
>  DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
>  DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 05/13] xfs: fold xfs_buf_ioend_finish into xfs_ioend
  2020-07-09 15:04 ` [PATCH 05/13] xfs: fold xfs_buf_ioend_finish into xfs_ioend Christoph Hellwig
@ 2020-08-18 22:40   ` Darrick J. Wong
  0 siblings, 0 replies; 28+ messages in thread
From: Darrick J. Wong @ 2020-08-18 22:40 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 09, 2020 at 05:04:45PM +0200, Christoph Hellwig wrote:
> No need to keep a separate helper for this logic.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks ok to me,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_buf.c         | 8 +++++---
>  fs/xfs/xfs_buf.h         | 7 -------
>  fs/xfs/xfs_log_recover.c | 1 -
>  3 files changed, 5 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 443d11bdfcf502..e6a73e455caa1a 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -1335,7 +1335,6 @@ xfs_buf_ioend(
>  			bp->b_ops->verify_read(bp);
>  		if (!bp->b_error)
>  			bp->b_flags |= XBF_DONE;
> -		xfs_buf_ioend_finish(bp);
>  	} else if (bp->b_flags & _XBF_LOGRECOVERY) {
>  		/*
>  		 * If this is a log recovery buffer, we aren't doing
> @@ -1383,9 +1382,12 @@ xfs_buf_ioend(
>  			xfs_buf_inode_iodone(bp);
>  		else if (bp->b_flags & _XBF_DQUOTS)
>  			xfs_buf_dquot_iodone(bp);
> -
> -		xfs_buf_ioend_finish(bp);
>  	}
> +
> +	if (bp->b_flags & XBF_ASYNC)
> +		xfs_buf_relse(bp);
> +	else
> +		complete(&bp->b_iowait);
>  }
>  
>  static void
> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index bea20d43a38191..9eb4044597c985 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -269,13 +269,6 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
>  
>  /* Buffer Read and Write Routines */
>  extern int xfs_bwrite(struct xfs_buf *bp);
> -static inline void xfs_buf_ioend_finish(struct xfs_buf *bp)
> -{
> -	if (bp->b_flags & XBF_ASYNC)
> -		xfs_buf_relse(bp);
> -	else
> -		complete(&bp->b_iowait);
> -}
>  
>  extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
>  		xfs_failaddr_t failaddr);
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index cf6dabb98f2327..741a2c247bc585 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -288,7 +288,6 @@ xlog_recover_iodone(
>  		xfs_buf_item_relse(bp);
>  	ASSERT(bp->b_log_item == NULL);
>  	bp->b_flags &= ~_XBF_LOGRECOVERY;
> -	xfs_buf_ioend_finish(bp);
>  }
>  
>  /*
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 06/13] xfs: refactor xfs_buf_ioerror_fail_without_retry
  2020-07-09 15:04 ` [PATCH 06/13] xfs: refactor xfs_buf_ioerror_fail_without_retry Christoph Hellwig
@ 2020-08-18 22:41   ` Darrick J. Wong
  0 siblings, 0 replies; 28+ messages in thread
From: Darrick J. Wong @ 2020-08-18 22:41 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 09, 2020 at 05:04:46PM +0200, Christoph Hellwig wrote:
> xfs_buf_ioerror_fail_without_retry is a somewhat weird function in
> that it has two trivial checks that decide the return value, while
> the rest implements a ratelimited warning.  Just lift the two checks
> into the caller, and give the remainder a suitable name.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Pretty straightforward,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_buf.c | 35 +++++++++++++++--------------------
>  1 file changed, 15 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index e6a73e455caa1a..2f2ce3faab0826 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -1172,36 +1172,19 @@ xfs_buf_wait_unpin(
>  	set_current_state(TASK_RUNNING);
>  }
>  
> -/*
> - * Decide if we're going to retry the write after a failure, and prepare
> - * the buffer for retrying the write.
> - */
> -static bool
> -xfs_buf_ioerror_fail_without_retry(
> +static void
> +xfs_buf_ioerror_alert_ratelimited(
>  	struct xfs_buf		*bp)
>  {
> -	struct xfs_mount	*mp = bp->b_mount;
>  	static unsigned long	lasttime;
>  	static struct xfs_buftarg *lasttarg;
>  
> -	/*
> -	 * If we've already decided to shutdown the filesystem because of
> -	 * I/O errors, there's no point in giving this a retry.
> -	 */
> -	if (XFS_FORCED_SHUTDOWN(mp))
> -		return true;
> -
>  	if (bp->b_target != lasttarg ||
>  	    time_after(jiffies, (lasttime + 5*HZ))) {
>  		lasttime = jiffies;
>  		xfs_buf_ioerror_alert(bp, __this_address);
>  	}
>  	lasttarg = bp->b_target;
> -
> -	/* synchronous writes will have callers process the error */
> -	if (!(bp->b_flags & XBF_ASYNC))
> -		return true;
> -	return false;
>  }
>  
>  static bool
> @@ -1282,7 +1265,19 @@ xfs_buf_ioend_disposition(
>  	if (likely(!bp->b_error))
>  		return XBF_IOEND_FINISH;
>  
> -	if (xfs_buf_ioerror_fail_without_retry(bp))
> +	/*
> +	 * If we've already decided to shutdown the filesystem because of I/O
> +	 * errors, there's no point in giving this a retry.
> +	 */
> +	if (XFS_FORCED_SHUTDOWN(mp))
> +		goto out_stale;
> +
> +	xfs_buf_ioerror_alert_ratelimited(bp);
> +
> +	/*
> +	 * Synchronous writes will have callers process the error.
> +	 */
> +	if (!(bp->b_flags & XBF_ASYNC))
>  		goto out_stale;
>  
>  	trace_xfs_buf_iodone_async(bp, _RET_IP_);
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 07/13] xfs: remove xfs_buf_ioerror_retry
  2020-07-09 15:04 ` [PATCH 07/13] xfs: remove xfs_buf_ioerror_retry Christoph Hellwig
@ 2020-08-18 22:42   ` Darrick J. Wong
  0 siblings, 0 replies; 28+ messages in thread
From: Darrick J. Wong @ 2020-08-18 22:42 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 09, 2020 at 05:04:47PM +0200, Christoph Hellwig wrote:
> Merge xfs_buf_ioerror_retry into its only caller to make the resubmission
> flow a little more obvious.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Not sure about obvious, but at least it's all together again... :)

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_buf.c | 33 ++++++++++++---------------------
>  1 file changed, 12 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 2f2ce3faab0826..e5592563dda6a1 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -1187,23 +1187,6 @@ xfs_buf_ioerror_alert_ratelimited(
>  	lasttarg = bp->b_target;
>  }
>  
> -static bool
> -xfs_buf_ioerror_retry(
> -	struct xfs_buf		*bp,
> -	struct xfs_error_cfg	*cfg)
> -{
> -	if ((bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) &&
> -	    bp->b_last_error == bp->b_error)
> -		return false;
> -
> -	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
> -	bp->b_last_error = bp->b_error;
> -	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
> -	    !bp->b_first_retry_time)
> -		bp->b_first_retry_time = jiffies;
> -	return true;
> -}
> -
>  /*
>   * Account for this latest trip around the retry handler, and decide if
>   * we've failed enough times to constitute a permanent failure.
> @@ -1283,10 +1266,13 @@ xfs_buf_ioend_disposition(
>  	trace_xfs_buf_iodone_async(bp, _RET_IP_);
>  
>  	cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
> -	if (xfs_buf_ioerror_retry(bp, cfg)) {
> -		xfs_buf_ioerror(bp, 0);
> -		xfs_buf_submit(bp);
> -		return XBF_IOEND_DONE;
> +	if (bp->b_last_error != bp->b_error ||
> +	    !(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL))) {
> +		bp->b_last_error = bp->b_error;
> +		if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
> +		    !bp->b_first_retry_time)
> +			bp->b_first_retry_time = jiffies;
> +		goto resubmit;
>  	}
>  
>  	/*
> @@ -1301,6 +1287,11 @@ xfs_buf_ioend_disposition(
>  	/* Still considered a transient error. Caller will schedule retries. */
>  	return XBF_IOEND_FAIL;
>  
> +resubmit:
> +	xfs_buf_ioerror(bp, 0);
> +	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
> +	xfs_buf_submit(bp);
> +	return XBF_IOEND_DONE;
>  out_stale:
>  	xfs_buf_stale(bp);
>  	bp->b_flags |= XBF_DONE;
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 08/13] xfs: lift the XBF_IOEND_FAIL handling into xfs_buf_ioend_disposition
  2020-07-09 15:04 ` [PATCH 08/13] xfs: lift the XBF_IOEND_FAIL handling into xfs_buf_ioend_disposition Christoph Hellwig
@ 2020-08-18 22:48   ` Darrick J. Wong
  0 siblings, 0 replies; 28+ messages in thread
From: Darrick J. Wong @ 2020-08-18 22:48 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 09, 2020 at 05:04:48PM +0200, Christoph Hellwig wrote:
> Keep all the error handling code together.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_buf.c | 16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index e5592563dda6a1..e3e80615c5ed9e 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -1285,6 +1285,14 @@ xfs_buf_ioend_disposition(
>  	}
>  
>  	/* Still considered a transient error. Caller will schedule retries. */
> +	if (bp->b_flags & _XBF_INODES)
> +		xfs_buf_inode_io_fail(bp);
> +	else if (bp->b_flags & _XBF_DQUOTS)
> +		xfs_buf_dquot_io_fail(bp);
> +	else
> +		ASSERT(list_empty(&bp->b_li_list));
> +	xfs_buf_ioerror(bp, 0);
> +	xfs_buf_relse(bp);
>  	return XBF_IOEND_FAIL;

Hm.  I was about to whine about turning a "decide the outcome" function
into a "decide the outcome and do some of it" function, but I guess the
advantage of lazy reviewing is that I can skip to the last patch and see
that this all gets swallowed into xfs_buf_ioend_handle_error, doesn't it?

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

>  
>  resubmit:
> @@ -1338,14 +1346,6 @@ xfs_buf_ioend(
>  		case XBF_IOEND_DONE:
>  			return;
>  		case XBF_IOEND_FAIL:
> -			if (bp->b_flags & _XBF_INODES)
> -				xfs_buf_inode_io_fail(bp);
> -			else if (bp->b_flags & _XBF_DQUOTS)
> -				xfs_buf_dquot_io_fail(bp);
> -			else
> -				ASSERT(list_empty(&bp->b_li_list));
> -			xfs_buf_ioerror(bp, 0);
> -			xfs_buf_relse(bp);
>  			return;
>  		default:
>  			break;
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 09/13] xfs: simplify the xfs_buf_ioend_disposition calling convention
  2020-07-09 15:04 ` [PATCH 09/13] xfs: simplify the xfs_buf_ioend_disposition calling convention Christoph Hellwig
@ 2020-08-18 22:52   ` Darrick J. Wong
  0 siblings, 0 replies; 28+ messages in thread
From: Darrick J. Wong @ 2020-08-18 22:52 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 09, 2020 at 05:04:49PM +0200, Christoph Hellwig wrote:
> Now that all the actual error handling is in a single place,
> xfs_buf_ioend_disposition just needs to return true if took ownership of
> the buffer, or false if not instead of the tristate.  Also move the

Could you capture the meaning of the return values in the comment prior
to xfs_buf_ioend_handle_error, please?  It took me a while to figure out
that "return false" means that the caller owns the buffer and log items
attached to it and needs to clear all the state that we set up for the
buffer write; and that this is true even for buffers that we just
permanent-failed.

Otherwise, this looks fairly straightforward to me.

--D

> error check back in the caller to optimize for the fast path, and give
> the function a better fitting name.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_buf.c | 34 ++++++----------------------------
>  1 file changed, 6 insertions(+), 28 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index e3e80615c5ed9e..4a9034a9175504 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -1224,30 +1224,14 @@ xfs_buf_ioerror_permanent(
>   *
>   * If we get repeated async write failures, then we take action according to the
>   * error configuration we have been set up to use.
> - *
> - * Multi-state return value:
> - *
> - * XBF_IOEND_FINISH: run callback completions
> - * XBF_IOEND_DONE: resubmitted immediately, do not run any completions
> - * XBF_IOEND_FAIL: transient error, run failure callback completions and then
> - *    release the buffer
>   */
> -enum xfs_buf_ioend_disposition {
> -	XBF_IOEND_FINISH,
> -	XBF_IOEND_DONE,
> -	XBF_IOEND_FAIL,
> -};
> -
> -static enum xfs_buf_ioend_disposition
> -xfs_buf_ioend_disposition(
> +static bool
> +xfs_buf_ioend_handle_error(
>  	struct xfs_buf		*bp)
>  {
>  	struct xfs_mount	*mp = bp->b_mount;
>  	struct xfs_error_cfg	*cfg;
>  
> -	if (likely(!bp->b_error))
> -		return XBF_IOEND_FINISH;
> -
>  	/*
>  	 * If we've already decided to shutdown the filesystem because of I/O
>  	 * errors, there's no point in giving this a retry.
> @@ -1293,18 +1277,18 @@ xfs_buf_ioend_disposition(
>  		ASSERT(list_empty(&bp->b_li_list));
>  	xfs_buf_ioerror(bp, 0);
>  	xfs_buf_relse(bp);
> -	return XBF_IOEND_FAIL;
> +	return true;
>  
>  resubmit:
>  	xfs_buf_ioerror(bp, 0);
>  	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
>  	xfs_buf_submit(bp);
> -	return XBF_IOEND_DONE;
> +	return true;
>  out_stale:
>  	xfs_buf_stale(bp);
>  	bp->b_flags |= XBF_DONE;
>  	trace_xfs_buf_error_relse(bp, _RET_IP_);
> -	return XBF_IOEND_FINISH;
> +	return false;
>  }
>  
>  static void
> @@ -1342,14 +1326,8 @@ xfs_buf_ioend(
>  			bp->b_flags |= XBF_DONE;
>  		}
>  
> -		switch (xfs_buf_ioend_disposition(bp)) {
> -		case XBF_IOEND_DONE:
> +		if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp))
>  			return;
> -		case XBF_IOEND_FAIL:
> -			return;
> -		default:
> -			break;
> -		}
>  
>  		/* clear the retry state */
>  		bp->b_last_error = 0;
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 10/13] xfs: use xfs_buf_item_relse in xfs_buf_item_done
  2020-07-09 15:04 ` [PATCH 10/13] xfs: use xfs_buf_item_relse in xfs_buf_item_done Christoph Hellwig
@ 2020-08-18 22:52   ` Darrick J. Wong
  0 siblings, 0 replies; 28+ messages in thread
From: Darrick J. Wong @ 2020-08-18 22:52 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 09, 2020 at 05:04:50PM +0200, Christoph Hellwig wrote:
> Reuse xfs_buf_item_relse instead of duplicating it.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks good to me,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_buf_item.c | 9 +++------
>  1 file changed, 3 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
> index ccc9d69683fae4..ccfd747d32e410 100644
> --- a/fs/xfs/xfs_buf_item.c
> +++ b/fs/xfs/xfs_buf_item.c
> @@ -959,8 +959,6 @@ void
>  xfs_buf_item_done(
>  	struct xfs_buf		*bp)
>  {
> -	struct xfs_buf_log_item	*bip = bp->b_log_item;
> -
>  	/*
>  	 * If we are forcibly shutting down, this may well be off the AIL
>  	 * already. That's because we simulate the log-committed callbacks to
> @@ -970,8 +968,7 @@ xfs_buf_item_done(
>  	 *
>  	 * Either way, AIL is useless if we're forcing a shutdown.
>  	 */
> -	xfs_trans_ail_delete(&bip->bli_item, SHUTDOWN_CORRUPT_INCORE);
> -	bp->b_log_item = NULL;
> -	xfs_buf_item_free(bip);
> -	xfs_buf_rele(bp);
> +	xfs_trans_ail_delete(&bp->b_log_item->bli_item,
> +			     SHUTDOWN_CORRUPT_INCORE);
> +	xfs_buf_item_relse(bp);
>  }
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 11/13] xfs: clear the read/write flags later in xfs_buf_ioend
  2020-07-09 15:04 ` [PATCH 11/13] xfs: clear the read/write flags later in xfs_buf_ioend Christoph Hellwig
@ 2020-08-18 22:54   ` Darrick J. Wong
  0 siblings, 0 replies; 28+ messages in thread
From: Darrick J. Wong @ 2020-08-18 22:54 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 09, 2020 at 05:04:51PM +0200, Christoph Hellwig wrote:
> Clear the flags at the end of xfs_buf_ioend so that they can be used
> during the completion.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Seems fairly straightforward...
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_buf.c | 11 +++++------
>  1 file changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 4a9034a9175504..8bbd28f39a927b 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -1281,12 +1281,13 @@ xfs_buf_ioend_handle_error(
>  
>  resubmit:
>  	xfs_buf_ioerror(bp, 0);
> -	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
> +	bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL);
>  	xfs_buf_submit(bp);
>  	return true;
>  out_stale:
>  	xfs_buf_stale(bp);
>  	bp->b_flags |= XBF_DONE;
> +	bp->b_flags &= ~XBF_WRITE;
>  	trace_xfs_buf_error_relse(bp, _RET_IP_);
>  	return false;
>  }
> @@ -1295,12 +1296,8 @@ static void
>  xfs_buf_ioend(
>  	struct xfs_buf	*bp)
>  {
> -	bool		read = bp->b_flags & XBF_READ;
> -
>  	trace_xfs_buf_iodone(bp, _RET_IP_);
>  
> -	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
> -
>  	/*
>  	 * Pull in IO completion errors now. We are guaranteed to be running
>  	 * single threaded, so we don't need the lock to read b_io_error.
> @@ -1308,7 +1305,7 @@ xfs_buf_ioend(
>  	if (!bp->b_error && bp->b_io_error)
>  		xfs_buf_ioerror(bp, bp->b_io_error);
>  
> -	if (read) {
> +	if (bp->b_flags & XBF_READ) {
>  		if (!bp->b_error && bp->b_ops)
>  			bp->b_ops->verify_read(bp);
>  		if (!bp->b_error)
> @@ -1348,6 +1345,8 @@ xfs_buf_ioend(
>  			xfs_buf_dquot_iodone(bp);
>  	}
>  
> +	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
> +
>  	if (bp->b_flags & XBF_ASYNC)
>  		xfs_buf_relse(bp);
>  	else
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 12/13] xfs: remove xlog_recover_iodone
  2020-07-09 15:04 ` [PATCH 12/13] xfs: remove xlog_recover_iodone Christoph Hellwig
@ 2020-08-18 22:54   ` Darrick J. Wong
  0 siblings, 0 replies; 28+ messages in thread
From: Darrick J. Wong @ 2020-08-18 22:54 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 09, 2020 at 05:04:52PM +0200, Christoph Hellwig wrote:
> The log recovery I/O completion handler does not substancially differ from

"substantially"

> the normal one except for the fact that we:
> 
>  a) never retry failed writes
>  b) can have log items that aren't on the AIL
>  c) never have inode/dquot log items attached and thus don't need to
>     handle them
> 
> Add conditionals for (a) and (b) to the ioend code, while (c) doesn't
> need special handling anyway.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

With that one typo fixed:

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/libxfs/xfs_log_recover.h |  1 -
>  fs/xfs/xfs_buf.c                | 20 ++++++++++++--------
>  fs/xfs/xfs_buf_item.c           |  4 ++++
>  fs/xfs/xfs_buf_item_recover.c   |  2 +-
>  fs/xfs/xfs_log_recover.c        | 25 -------------------------
>  5 files changed, 17 insertions(+), 35 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
> index 641132d0e39ddd..3cca2bfe714cb2 100644
> --- a/fs/xfs/libxfs/xfs_log_recover.h
> +++ b/fs/xfs/libxfs/xfs_log_recover.h
> @@ -121,7 +121,6 @@ struct xlog_recover {
>  void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len,
>  		const struct xfs_buf_ops *ops);
>  bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
> -void xlog_recover_iodone(struct xfs_buf *bp);
>  
>  void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
>  		uint64_t intent_id);
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 8bbd28f39a927b..1172d5fa06aad2 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -1241,6 +1241,15 @@ xfs_buf_ioend_handle_error(
>  
>  	xfs_buf_ioerror_alert_ratelimited(bp);
>  
> +	/*
> +	 * We're not going to bother about retrying this during recovery.
> +	 * One strike!
> +	 */
> +	if (bp->b_flags & _XBF_LOGRECOVERY) {
> +		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
> +		return false;
> +	}
> +
>  	/*
>  	 * Synchronous writes will have callers process the error.
>  	 */
> @@ -1310,13 +1319,6 @@ xfs_buf_ioend(
>  			bp->b_ops->verify_read(bp);
>  		if (!bp->b_error)
>  			bp->b_flags |= XBF_DONE;
> -	} else if (bp->b_flags & _XBF_LOGRECOVERY) {
> -		/*
> -		 * If this is a log recovery buffer, we aren't doing
> -		 * transactional I/O yet so we need to let the log recovery code
> -		 * handle I/O completions:
> -		 */
> -		xlog_recover_iodone(bp);
>  	} else {
>  		if (!bp->b_error) {
>  			bp->b_flags &= ~XBF_WRITE_FAIL;
> @@ -1343,9 +1345,11 @@ xfs_buf_ioend(
>  			xfs_buf_inode_iodone(bp);
>  		else if (bp->b_flags & _XBF_DQUOTS)
>  			xfs_buf_dquot_iodone(bp);
> +
>  	}
>  
> -	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
> +	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD |
> +			 _XBF_LOGRECOVERY);
>  
>  	if (bp->b_flags & XBF_ASYNC)
>  		xfs_buf_relse(bp);
> diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
> index ccfd747d32e410..ab87c1294f7584 100644
> --- a/fs/xfs/xfs_buf_item.c
> +++ b/fs/xfs/xfs_buf_item.c
> @@ -967,8 +967,12 @@ xfs_buf_item_done(
>  	 * xfs_trans_ail_delete() takes care of these.
>  	 *
>  	 * Either way, AIL is useless if we're forcing a shutdown.
> +	 *
> +	 * Note that log recovery writes might have buffer items that are not on
> +	 * the AIL during normal operations.
>  	 */
>  	xfs_trans_ail_delete(&bp->b_log_item->bli_item,
> +			     (bp->b_flags & _XBF_LOGRECOVERY) ? 0 :
>  			     SHUTDOWN_CORRUPT_INCORE);
>  	xfs_buf_item_relse(bp);
>  }
> diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
> index 74c851f60eeeb1..1bbae3ab1322f7 100644
> --- a/fs/xfs/xfs_buf_item_recover.c
> +++ b/fs/xfs/xfs_buf_item_recover.c
> @@ -414,7 +414,7 @@ xlog_recover_validate_buf_type(
>  	 *
>  	 * Write verifiers update the metadata LSN from log items attached to
>  	 * the buffer. Therefore, initialize a bli purely to carry the LSN to
> -	 * the verifier. We'll clean it up in our ->iodone() callback.
> +	 * the verifier.
>  	 */
>  	if (bp->b_ops) {
>  		struct xfs_buf_log_item	*bip;
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 741a2c247bc585..b181f3253e6e74 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -265,31 +265,6 @@ xlog_header_check_mount(
>  	return 0;
>  }
>  
> -void
> -xlog_recover_iodone(
> -	struct xfs_buf	*bp)
> -{
> -	if (!bp->b_error) {
> -		bp->b_flags |= XBF_DONE;
> -	} else if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
> -		/*
> -		 * We're not going to bother about retrying this during
> -		 * recovery. One strike!
> -		 */
> -		xfs_buf_ioerror_alert(bp, __this_address);
> -		xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
> -	}
> -
> -	/*
> -	 * On v5 supers, a bli could be attached to update the metadata LSN.
> -	 * Clean it up.
> -	 */
> -	if (bp->b_log_item)
> -		xfs_buf_item_relse(bp);
> -	ASSERT(bp->b_log_item == NULL);
> -	bp->b_flags &= ~_XBF_LOGRECOVERY;
> -}
> -
>  /*
>   * This routine finds (to an approximation) the first block in the physical
>   * log which contains the given cycle.  It uses a binary search algorithm.
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 13/13] xfs: reuse _xfs_buf_read for re-reading the superblock
  2020-07-09 15:04 ` [PATCH 13/13] xfs: reuse _xfs_buf_read for re-reading the superblock Christoph Hellwig
@ 2020-08-18 23:02   ` Darrick J. Wong
  2020-08-29  6:47     ` Christoph Hellwig
  0 siblings, 1 reply; 28+ messages in thread
From: Darrick J. Wong @ 2020-08-18 23:02 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Jul 09, 2020 at 05:04:53PM +0200, Christoph Hellwig wrote:
> Instead of poking deeply into buffer cache internals when re-reading the
> superblock during log recovery just generalize _xfs_buf_read and use it
> there.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_buf.c         | 28 +++++++++++++++++++---------
>  fs/xfs/xfs_buf.h         | 10 ++--------
>  fs/xfs/xfs_log_recover.c | 11 +++--------
>  3 files changed, 24 insertions(+), 25 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 1172d5fa06aad2..34d88c8b50854a 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -52,6 +52,15 @@ static kmem_zone_t *xfs_buf_zone;
>   *	  b_lock (trylock due to inversion)
>   */
>  
> +static int __xfs_buf_submit(struct xfs_buf *bp, bool wait);
> +
> +static inline int
> +xfs_buf_submit(
> +	struct xfs_buf		*bp)
> +{
> +	return __xfs_buf_submit(bp, !(bp->b_flags & XBF_ASYNC));
> +}
> +
>  static inline int
>  xfs_buf_is_vmapped(
>  	struct xfs_buf	*bp)
> @@ -753,16 +762,18 @@ xfs_buf_get_map(
>  	return 0;
>  }
>  
> -STATIC int
> +int
>  _xfs_buf_read(
> -	xfs_buf_t		*bp,
> -	xfs_buf_flags_t		flags)
> +	struct xfs_buf		*bp,
> +	xfs_buf_flags_t		flags,
> +	const struct xfs_buf_ops *ops)
>  {
> -	ASSERT(!(flags & XBF_WRITE));
>  	ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
> +	ASSERT(!(flags & XBF_WRITE));
>  
> -	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
> -	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
> +	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE);
> +	bp->b_flags |= flags & (XBF_ASYNC | XBF_READ_AHEAD);

Doesn't this change mean that the caller's XBF_READ never gets set
in bp->b_flags?  If the buffer is already in memory but doesn't have
XBF_DONE set, how does XBF_READ get set?  Maybe I'm missing something?

--D

> +	bp->b_ops = ops;
>  
>  	return xfs_buf_submit(bp);
>  }
> @@ -827,8 +838,7 @@ xfs_buf_read_map(
>  	if (!(bp->b_flags & XBF_DONE)) {
>  		/* Initiate the buffer read and wait. */
>  		XFS_STATS_INC(target->bt_mount, xb_get_read);
> -		bp->b_ops = ops;
> -		error = _xfs_buf_read(bp, flags);
> +		error = _xfs_buf_read(bp, flags, ops);
>  
>  		/* Readahead iodone already dropped the buffer, so exit. */
>  		if (flags & XBF_ASYNC)
> @@ -1637,7 +1647,7 @@ xfs_buf_iowait(
>   * safe to reference the buffer after a call to this function unless the caller
>   * holds an additional reference itself.
>   */
> -int
> +static int
>  __xfs_buf_submit(
>  	struct xfs_buf	*bp,
>  	bool		wait)
> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index 9eb4044597c985..db172599d32dc1 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -249,6 +249,8 @@ int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int flags,
>  int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
>  			  size_t numblks, int flags, struct xfs_buf **bpp,
>  			  const struct xfs_buf_ops *ops);
> +int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags,
> +		const struct xfs_buf_ops *ops);
>  void xfs_buf_hold(struct xfs_buf *bp);
>  
>  /* Releasing Buffers */
> @@ -275,14 +277,6 @@ extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
>  #define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address)
>  extern void xfs_buf_ioerror_alert(struct xfs_buf *bp, xfs_failaddr_t fa);
>  void xfs_buf_ioend_fail(struct xfs_buf *);
> -
> -extern int __xfs_buf_submit(struct xfs_buf *bp, bool);
> -static inline int xfs_buf_submit(struct xfs_buf *bp)
> -{
> -	bool wait = bp->b_flags & XBF_ASYNC ? false : true;
> -	return __xfs_buf_submit(bp, wait);
> -}
> -
>  void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize);
>  void __xfs_buf_mark_corrupt(struct xfs_buf *bp, xfs_failaddr_t fa);
>  #define xfs_buf_mark_corrupt(bp) __xfs_buf_mark_corrupt((bp), __this_address)
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index b181f3253e6e74..04f76a75886744 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -3305,16 +3305,11 @@ xlog_do_recover(
>  	xlog_assign_tail_lsn(mp);
>  
>  	/*
> -	 * Now that we've finished replaying all buffer and inode
> -	 * updates, re-read in the superblock and reverify it.
> +	 * Now that we've finished replaying all buffer and inode updates,
> +	 * re-read in the superblock and reverify it.
>  	 */
>  	bp = xfs_getsb(mp);
> -	bp->b_flags &= ~(XBF_DONE | XBF_ASYNC);
> -	ASSERT(!(bp->b_flags & XBF_WRITE));
> -	bp->b_flags |= XBF_READ;
> -	bp->b_ops = &xfs_sb_buf_ops;
> -
> -	error = xfs_buf_submit(bp);
> +	error = _xfs_buf_read(bp, XBF_READ, &xfs_sb_buf_ops);
>  	if (error) {
>  		if (!XFS_FORCED_SHUTDOWN(mp)) {
>  			xfs_buf_ioerror_alert(bp, __this_address);
> -- 
> 2.26.2
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 13/13] xfs: reuse _xfs_buf_read for re-reading the superblock
  2020-08-18 23:02   ` Darrick J. Wong
@ 2020-08-29  6:47     ` Christoph Hellwig
  0 siblings, 0 replies; 28+ messages in thread
From: Christoph Hellwig @ 2020-08-29  6:47 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs

On Tue, Aug 18, 2020 at 04:02:56PM -0700, Darrick J. Wong wrote:
> > -	xfs_buf_flags_t		flags)
> > +	struct xfs_buf		*bp,
> > +	xfs_buf_flags_t		flags,
> > +	const struct xfs_buf_ops *ops)
> >  {
> > -	ASSERT(!(flags & XBF_WRITE));
> >  	ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
> > +	ASSERT(!(flags & XBF_WRITE));
> >  
> > -	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
> > -	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
> > +	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE);
> > +	bp->b_flags |= flags & (XBF_ASYNC | XBF_READ_AHEAD);
> 
> Doesn't this change mean that the caller's XBF_READ never gets set
> in bp->b_flags?  If the buffer is already in memory but doesn't have
> XBF_DONE set, how does XBF_READ get set?  Maybe I'm missing something?

Yes, this is broken for the re-read case.

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2020-08-29  6:47 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-09 15:04 tidy up the buffer cache implementation Christoph Hellwig
2020-07-09 15:04 ` [PATCH 01/13] xfs: refactor the buf ioend disposition code Christoph Hellwig
2020-08-18 22:09   ` Darrick J. Wong
2020-07-09 15:04 ` [PATCH 02/13] xfs: mark xfs_buf_ioend static Christoph Hellwig
2020-08-18 22:10   ` Darrick J. Wong
2020-07-09 15:04 ` [PATCH 03/13] xfs: refactor xfs_buf_ioend Christoph Hellwig
2020-08-18 22:31   ` Darrick J. Wong
2020-07-09 15:04 ` [PATCH 04/13] xfs: move the buffer retry logic to xfs_buf.c Christoph Hellwig
2020-08-18 22:39   ` Darrick J. Wong
2020-07-09 15:04 ` [PATCH 05/13] xfs: fold xfs_buf_ioend_finish into xfs_ioend Christoph Hellwig
2020-08-18 22:40   ` Darrick J. Wong
2020-07-09 15:04 ` [PATCH 06/13] xfs: refactor xfs_buf_ioerror_fail_without_retry Christoph Hellwig
2020-08-18 22:41   ` Darrick J. Wong
2020-07-09 15:04 ` [PATCH 07/13] xfs: remove xfs_buf_ioerror_retry Christoph Hellwig
2020-08-18 22:42   ` Darrick J. Wong
2020-07-09 15:04 ` [PATCH 08/13] xfs: lift the XBF_IOEND_FAIL handling into xfs_buf_ioend_disposition Christoph Hellwig
2020-08-18 22:48   ` Darrick J. Wong
2020-07-09 15:04 ` [PATCH 09/13] xfs: simplify the xfs_buf_ioend_disposition calling convention Christoph Hellwig
2020-08-18 22:52   ` Darrick J. Wong
2020-07-09 15:04 ` [PATCH 10/13] xfs: use xfs_buf_item_relse in xfs_buf_item_done Christoph Hellwig
2020-08-18 22:52   ` Darrick J. Wong
2020-07-09 15:04 ` [PATCH 11/13] xfs: clear the read/write flags later in xfs_buf_ioend Christoph Hellwig
2020-08-18 22:54   ` Darrick J. Wong
2020-07-09 15:04 ` [PATCH 12/13] xfs: remove xlog_recover_iodone Christoph Hellwig
2020-08-18 22:54   ` Darrick J. Wong
2020-07-09 15:04 ` [PATCH 13/13] xfs: reuse _xfs_buf_read for re-reading the superblock Christoph Hellwig
2020-08-18 23:02   ` Darrick J. Wong
2020-08-29  6:47     ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.