All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] xfs: check return codes when flushing block devices
@ 2022-07-31 16:22 Darrick J. Wong
  2022-08-01  0:06 ` Dave Chinner
  0 siblings, 1 reply; 5+ messages in thread
From: Darrick J. Wong @ 2022-07-31 16:22 UTC (permalink / raw)
  To: xfs

From: Darrick J. Wong <djwong@kernel.org>

If a block device cache flush fails, fsync needs to report that to upper
levels.  If the log can't flush the data device, we should shut it down
immediately because we've just violated an invariant.  Hence, check the
return value of blkdev_issue_flush.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_file.c |   15 ++++++++++-----
 fs/xfs/xfs_log.c  |    7 +++++--
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5a171c0b244b..88450c33ab01 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -163,9 +163,11 @@ xfs_file_fsync(
 	 * inode size in case of an extending write.
 	 */
 	if (XFS_IS_REALTIME_INODE(ip))
-		blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
+		error = blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
 	else if (mp->m_logdev_targp != mp->m_ddev_targp)
-		blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
+		error = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
+	if (error)
+		return error;
 
 	/*
 	 * Any inode that has dirty modifications in the log is pinned.  The
@@ -173,8 +175,11 @@ xfs_file_fsync(
 	 * that happen concurrently to the fsync call, but fsync semantics
 	 * only require to sync previously completed I/O.
 	 */
-	if (xfs_ipincount(ip))
+	if (xfs_ipincount(ip)) {
 		error = xfs_fsync_flush_log(ip, datasync, &log_flushed);
+		if (error)
+			return error;
+	}
 
 	/*
 	 * If we only have a single device, and the log force about was
@@ -185,9 +190,9 @@ xfs_file_fsync(
 	 */
 	if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
 	    mp->m_logdev_targp == mp->m_ddev_targp)
-		blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
+		return blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
 
-	return error;
+	return 0;
 }
 
 static int
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 4b1c0a9c6368..8a767f4145f0 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1926,8 +1926,11 @@ xlog_write_iclog(
 		 * by the LSN in this iclog is on stable storage. This is slow,
 		 * but it *must* complete before we issue the external log IO.
 		 */
-		if (log->l_targ != log->l_mp->m_ddev_targp)
-			blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev);
+		if (log->l_targ != log->l_mp->m_ddev_targp &&
+		    blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
+			xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
+			return;
+		}
 	}
 	if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
 		iclog->ic_bio.bi_opf |= REQ_FUA;

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] xfs: check return codes when flushing block devices
  2022-07-31 16:22 [PATCH] xfs: check return codes when flushing block devices Darrick J. Wong
@ 2022-08-01  0:06 ` Dave Chinner
  2022-08-01  3:46   ` Darrick J. Wong
  2022-08-01 17:24   ` Christoph Hellwig
  0 siblings, 2 replies; 5+ messages in thread
From: Dave Chinner @ 2022-08-01  0:06 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: xfs

On Sun, Jul 31, 2022 at 09:22:28AM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> If a block device cache flush fails, fsync needs to report that to upper
> levels.  If the log can't flush the data device, we should shut it down
> immediately because we've just violated an invariant.  Hence, check the
> return value of blkdev_issue_flush.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  fs/xfs/xfs_file.c |   15 ++++++++++-----
>  fs/xfs/xfs_log.c  |    7 +++++--
>  2 files changed, 15 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 5a171c0b244b..88450c33ab01 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -163,9 +163,11 @@ xfs_file_fsync(
>  	 * inode size in case of an extending write.
>  	 */
>  	if (XFS_IS_REALTIME_INODE(ip))
> -		blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
> +		error = blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
>  	else if (mp->m_logdev_targp != mp->m_ddev_targp)
> -		blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
> +		error = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
> +	if (error)
> +		return error;
>  
>  	/*
>  	 * Any inode that has dirty modifications in the log is pinned.  The
> @@ -173,8 +175,11 @@ xfs_file_fsync(
>  	 * that happen concurrently to the fsync call, but fsync semantics
>  	 * only require to sync previously completed I/O.
>  	 */
> -	if (xfs_ipincount(ip))
> +	if (xfs_ipincount(ip)) {
>  		error = xfs_fsync_flush_log(ip, datasync, &log_flushed);
> +		if (error)
> +			return error;
> +	}

Shouldn't we still try to flush the data device if necessary, even
if the log flush failed?

>  	/*
>  	 * If we only have a single device, and the log force about was
> @@ -185,9 +190,9 @@ xfs_file_fsync(
>  	 */
>  	if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
>  	    mp->m_logdev_targp == mp->m_ddev_targp)
> -		blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
> +		return blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
>  
> -	return error;
> +	return 0;
>  }
>  
>  static int
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index 4b1c0a9c6368..8a767f4145f0 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -1926,8 +1926,11 @@ xlog_write_iclog(
>  		 * by the LSN in this iclog is on stable storage. This is slow,
>  		 * but it *must* complete before we issue the external log IO.
>  		 */
> -		if (log->l_targ != log->l_mp->m_ddev_targp)
> -			blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev);
> +		if (log->l_targ != log->l_mp->m_ddev_targp &&
> +		    blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
> +			xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
> +			return;
> +		}

That seems pretty drastic, though I'm not sure what else apart from
ignoring the data device flush error can be done here. Also, it's
not actually a log IO error - it's a data device IO error so it's a
really a metadata writeback problem. Hence the use of
SHUTDOWN_LOG_IO_ERROR probably needs a comment to explain why it
needs to be used here...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] xfs: check return codes when flushing block devices
  2022-08-01  0:06 ` Dave Chinner
@ 2022-08-01  3:46   ` Darrick J. Wong
  2022-08-01 17:24   ` Christoph Hellwig
  1 sibling, 0 replies; 5+ messages in thread
From: Darrick J. Wong @ 2022-08-01  3:46 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

On Mon, Aug 01, 2022 at 10:06:41AM +1000, Dave Chinner wrote:
> On Sun, Jul 31, 2022 at 09:22:28AM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > If a block device cache flush fails, fsync needs to report that to upper
> > levels.  If the log can't flush the data device, we should shut it down
> > immediately because we've just violated an invariant.  Hence, check the
> > return value of blkdev_issue_flush.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  fs/xfs/xfs_file.c |   15 ++++++++++-----
> >  fs/xfs/xfs_log.c  |    7 +++++--
> >  2 files changed, 15 insertions(+), 7 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > index 5a171c0b244b..88450c33ab01 100644
> > --- a/fs/xfs/xfs_file.c
> > +++ b/fs/xfs/xfs_file.c
> > @@ -163,9 +163,11 @@ xfs_file_fsync(
> >  	 * inode size in case of an extending write.
> >  	 */
> >  	if (XFS_IS_REALTIME_INODE(ip))
> > -		blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
> > +		error = blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
> >  	else if (mp->m_logdev_targp != mp->m_ddev_targp)
> > -		blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
> > +		error = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
> > +	if (error)
> > +		return error;
> >  
> >  	/*
> >  	 * Any inode that has dirty modifications in the log is pinned.  The
> > @@ -173,8 +175,11 @@ xfs_file_fsync(
> >  	 * that happen concurrently to the fsync call, but fsync semantics
> >  	 * only require to sync previously completed I/O.
> >  	 */
> > -	if (xfs_ipincount(ip))
> > +	if (xfs_ipincount(ip)) {
> >  		error = xfs_fsync_flush_log(ip, datasync, &log_flushed);
> > +		if (error)
> > +			return error;
> > +	}
> 
> Shouldn't we still try to flush the data device if necessary, even
> if the log flush failed?

Hm.  I can see two points of view here -- if the system is falling down
around us, just stop before we make anything worse.  The other POV is
that we should push as much as we can towards the storage in the hope
a reboot and recovery will fix everything.  And hope that the log flush
doesn't do anything out of order.

So, should xfs_file_fsync return the first error but try all the calls?

> >  	/*
> >  	 * If we only have a single device, and the log force about was
> > @@ -185,9 +190,9 @@ xfs_file_fsync(
> >  	 */
> >  	if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
> >  	    mp->m_logdev_targp == mp->m_ddev_targp)
> > -		blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
> > +		return blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
> >  
> > -	return error;
> > +	return 0;
> >  }
> >  
> >  static int
> > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> > index 4b1c0a9c6368..8a767f4145f0 100644
> > --- a/fs/xfs/xfs_log.c
> > +++ b/fs/xfs/xfs_log.c
> > @@ -1926,8 +1926,11 @@ xlog_write_iclog(
> >  		 * by the LSN in this iclog is on stable storage. This is slow,
> >  		 * but it *must* complete before we issue the external log IO.
> >  		 */
> > -		if (log->l_targ != log->l_mp->m_ddev_targp)
> > -			blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev);
> > +		if (log->l_targ != log->l_mp->m_ddev_targp &&
> > +		    blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
> > +			xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
> > +			return;
> > +		}
> 
> That seems pretty drastic, though I'm not sure what else apart from
> ignoring the data device flush error can be done here. Also, it's
> not actually a log IO error - it's a data device IO error so it's a
> really a metadata writeback problem. Hence the use of
> SHUTDOWN_LOG_IO_ERROR probably needs a comment to explain why it
> needs to be used here...

My first thought was to do that, but my second thought is -- can we do
whatever the AIL does when writes back to the filesystem fail?  I
/think/ it retries writes, correct?  I suppose this could retry the
flush until it succeeds or we hit an impatience threshold.

(Alternately, define a new SHUTDOWN_LOG_* code that will print a
slightly different message, on the grounds that few people have external
log devices anyway.)

--D

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] xfs: check return codes when flushing block devices
  2022-08-01  0:06 ` Dave Chinner
  2022-08-01  3:46   ` Darrick J. Wong
@ 2022-08-01 17:24   ` Christoph Hellwig
  2022-08-03  4:33     ` Darrick J. Wong
  1 sibling, 1 reply; 5+ messages in thread
From: Christoph Hellwig @ 2022-08-01 17:24 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Darrick J. Wong, xfs

On Mon, Aug 01, 2022 at 10:06:41AM +1000, Dave Chinner wrote:
> > @@ -173,8 +175,11 @@ xfs_file_fsync(
> >  	 * that happen concurrently to the fsync call, but fsync semantics
> >  	 * only require to sync previously completed I/O.
> >  	 */
> > -	if (xfs_ipincount(ip))
> > +	if (xfs_ipincount(ip)) {
> >  		error = xfs_fsync_flush_log(ip, datasync, &log_flushed);
> > +		if (error)
> > +			return error;
> > +	}
> 
> Shouldn't we still try to flush the data device if necessary, even
> if the log flush failed?

xfs_fsync_flush_log ails only if the log it shut down.  Does it really
make sense to flush the data cache for a pure overwrite of a data
block when the fs is toast?  I can't really see any benefit in that.

> > +		if (log->l_targ != log->l_mp->m_ddev_targp &&
> > +		    blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
> > +			xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
> > +			return;
> > +		}
> 
> That seems pretty drastic, though I'm not sure what else apart from
> ignoring the data device flush error can be done here. Also, it's
> not actually a log IO error - it's a data device IO error so it's a
> really a metadata writeback problem. Hence the use of
> SHUTDOWN_LOG_IO_ERROR probably needs a comment to explain why it
> needs to be used here...

Yes, the comment would be useful.  But if a cache flush fails data
integrity of the device must at this point be considered as fucked
up beyond belief, so shutting down the log and thus the file system
is the right thing to do.

So modulo a comment here the patch looks good to me.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] xfs: check return codes when flushing block devices
  2022-08-01 17:24   ` Christoph Hellwig
@ 2022-08-03  4:33     ` Darrick J. Wong
  0 siblings, 0 replies; 5+ messages in thread
From: Darrick J. Wong @ 2022-08-03  4:33 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Dave Chinner, xfs

On Mon, Aug 01, 2022 at 10:24:27AM -0700, Christoph Hellwig wrote:
> On Mon, Aug 01, 2022 at 10:06:41AM +1000, Dave Chinner wrote:
> > > @@ -173,8 +175,11 @@ xfs_file_fsync(
> > >  	 * that happen concurrently to the fsync call, but fsync semantics
> > >  	 * only require to sync previously completed I/O.
> > >  	 */
> > > -	if (xfs_ipincount(ip))
> > > +	if (xfs_ipincount(ip)) {
> > >  		error = xfs_fsync_flush_log(ip, datasync, &log_flushed);
> > > +		if (error)
> > > +			return error;
> > > +	}
> > 
> > Shouldn't we still try to flush the data device if necessary, even
> > if the log flush failed?
> 
> xfs_fsync_flush_log ails only if the log it shut down.  Does it really
> make sense to flush the data cache for a pure overwrite of a data
> block when the fs is toast?  I can't really see any benefit in that.

/me guesses that once we hit the first error, the rest of the calls will
probably crash and burn, and if they don't, the next fs call will, so it
likely doesn't matter if we keep going or not.

> > > +		if (log->l_targ != log->l_mp->m_ddev_targp &&
> > > +		    blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
> > > +			xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
> > > +			return;
> > > +		}
> > 
> > That seems pretty drastic, though I'm not sure what else apart from
> > ignoring the data device flush error can be done here. Also, it's
> > not actually a log IO error - it's a data device IO error so it's a
> > really a metadata writeback problem. Hence the use of
> > SHUTDOWN_LOG_IO_ERROR probably needs a comment to explain why it
> > needs to be used here...
> 
> Yes, the comment would be useful.  But if a cache flush fails data
> integrity of the device must at this point be considered as fucked
> up beyond belief, so shutting down the log and thus the file system
> is the right thing to do.
> 
> So modulo a comment here the patch looks good to me.

Ok, will do.

--D

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-08-03  4:33 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-31 16:22 [PATCH] xfs: check return codes when flushing block devices Darrick J. Wong
2022-08-01  0:06 ` Dave Chinner
2022-08-01  3:46   ` Darrick J. Wong
2022-08-01 17:24   ` Christoph Hellwig
2022-08-03  4:33     ` Darrick J. Wong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.