All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] ext4: don't BUG on inconsistent journal feature
@ 2020-07-10 14:07 Jan Kara
  2020-08-05 14:16 ` Jan Kara
  2020-08-06  5:11 ` tytso
  0 siblings, 2 replies; 3+ messages in thread
From: Jan Kara @ 2020-07-10 14:07 UTC (permalink / raw)
  To: Ted Tso; +Cc: linux-ext4, Lukas Czerner, Jan Kara

A customer has reported a BUG_ON in ext4_clear_journal_err() hitting
during an LTP testing. Either this has been caused by a test setup
issue where the filesystem was being overwritten while LTP was mounting
it or the journal replay has overwritten the superblock with invalid
data. In either case it is preferable we don't take the machine down
with a BUG_ON. So handle the situation of unexpectedly missing
has_journal feature more gracefully. We issue warning and fail the mount
in the cases where the race window is narrow and the failed check is
most likely a programming error. In cases where fs corruption is more
likely, we do full ext4_error() handling before failing mount / remount.

Reviewed-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/super.c | 68 +++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 21 deletions(-)

Changes since v2:
- added Reviewed-by tag
- fixup compilation failure without CONFIG_QUOTA

Changes since v1:
- handle failures more likely coming from fs corruption as with ext4_error

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 330957ed1f05..9fdad843b30e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -66,10 +66,10 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
 			     unsigned long journal_devnum);
 static int ext4_show_options(struct seq_file *seq, struct dentry *root);
 static int ext4_commit_super(struct super_block *sb, int sync);
-static void ext4_mark_recovery_complete(struct super_block *sb,
+static int ext4_mark_recovery_complete(struct super_block *sb,
 					struct ext4_super_block *es);
-static void ext4_clear_journal_err(struct super_block *sb,
-				   struct ext4_super_block *es);
+static int ext4_clear_journal_err(struct super_block *sb,
+				  struct ext4_super_block *es);
 static int ext4_sync_fs(struct super_block *sb, int wait);
 static int ext4_remount(struct super_block *sb, int *flags, char *data);
 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
@@ -4770,7 +4770,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
 	if (needs_recovery) {
 		ext4_msg(sb, KERN_INFO, "recovery complete");
-		ext4_mark_recovery_complete(sb, es);
+		err = ext4_mark_recovery_complete(sb, es);
+		if (err)
+			goto failed_mount8;
 	}
 	if (EXT4_SB(sb)->s_journal) {
 		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
@@ -4813,10 +4815,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
 	goto failed_mount;
 
-#ifdef CONFIG_QUOTA
 failed_mount8:
 	ext4_unregister_sysfs(sb);
-#endif
 failed_mount7:
 	ext4_unregister_li_request(sb);
 failed_mount6:
@@ -4956,7 +4956,8 @@ static journal_t *ext4_get_journal(struct super_block *sb,
 	struct inode *journal_inode;
 	journal_t *journal;
 
-	BUG_ON(!ext4_has_feature_journal(sb));
+	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+		return NULL;
 
 	journal_inode = ext4_get_journal_inode(sb, journal_inum);
 	if (!journal_inode)
@@ -4986,7 +4987,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 	struct ext4_super_block *es;
 	struct block_device *bdev;
 
-	BUG_ON(!ext4_has_feature_journal(sb));
+	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+		return NULL;
 
 	bdev = ext4_blkdev_get(j_dev, sb);
 	if (bdev == NULL)
@@ -5078,7 +5080,8 @@ static int ext4_load_journal(struct super_block *sb,
 	int err = 0;
 	int really_read_only;
 
-	BUG_ON(!ext4_has_feature_journal(sb));
+	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+		return -EFSCORRUPTED;
 
 	if (journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
@@ -5148,7 +5151,12 @@ static int ext4_load_journal(struct super_block *sb,
 	}
 
 	EXT4_SB(sb)->s_journal = journal;
-	ext4_clear_journal_err(sb, es);
+	err = ext4_clear_journal_err(sb, es);
+	if (err) {
+		EXT4_SB(sb)->s_journal = NULL;
+		jbd2_journal_destroy(journal);
+		return err;
+	}
 
 	if (!really_read_only && journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
@@ -5244,26 +5252,32 @@ static int ext4_commit_super(struct super_block *sb, int sync)
  * remounting) the filesystem readonly, then we will end up with a
  * consistent fs on disk.  Record that fact.
  */
-static void ext4_mark_recovery_complete(struct super_block *sb,
-					struct ext4_super_block *es)
+static int ext4_mark_recovery_complete(struct super_block *sb,
+				       struct ext4_super_block *es)
 {
+	int err;
 	journal_t *journal = EXT4_SB(sb)->s_journal;
 
 	if (!ext4_has_feature_journal(sb)) {
-		BUG_ON(journal != NULL);
-		return;
+		if (journal != NULL) {
+			ext4_error(sb, "Journal got removed while the fs was "
+				   "mounted!");
+			return -EFSCORRUPTED;
+		}
+		return 0;
 	}
 	jbd2_journal_lock_updates(journal);
-	if (jbd2_journal_flush(journal) < 0)
+	err = jbd2_journal_flush(journal);
+	if (err < 0)
 		goto out;
 
 	if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
 		ext4_clear_feature_journal_needs_recovery(sb);
 		ext4_commit_super(sb, 1);
 	}
-
 out:
 	jbd2_journal_unlock_updates(journal);
+	return err;
 }
 
 /*
@@ -5271,14 +5285,17 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
  * has recorded an error from a previous lifetime, move that error to the
  * main filesystem now.
  */
-static void ext4_clear_journal_err(struct super_block *sb,
+static int ext4_clear_journal_err(struct super_block *sb,
 				   struct ext4_super_block *es)
 {
 	journal_t *journal;
 	int j_errno;
 	const char *errstr;
 
-	BUG_ON(!ext4_has_feature_journal(sb));
+	if (!ext4_has_feature_journal(sb)) {
+		ext4_error(sb, "Journal got removed while the fs was mounted!");
+		return -EFSCORRUPTED;
+	}
 
 	journal = EXT4_SB(sb)->s_journal;
 
@@ -5303,6 +5320,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
 		jbd2_journal_clear_err(journal);
 		jbd2_journal_update_sb_errno(journal);
 	}
+	return 0;
 }
 
 /*
@@ -5573,8 +5591,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 			    (sbi->s_mount_state & EXT4_VALID_FS))
 				es->s_state = cpu_to_le16(sbi->s_mount_state);
 
-			if (sbi->s_journal)
+			if (sbi->s_journal) {
+				/*
+				 * We let remount-ro finish even if marking fs
+				 * as clean failed...
+				 */
 				ext4_mark_recovery_complete(sb, es);
+			}
 			if (sbi->s_mmp_tsk)
 				kthread_stop(sbi->s_mmp_tsk);
 		} else {
@@ -5622,8 +5645,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 			 * been changed by e2fsck since we originally mounted
 			 * the partition.)
 			 */
-			if (sbi->s_journal)
-				ext4_clear_journal_err(sb, es);
+			if (sbi->s_journal) {
+				err = ext4_clear_journal_err(sb, es);
+				if (err)
+					goto restore_opts;
+			}
 			sbi->s_mount_state = le16_to_cpu(es->s_state);
 
 			err = ext4_setup_super(sb, es, 0);
-- 
2.16.4


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v3] ext4: don't BUG on inconsistent journal feature
  2020-07-10 14:07 [PATCH v3] ext4: don't BUG on inconsistent journal feature Jan Kara
@ 2020-08-05 14:16 ` Jan Kara
  2020-08-06  5:11 ` tytso
  1 sibling, 0 replies; 3+ messages in thread
From: Jan Kara @ 2020-08-05 14:16 UTC (permalink / raw)
  To: Ted Tso; +Cc: linux-ext4, Lukas Czerner, Jan Kara

On Fri 10-07-20 16:07:59, Jan Kara wrote:
> A customer has reported a BUG_ON in ext4_clear_journal_err() hitting
> during an LTP testing. Either this has been caused by a test setup
> issue where the filesystem was being overwritten while LTP was mounting
> it or the journal replay has overwritten the superblock with invalid
> data. In either case it is preferable we don't take the machine down
> with a BUG_ON. So handle the situation of unexpectedly missing
> has_journal feature more gracefully. We issue warning and fail the mount
> in the cases where the race window is narrow and the failed check is
> most likely a programming error. In cases where fs corruption is more
> likely, we do full ext4_error() handling before failing mount / remount.
> 
> Reviewed-by: Lukas Czerner <lczerner@redhat.com>
> Signed-off-by: Jan Kara <jack@suse.cz>

Ted, can you please pick up this patch? Thanks!

								Honza

> ---
>  fs/ext4/super.c | 68 +++++++++++++++++++++++++++++++++++++++------------------
>  1 file changed, 47 insertions(+), 21 deletions(-)
> 
> Changes since v2:
> - added Reviewed-by tag
> - fixup compilation failure without CONFIG_QUOTA
> 
> Changes since v1:
> - handle failures more likely coming from fs corruption as with ext4_error
> 
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 330957ed1f05..9fdad843b30e 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -66,10 +66,10 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
>  			     unsigned long journal_devnum);
>  static int ext4_show_options(struct seq_file *seq, struct dentry *root);
>  static int ext4_commit_super(struct super_block *sb, int sync);
> -static void ext4_mark_recovery_complete(struct super_block *sb,
> +static int ext4_mark_recovery_complete(struct super_block *sb,
>  					struct ext4_super_block *es);
> -static void ext4_clear_journal_err(struct super_block *sb,
> -				   struct ext4_super_block *es);
> +static int ext4_clear_journal_err(struct super_block *sb,
> +				  struct ext4_super_block *es);
>  static int ext4_sync_fs(struct super_block *sb, int wait);
>  static int ext4_remount(struct super_block *sb, int *flags, char *data);
>  static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
> @@ -4770,7 +4770,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
>  	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
>  	if (needs_recovery) {
>  		ext4_msg(sb, KERN_INFO, "recovery complete");
> -		ext4_mark_recovery_complete(sb, es);
> +		err = ext4_mark_recovery_complete(sb, es);
> +		if (err)
> +			goto failed_mount8;
>  	}
>  	if (EXT4_SB(sb)->s_journal) {
>  		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
> @@ -4813,10 +4815,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
>  		ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
>  	goto failed_mount;
>  
> -#ifdef CONFIG_QUOTA
>  failed_mount8:
>  	ext4_unregister_sysfs(sb);
> -#endif
>  failed_mount7:
>  	ext4_unregister_li_request(sb);
>  failed_mount6:
> @@ -4956,7 +4956,8 @@ static journal_t *ext4_get_journal(struct super_block *sb,
>  	struct inode *journal_inode;
>  	journal_t *journal;
>  
> -	BUG_ON(!ext4_has_feature_journal(sb));
> +	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
> +		return NULL;
>  
>  	journal_inode = ext4_get_journal_inode(sb, journal_inum);
>  	if (!journal_inode)
> @@ -4986,7 +4987,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
>  	struct ext4_super_block *es;
>  	struct block_device *bdev;
>  
> -	BUG_ON(!ext4_has_feature_journal(sb));
> +	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
> +		return NULL;
>  
>  	bdev = ext4_blkdev_get(j_dev, sb);
>  	if (bdev == NULL)
> @@ -5078,7 +5080,8 @@ static int ext4_load_journal(struct super_block *sb,
>  	int err = 0;
>  	int really_read_only;
>  
> -	BUG_ON(!ext4_has_feature_journal(sb));
> +	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
> +		return -EFSCORRUPTED;
>  
>  	if (journal_devnum &&
>  	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
> @@ -5148,7 +5151,12 @@ static int ext4_load_journal(struct super_block *sb,
>  	}
>  
>  	EXT4_SB(sb)->s_journal = journal;
> -	ext4_clear_journal_err(sb, es);
> +	err = ext4_clear_journal_err(sb, es);
> +	if (err) {
> +		EXT4_SB(sb)->s_journal = NULL;
> +		jbd2_journal_destroy(journal);
> +		return err;
> +	}
>  
>  	if (!really_read_only && journal_devnum &&
>  	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
> @@ -5244,26 +5252,32 @@ static int ext4_commit_super(struct super_block *sb, int sync)
>   * remounting) the filesystem readonly, then we will end up with a
>   * consistent fs on disk.  Record that fact.
>   */
> -static void ext4_mark_recovery_complete(struct super_block *sb,
> -					struct ext4_super_block *es)
> +static int ext4_mark_recovery_complete(struct super_block *sb,
> +				       struct ext4_super_block *es)
>  {
> +	int err;
>  	journal_t *journal = EXT4_SB(sb)->s_journal;
>  
>  	if (!ext4_has_feature_journal(sb)) {
> -		BUG_ON(journal != NULL);
> -		return;
> +		if (journal != NULL) {
> +			ext4_error(sb, "Journal got removed while the fs was "
> +				   "mounted!");
> +			return -EFSCORRUPTED;
> +		}
> +		return 0;
>  	}
>  	jbd2_journal_lock_updates(journal);
> -	if (jbd2_journal_flush(journal) < 0)
> +	err = jbd2_journal_flush(journal);
> +	if (err < 0)
>  		goto out;
>  
>  	if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
>  		ext4_clear_feature_journal_needs_recovery(sb);
>  		ext4_commit_super(sb, 1);
>  	}
> -
>  out:
>  	jbd2_journal_unlock_updates(journal);
> +	return err;
>  }
>  
>  /*
> @@ -5271,14 +5285,17 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
>   * has recorded an error from a previous lifetime, move that error to the
>   * main filesystem now.
>   */
> -static void ext4_clear_journal_err(struct super_block *sb,
> +static int ext4_clear_journal_err(struct super_block *sb,
>  				   struct ext4_super_block *es)
>  {
>  	journal_t *journal;
>  	int j_errno;
>  	const char *errstr;
>  
> -	BUG_ON(!ext4_has_feature_journal(sb));
> +	if (!ext4_has_feature_journal(sb)) {
> +		ext4_error(sb, "Journal got removed while the fs was mounted!");
> +		return -EFSCORRUPTED;
> +	}
>  
>  	journal = EXT4_SB(sb)->s_journal;
>  
> @@ -5303,6 +5320,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
>  		jbd2_journal_clear_err(journal);
>  		jbd2_journal_update_sb_errno(journal);
>  	}
> +	return 0;
>  }
>  
>  /*
> @@ -5573,8 +5591,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
>  			    (sbi->s_mount_state & EXT4_VALID_FS))
>  				es->s_state = cpu_to_le16(sbi->s_mount_state);
>  
> -			if (sbi->s_journal)
> +			if (sbi->s_journal) {
> +				/*
> +				 * We let remount-ro finish even if marking fs
> +				 * as clean failed...
> +				 */
>  				ext4_mark_recovery_complete(sb, es);
> +			}
>  			if (sbi->s_mmp_tsk)
>  				kthread_stop(sbi->s_mmp_tsk);
>  		} else {
> @@ -5622,8 +5645,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
>  			 * been changed by e2fsck since we originally mounted
>  			 * the partition.)
>  			 */
> -			if (sbi->s_journal)
> -				ext4_clear_journal_err(sb, es);
> +			if (sbi->s_journal) {
> +				err = ext4_clear_journal_err(sb, es);
> +				if (err)
> +					goto restore_opts;
> +			}
>  			sbi->s_mount_state = le16_to_cpu(es->s_state);
>  
>  			err = ext4_setup_super(sb, es, 0);
> -- 
> 2.16.4
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v3] ext4: don't BUG on inconsistent journal feature
  2020-07-10 14:07 [PATCH v3] ext4: don't BUG on inconsistent journal feature Jan Kara
  2020-08-05 14:16 ` Jan Kara
@ 2020-08-06  5:11 ` tytso
  1 sibling, 0 replies; 3+ messages in thread
From: tytso @ 2020-08-06  5:11 UTC (permalink / raw)
  To: Jan Kara; +Cc: linux-ext4, Lukas Czerner

On Fri, Jul 10, 2020 at 04:07:59PM +0200, Jan Kara wrote:
> A customer has reported a BUG_ON in ext4_clear_journal_err() hitting
> during an LTP testing. Either this has been caused by a test setup
> issue where the filesystem was being overwritten while LTP was mounting
> it or the journal replay has overwritten the superblock with invalid
> data. In either case it is preferable we don't take the machine down
> with a BUG_ON. So handle the situation of unexpectedly missing
> has_journal feature more gracefully. We issue warning and fail the mount
> in the cases where the race window is narrow and the failed check is
> most likely a programming error. In cases where fs corruption is more
> likely, we do full ext4_error() handling before failing mount / remount.
> 
> Reviewed-by: Lukas Czerner <lczerner@redhat.com>
> Signed-off-by: Jan Kara <jack@suse.cz>

Applied, thanks.

						- Ted

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-08-06 11:01 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-10 14:07 [PATCH v3] ext4: don't BUG on inconsistent journal feature Jan Kara
2020-08-05 14:16 ` Jan Kara
2020-08-06  5:11 ` tytso

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.