All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] ext4: don't BUG on inconsistent journal feature
@ 2020-07-09 15:41 Jan Kara
  2020-07-09 18:28   ` kernel test robot
  2020-07-10  7:35 ` Lukas Czerner
  0 siblings, 2 replies; 5+ messages in thread
From: Jan Kara @ 2020-07-09 15:41 UTC (permalink / raw)
  To: Ted Tso; +Cc: linux-ext4, Lukas Czerner, Jan Kara

A customer has reported a BUG_ON in ext4_clear_journal_err() hitting
during an LTP testing. Either this has been caused by a test setup
issue where the filesystem was being overwritten while LTP was mounting
it or the journal replay has overwritten the superblock with invalid
data. In either case it is preferable we don't take the machine down
with a BUG_ON. So handle the situation of unexpectedly missing
has_journal feature more gracefully. We issue warning and fail the mount
in the cases where the race window is narrow and the failed check is
most likely a programming error. In cases where fs corruption is more
likely, we do full ext4_error() handling before failing mount / remount.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/super.c | 66 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 19 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 330957ed1f05..2c8f74f741f4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -66,10 +66,10 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
 			     unsigned long journal_devnum);
 static int ext4_show_options(struct seq_file *seq, struct dentry *root);
 static int ext4_commit_super(struct super_block *sb, int sync);
-static void ext4_mark_recovery_complete(struct super_block *sb,
+static int ext4_mark_recovery_complete(struct super_block *sb,
 					struct ext4_super_block *es);
-static void ext4_clear_journal_err(struct super_block *sb,
-				   struct ext4_super_block *es);
+static int ext4_clear_journal_err(struct super_block *sb,
+				  struct ext4_super_block *es);
 static int ext4_sync_fs(struct super_block *sb, int wait);
 static int ext4_remount(struct super_block *sb, int *flags, char *data);
 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
@@ -4770,7 +4770,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
 	if (needs_recovery) {
 		ext4_msg(sb, KERN_INFO, "recovery complete");
-		ext4_mark_recovery_complete(sb, es);
+		err = ext4_mark_recovery_complete(sb, es);
+		if (err)
+			goto failed_mount8;
 	}
 	if (EXT4_SB(sb)->s_journal) {
 		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
@@ -4956,7 +4958,8 @@ static journal_t *ext4_get_journal(struct super_block *sb,
 	struct inode *journal_inode;
 	journal_t *journal;
 
-	BUG_ON(!ext4_has_feature_journal(sb));
+	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+		return NULL;
 
 	journal_inode = ext4_get_journal_inode(sb, journal_inum);
 	if (!journal_inode)
@@ -4986,7 +4989,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 	struct ext4_super_block *es;
 	struct block_device *bdev;
 
-	BUG_ON(!ext4_has_feature_journal(sb));
+	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+		return NULL;
 
 	bdev = ext4_blkdev_get(j_dev, sb);
 	if (bdev == NULL)
@@ -5078,7 +5082,8 @@ static int ext4_load_journal(struct super_block *sb,
 	int err = 0;
 	int really_read_only;
 
-	BUG_ON(!ext4_has_feature_journal(sb));
+	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+		return -EFSCORRUPTED;
 
 	if (journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
@@ -5148,7 +5153,12 @@ static int ext4_load_journal(struct super_block *sb,
 	}
 
 	EXT4_SB(sb)->s_journal = journal;
-	ext4_clear_journal_err(sb, es);
+	err = ext4_clear_journal_err(sb, es);
+	if (err) {
+		EXT4_SB(sb)->s_journal = NULL;
+		jbd2_journal_destroy(journal);
+		return err;
+	}
 
 	if (!really_read_only && journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
@@ -5244,26 +5254,32 @@ static int ext4_commit_super(struct super_block *sb, int sync)
  * remounting) the filesystem readonly, then we will end up with a
  * consistent fs on disk.  Record that fact.
  */
-static void ext4_mark_recovery_complete(struct super_block *sb,
-					struct ext4_super_block *es)
+static int ext4_mark_recovery_complete(struct super_block *sb,
+				       struct ext4_super_block *es)
 {
+	int err;
 	journal_t *journal = EXT4_SB(sb)->s_journal;
 
 	if (!ext4_has_feature_journal(sb)) {
-		BUG_ON(journal != NULL);
-		return;
+		if (journal != NULL) {
+			ext4_error(sb, "Journal got removed while the fs was "
+				   "mounted!");
+			return -EFSCORRUPTED;
+		}
+		return 0;
 	}
 	jbd2_journal_lock_updates(journal);
-	if (jbd2_journal_flush(journal) < 0)
+	err = jbd2_journal_flush(journal);
+	if (err < 0)
 		goto out;
 
 	if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
 		ext4_clear_feature_journal_needs_recovery(sb);
 		ext4_commit_super(sb, 1);
 	}
-
 out:
 	jbd2_journal_unlock_updates(journal);
+	return err;
 }
 
 /*
@@ -5271,14 +5287,17 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
  * has recorded an error from a previous lifetime, move that error to the
  * main filesystem now.
  */
-static void ext4_clear_journal_err(struct super_block *sb,
+static int ext4_clear_journal_err(struct super_block *sb,
 				   struct ext4_super_block *es)
 {
 	journal_t *journal;
 	int j_errno;
 	const char *errstr;
 
-	BUG_ON(!ext4_has_feature_journal(sb));
+	if (!ext4_has_feature_journal(sb)) {
+		ext4_error(sb, "Journal got removed while the fs was mounted!");
+		return -EFSCORRUPTED;
+	}
 
 	journal = EXT4_SB(sb)->s_journal;
 
@@ -5303,6 +5322,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
 		jbd2_journal_clear_err(journal);
 		jbd2_journal_update_sb_errno(journal);
 	}
+	return 0;
 }
 
 /*
@@ -5573,8 +5593,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 			    (sbi->s_mount_state & EXT4_VALID_FS))
 				es->s_state = cpu_to_le16(sbi->s_mount_state);
 
-			if (sbi->s_journal)
+			if (sbi->s_journal) {
+				/*
+				 * We let remount-ro finish even if marking fs
+				 * as clean failed...
+				 */
 				ext4_mark_recovery_complete(sb, es);
+			}
 			if (sbi->s_mmp_tsk)
 				kthread_stop(sbi->s_mmp_tsk);
 		} else {
@@ -5622,8 +5647,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 			 * been changed by e2fsck since we originally mounted
 			 * the partition.)
 			 */
-			if (sbi->s_journal)
-				ext4_clear_journal_err(sb, es);
+			if (sbi->s_journal) {
+				err = ext4_clear_journal_err(sb, es);
+				if (err)
+					goto restore_opts;
+			}
 			sbi->s_mount_state = le16_to_cpu(es->s_state);
 
 			err = ext4_setup_super(sb, es, 0);
-- 
2.16.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] ext4: don't BUG on inconsistent journal feature
  2020-07-09 15:41 [PATCH v2] ext4: don't BUG on inconsistent journal feature Jan Kara
@ 2020-07-09 18:28   ` kernel test robot
  2020-07-10  7:35 ` Lukas Czerner
  1 sibling, 0 replies; 5+ messages in thread
From: kernel test robot @ 2020-07-09 18:28 UTC (permalink / raw)
  To: Jan Kara, Ted Tso; +Cc: kbuild-all, linux-ext4, Lukas Czerner, Jan Kara

[-- Attachment #1: Type: text/plain, Size: 3593 bytes --]

Hi Jan,

I love your patch! Yet something to improve:

[auto build test ERROR on ext4/dev]
[also build test ERROR on ext3/for_next v5.8-rc4 next-20200709]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use  as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Jan-Kara/ext4-don-t-BUG-on-inconsistent-journal-feature/20200709-234552
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git dev
config: arm-at91_dt_defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arm 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   fs/ext4/super.c: In function 'ext4_fill_super':
>> fs/ext4/super.c:4736:4: error: label 'failed_mount8' used but not defined
    4736 |    goto failed_mount8;
         |    ^~~~
   fs/ext4/super.c: In function 'ext4_remount':
   fs/ext4/super.c:5430:6: warning: variable 'enable_quota' set but not used [-Wunused-but-set-variable]
    5430 |  int enable_quota = 0;
         |      ^~~~~~~~~~~~

vim +/failed_mount8 +4736 fs/ext4/super.c

  4728	
  4729		EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
  4730		ext4_orphan_cleanup(sb, es);
  4731		EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
  4732		if (needs_recovery) {
  4733			ext4_msg(sb, KERN_INFO, "recovery complete");
  4734			err = ext4_mark_recovery_complete(sb, es);
  4735			if (err)
> 4736				goto failed_mount8;
  4737		}
  4738		if (EXT4_SB(sb)->s_journal) {
  4739			if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
  4740				descr = " journalled data mode";
  4741			else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
  4742				descr = " ordered data mode";
  4743			else
  4744				descr = " writeback data mode";
  4745		} else
  4746			descr = "out journal";
  4747	
  4748		if (test_opt(sb, DISCARD)) {
  4749			struct request_queue *q = bdev_get_queue(sb->s_bdev);
  4750			if (!blk_queue_discard(q))
  4751				ext4_msg(sb, KERN_WARNING,
  4752					 "mounting with \"discard\" option, but "
  4753					 "the device does not support discard");
  4754		}
  4755	
  4756		if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
  4757			ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
  4758				 "Opts: %.*s%s%s", descr,
  4759				 (int) sizeof(sbi->s_es->s_mount_opts),
  4760				 sbi->s_es->s_mount_opts,
  4761				 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
  4762	
  4763		if (es->s_error_count)
  4764			mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
  4765	
  4766		/* Enable message ratelimiting. Default is 10 messages per 5 secs. */
  4767		ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
  4768		ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
  4769		ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
  4770	
  4771		kfree(orig_data);
  4772		return 0;
  4773	
  4774	cantfind_ext4:
  4775		if (!silent)
  4776			ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
  4777		goto failed_mount;
  4778	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28287 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] ext4: don't BUG on inconsistent journal feature
@ 2020-07-09 18:28   ` kernel test robot
  0 siblings, 0 replies; 5+ messages in thread
From: kernel test robot @ 2020-07-09 18:28 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 3686 bytes --]

Hi Jan,

I love your patch! Yet something to improve:

[auto build test ERROR on ext4/dev]
[also build test ERROR on ext3/for_next v5.8-rc4 next-20200709]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use  as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Jan-Kara/ext4-don-t-BUG-on-inconsistent-journal-feature/20200709-234552
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git dev
config: arm-at91_dt_defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arm 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   fs/ext4/super.c: In function 'ext4_fill_super':
>> fs/ext4/super.c:4736:4: error: label 'failed_mount8' used but not defined
    4736 |    goto failed_mount8;
         |    ^~~~
   fs/ext4/super.c: In function 'ext4_remount':
   fs/ext4/super.c:5430:6: warning: variable 'enable_quota' set but not used [-Wunused-but-set-variable]
    5430 |  int enable_quota = 0;
         |      ^~~~~~~~~~~~

vim +/failed_mount8 +4736 fs/ext4/super.c

  4728	
  4729		EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
  4730		ext4_orphan_cleanup(sb, es);
  4731		EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
  4732		if (needs_recovery) {
  4733			ext4_msg(sb, KERN_INFO, "recovery complete");
  4734			err = ext4_mark_recovery_complete(sb, es);
  4735			if (err)
> 4736				goto failed_mount8;
  4737		}
  4738		if (EXT4_SB(sb)->s_journal) {
  4739			if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
  4740				descr = " journalled data mode";
  4741			else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
  4742				descr = " ordered data mode";
  4743			else
  4744				descr = " writeback data mode";
  4745		} else
  4746			descr = "out journal";
  4747	
  4748		if (test_opt(sb, DISCARD)) {
  4749			struct request_queue *q = bdev_get_queue(sb->s_bdev);
  4750			if (!blk_queue_discard(q))
  4751				ext4_msg(sb, KERN_WARNING,
  4752					 "mounting with \"discard\" option, but "
  4753					 "the device does not support discard");
  4754		}
  4755	
  4756		if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
  4757			ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
  4758				 "Opts: %.*s%s%s", descr,
  4759				 (int) sizeof(sbi->s_es->s_mount_opts),
  4760				 sbi->s_es->s_mount_opts,
  4761				 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
  4762	
  4763		if (es->s_error_count)
  4764			mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
  4765	
  4766		/* Enable message ratelimiting. Default is 10 messages per 5 secs. */
  4767		ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
  4768		ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
  4769		ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
  4770	
  4771		kfree(orig_data);
  4772		return 0;
  4773	
  4774	cantfind_ext4:
  4775		if (!silent)
  4776			ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
  4777		goto failed_mount;
  4778	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 28287 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] ext4: don't BUG on inconsistent journal feature
  2020-07-09 15:41 [PATCH v2] ext4: don't BUG on inconsistent journal feature Jan Kara
  2020-07-09 18:28   ` kernel test robot
@ 2020-07-10  7:35 ` Lukas Czerner
  2020-07-10 14:08   ` Jan Kara
  1 sibling, 1 reply; 5+ messages in thread
From: Lukas Czerner @ 2020-07-10  7:35 UTC (permalink / raw)
  To: Jan Kara; +Cc: Ted Tso, linux-ext4

On Thu, Jul 09, 2020 at 05:41:04PM +0200, Jan Kara wrote:
> A customer has reported a BUG_ON in ext4_clear_journal_err() hitting
> during an LTP testing. Either this has been caused by a test setup
> issue where the filesystem was being overwritten while LTP was mounting
> it or the journal replay has overwritten the superblock with invalid
> data. In either case it is preferable we don't take the machine down
> with a BUG_ON. So handle the situation of unexpectedly missing
> has_journal feature more gracefully. We issue warning and fail the mount
> in the cases where the race window is narrow and the failed check is
> most likely a programming error. In cases where fs corruption is more
> likely, we do full ext4_error() handling before failing mount / remount.
> 
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
>  fs/ext4/super.c | 66 ++++++++++++++++++++++++++++++++++++++++-----------------
>  1 file changed, 47 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 330957ed1f05..2c8f74f741f4 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -66,10 +66,10 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
>  			     unsigned long journal_devnum);
>  static int ext4_show_options(struct seq_file *seq, struct dentry *root);
>  static int ext4_commit_super(struct super_block *sb, int sync);
> -static void ext4_mark_recovery_complete(struct super_block *sb,
> +static int ext4_mark_recovery_complete(struct super_block *sb,
>  					struct ext4_super_block *es);
> -static void ext4_clear_journal_err(struct super_block *sb,
> -				   struct ext4_super_block *es);
> +static int ext4_clear_journal_err(struct super_block *sb,
> +				  struct ext4_super_block *es);
>  static int ext4_sync_fs(struct super_block *sb, int wait);
>  static int ext4_remount(struct super_block *sb, int *flags, char *data);
>  static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
> @@ -4770,7 +4770,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
>  	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
>  	if (needs_recovery) {
>  		ext4_msg(sb, KERN_INFO, "recovery complete");
> -		ext4_mark_recovery_complete(sb, es);
> +		err = ext4_mark_recovery_complete(sb, es);
> +		if (err)
> +			goto failed_mount8;

failed_mount8 is in #ifdef CONFIG_QUOTA so it probably needs to be moved
out of the ifdef block.

Other than that it looks good to me, so with that change you can add

Reviewed-by: Lukas Czerner <lczerner@redhat.com>

Thanks!
-Lukas

>  	}
>  	if (EXT4_SB(sb)->s_journal) {
>  		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
> @@ -4956,7 +4958,8 @@ static journal_t *ext4_get_journal(struct super_block *sb,
>  	struct inode *journal_inode;
>  	journal_t *journal;
>  
> -	BUG_ON(!ext4_has_feature_journal(sb));
> +	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
> +		return NULL;
>  
>  	journal_inode = ext4_get_journal_inode(sb, journal_inum);
>  	if (!journal_inode)
> @@ -4986,7 +4989,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
>  	struct ext4_super_block *es;
>  	struct block_device *bdev;
>  
> -	BUG_ON(!ext4_has_feature_journal(sb));
> +	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
> +		return NULL;
>  
>  	bdev = ext4_blkdev_get(j_dev, sb);
>  	if (bdev == NULL)
> @@ -5078,7 +5082,8 @@ static int ext4_load_journal(struct super_block *sb,
>  	int err = 0;
>  	int really_read_only;
>  
> -	BUG_ON(!ext4_has_feature_journal(sb));
> +	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
> +		return -EFSCORRUPTED;
>  
>  	if (journal_devnum &&
>  	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
> @@ -5148,7 +5153,12 @@ static int ext4_load_journal(struct super_block *sb,
>  	}
>  
>  	EXT4_SB(sb)->s_journal = journal;
> -	ext4_clear_journal_err(sb, es);
> +	err = ext4_clear_journal_err(sb, es);
> +	if (err) {
> +		EXT4_SB(sb)->s_journal = NULL;
> +		jbd2_journal_destroy(journal);
> +		return err;
> +	}
>  
>  	if (!really_read_only && journal_devnum &&
>  	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
> @@ -5244,26 +5254,32 @@ static int ext4_commit_super(struct super_block *sb, int sync)
>   * remounting) the filesystem readonly, then we will end up with a
>   * consistent fs on disk.  Record that fact.
>   */
> -static void ext4_mark_recovery_complete(struct super_block *sb,
> -					struct ext4_super_block *es)
> +static int ext4_mark_recovery_complete(struct super_block *sb,
> +				       struct ext4_super_block *es)
>  {
> +	int err;
>  	journal_t *journal = EXT4_SB(sb)->s_journal;
>  
>  	if (!ext4_has_feature_journal(sb)) {
> -		BUG_ON(journal != NULL);
> -		return;
> +		if (journal != NULL) {
> +			ext4_error(sb, "Journal got removed while the fs was "
> +				   "mounted!");
> +			return -EFSCORRUPTED;
> +		}
> +		return 0;
>  	}
>  	jbd2_journal_lock_updates(journal);
> -	if (jbd2_journal_flush(journal) < 0)
> +	err = jbd2_journal_flush(journal);
> +	if (err < 0)
>  		goto out;
>  
>  	if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
>  		ext4_clear_feature_journal_needs_recovery(sb);
>  		ext4_commit_super(sb, 1);
>  	}
> -
>  out:
>  	jbd2_journal_unlock_updates(journal);
> +	return err;
>  }
>  
>  /*
> @@ -5271,14 +5287,17 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
>   * has recorded an error from a previous lifetime, move that error to the
>   * main filesystem now.
>   */
> -static void ext4_clear_journal_err(struct super_block *sb,
> +static int ext4_clear_journal_err(struct super_block *sb,
>  				   struct ext4_super_block *es)
>  {
>  	journal_t *journal;
>  	int j_errno;
>  	const char *errstr;
>  
> -	BUG_ON(!ext4_has_feature_journal(sb));
> +	if (!ext4_has_feature_journal(sb)) {
> +		ext4_error(sb, "Journal got removed while the fs was mounted!");
> +		return -EFSCORRUPTED;
> +	}
>  
>  	journal = EXT4_SB(sb)->s_journal;
>  
> @@ -5303,6 +5322,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
>  		jbd2_journal_clear_err(journal);
>  		jbd2_journal_update_sb_errno(journal);
>  	}
> +	return 0;
>  }
>  
>  /*
> @@ -5573,8 +5593,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
>  			    (sbi->s_mount_state & EXT4_VALID_FS))
>  				es->s_state = cpu_to_le16(sbi->s_mount_state);
>  
> -			if (sbi->s_journal)
> +			if (sbi->s_journal) {
> +				/*
> +				 * We let remount-ro finish even if marking fs
> +				 * as clean failed...
> +				 */
>  				ext4_mark_recovery_complete(sb, es);
> +			}
>  			if (sbi->s_mmp_tsk)
>  				kthread_stop(sbi->s_mmp_tsk);
>  		} else {
> @@ -5622,8 +5647,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
>  			 * been changed by e2fsck since we originally mounted
>  			 * the partition.)
>  			 */
> -			if (sbi->s_journal)
> -				ext4_clear_journal_err(sb, es);
> +			if (sbi->s_journal) {
> +				err = ext4_clear_journal_err(sb, es);
> +				if (err)
> +					goto restore_opts;
> +			}
>  			sbi->s_mount_state = le16_to_cpu(es->s_state);
>  
>  			err = ext4_setup_super(sb, es, 0);
> -- 
> 2.16.4
> 


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] ext4: don't BUG on inconsistent journal feature
  2020-07-10  7:35 ` Lukas Czerner
@ 2020-07-10 14:08   ` Jan Kara
  0 siblings, 0 replies; 5+ messages in thread
From: Jan Kara @ 2020-07-10 14:08 UTC (permalink / raw)
  To: Lukas Czerner; +Cc: Jan Kara, Ted Tso, linux-ext4

On Fri 10-07-20 09:35:36, Lukas Czerner wrote:
> On Thu, Jul 09, 2020 at 05:41:04PM +0200, Jan Kara wrote:
> > A customer has reported a BUG_ON in ext4_clear_journal_err() hitting
> > during an LTP testing. Either this has been caused by a test setup
> > issue where the filesystem was being overwritten while LTP was mounting
> > it or the journal replay has overwritten the superblock with invalid
> > data. In either case it is preferable we don't take the machine down
> > with a BUG_ON. So handle the situation of unexpectedly missing
> > has_journal feature more gracefully. We issue warning and fail the mount
> > in the cases where the race window is narrow and the failed check is
> > most likely a programming error. In cases where fs corruption is more
> > likely, we do full ext4_error() handling before failing mount / remount.
> > 
> > Signed-off-by: Jan Kara <jack@suse.cz>
> > ---
> >  fs/ext4/super.c | 66 ++++++++++++++++++++++++++++++++++++++++-----------------
> >  1 file changed, 47 insertions(+), 19 deletions(-)
> > 
> > diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> > index 330957ed1f05..2c8f74f741f4 100644
> > --- a/fs/ext4/super.c
> > +++ b/fs/ext4/super.c
> > @@ -66,10 +66,10 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
> >  			     unsigned long journal_devnum);
> >  static int ext4_show_options(struct seq_file *seq, struct dentry *root);
> >  static int ext4_commit_super(struct super_block *sb, int sync);
> > -static void ext4_mark_recovery_complete(struct super_block *sb,
> > +static int ext4_mark_recovery_complete(struct super_block *sb,
> >  					struct ext4_super_block *es);
> > -static void ext4_clear_journal_err(struct super_block *sb,
> > -				   struct ext4_super_block *es);
> > +static int ext4_clear_journal_err(struct super_block *sb,
> > +				  struct ext4_super_block *es);
> >  static int ext4_sync_fs(struct super_block *sb, int wait);
> >  static int ext4_remount(struct super_block *sb, int *flags, char *data);
> >  static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
> > @@ -4770,7 +4770,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> >  	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
> >  	if (needs_recovery) {
> >  		ext4_msg(sb, KERN_INFO, "recovery complete");
> > -		ext4_mark_recovery_complete(sb, es);
> > +		err = ext4_mark_recovery_complete(sb, es);
> > +		if (err)
> > +			goto failed_mount8;
> 
> failed_mount8 is in #ifdef CONFIG_QUOTA so it probably needs to be moved
> out of the ifdef block.
> 
> Other than that it looks good to me, so with that change you can add
> 
> Reviewed-by: Lukas Czerner <lczerner@redhat.com>

Thanks for review! I've sent out v3 with you tag and the compilation
failure fixed up.

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-07-10 14:08 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-09 15:41 [PATCH v2] ext4: don't BUG on inconsistent journal feature Jan Kara
2020-07-09 18:28 ` kernel test robot
2020-07-09 18:28   ` kernel test robot
2020-07-10  7:35 ` Lukas Czerner
2020-07-10 14:08   ` Jan Kara

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.