All of lore.kernel.org
 help / color / mirror / Atom feed
* Sleeping function called in invalid context
@ 2016-08-03  7:22 Nikolay Borisov
  2016-08-04 16:05 ` Jan Kara
  0 siblings, 1 reply; 7+ messages in thread
From: Nikolay Borisov @ 2016-08-03  7:22 UTC (permalink / raw)
  To: Theodore Ts'o; +Cc: Jan Kara, linux-ext4

While doing some testing on today's checkout of Linus' master branch I got the following: 

[    9.302725] BUG: sleeping function called from invalid context at ./include/linux/buffer_head.h:358
[    9.304403] in_atomic(): 1, irqs_disabled(): 0, pid: 1718, name: mount
[    9.305633] 8 locks held by mount/1718:
[    9.306382]  #0:  (sb_writers#6){.+.+.+}, at: [<ffffffff811f0473>] __sb_start_write+0xd3/0xf0
[    9.308178]  #1:  (&type->i_mutex_dir_key#2/1){+.+.+.}, at: [<ffffffff811f9574>] lock_rename+0xe4/0x120
[    9.310096]  #2:  (&sb->s_type->i_mutex_key#9){+.+.+.}, at: [<ffffffff8120c157>] lock_two_nondirectories+0x47/0x90
[    9.312195]  #3:  (&sb->s_type->i_mutex_key#9/4){+.+.+.}, at: [<ffffffff8120c18f>] lock_two_nondirectories+0x7f/0x90
[    9.314315]  #4:  (&sbi->s_journal_flag_rwsem){.+.+.+}, at: [<ffffffff812988c0>] ext4_writepages+0x50/0xff0
[    9.316278]  #5:  (jbd2_handle){++++..}, at: [<ffffffff812e8d43>] start_this_handle+0x143/0x4c0
[    9.318032]  #6:  (&ei->i_data_sem){++++..}, at: [<ffffffff81294803>] ext4_map_blocks+0x123/0x540
[    9.319793]  #7:  (&(&bgl->locks[i].lock)->rlock){+.+...}, at: [<ffffffff812d0df3>] ext4_mb_init_cache+0x3e3/0xa70
[    9.321887] CPU: 0 PID: 1718 Comm: mount Not tainted 4.7.0-clouder1 #5
[    9.322873] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014
[    9.322873]  0000000000000000 ffff88003a4d3058 ffffffff81375c97 ffffffff819c29f9
[    9.322873]  0000000000000166 ffff880038a41500 ffffffff819c29f9 ffff88003a4d3088
[    9.322873]  ffffffff8108dc43 ffff88003a4d3088 0000000000000000 0000000000000166
[    9.322873] Call Trace:
[    9.322873]  [<ffffffff81375c97>] dump_stack+0x6b/0xa4
[    9.322873]  [<ffffffff8108dc43>] ___might_sleep+0x183/0x240
[    9.322873]  [<ffffffff8108dd52>] __might_sleep+0x52/0x90
[    9.322873]  [<ffffffff812ae9c1>] ext4_commit_super+0x191/0x2a0
[    9.322873]  [<ffffffff812af790>] __ext4_grp_locked_error+0x170/0x240
[    9.322873]  [<ffffffff8108dd52>] ? __might_sleep+0x52/0x90
[    9.322873]  [<ffffffff812cf00c>] ext4_mb_generate_buddy+0x20c/0x360
[    9.322873]  [<ffffffff812d0e61>] ext4_mb_init_cache+0x451/0xa70
[    9.322873]  [<ffffffff812d167c>] ext4_mb_init_group+0x1fc/0x2b0
[    9.322873]  [<ffffffff812d73dd>] ? ext4_mb_new_blocks+0x24d/0x1160
[    9.322873]  [<ffffffff812d18af>] ext4_mb_good_group+0x17f/0x190
[    9.322873]  [<ffffffff812d6fa7>] ext4_mb_regular_allocator+0x2c7/0x4b0
[    9.322873]  [<ffffffff812d05ae>] ? ext4_mb_use_preallocated+0x3e/0x4a0
[    9.322873]  [<ffffffff811c9e0c>] ? kmem_cache_alloc+0x24c/0x2d0
[    9.322873]  [<ffffffff812d73dd>] ? ext4_mb_new_blocks+0x24d/0x1160
[    9.322873]  [<ffffffff812cdea4>] ? ext4_mb_initialize_context+0x84/0x1a0
[    9.322873]  [<ffffffff812d7a84>] ext4_mb_new_blocks+0x8f4/0x1160
[    9.322873]  [<ffffffff810b1bf6>] ? check_usage+0x86/0x4c0
[    9.322873]  [<ffffffff810b2f9b>] ? __lock_acquire+0x39b/0x1800
[    9.322873]  [<ffffffff810b379e>] ? __lock_acquire+0xb9e/0x1800
[    9.322873]  [<ffffffff812c28de>] ? ext4_find_extent+0x23e/0x2b0
[    9.322873]  [<ffffffff810c6fff>] ? rcu_read_lock_sched_held+0x4f/0x90
[    9.322873]  [<ffffffff811caf84>] ? __kmalloc+0x274/0x320
[    9.322873]  [<ffffffff812c28de>] ? ext4_find_extent+0x23e/0x2b0
[    9.322873]  [<ffffffff812c28de>] ? ext4_find_extent+0x23e/0x2b0
[    9.322873]  [<ffffffff812c7f5c>] ext4_ext_map_blocks+0xcfc/0x1140
[    9.322873]  [<ffffffff812de7d8>] ? ext4_es_lookup_extent+0x58/0x3a0
[    9.322873]  [<ffffffff81294803>] ? ext4_map_blocks+0x123/0x540
[    9.322873]  [<ffffffff81294803>] ? ext4_map_blocks+0x123/0x540
[    9.322873]  [<ffffffff81294827>] ext4_map_blocks+0x147/0x540
[    9.322873]  [<ffffffff81298ea8>] ? ext4_writepages+0x638/0xff0
[    9.322873]  [<ffffffff812991c4>] ext4_writepages+0x954/0xff0
[    9.322873]  [<ffffffff810b0908>] ? add_lock_to_list.clone.0+0x78/0xb0
[    9.322873]  [<ffffffff810b379e>] ? __lock_acquire+0xb9e/0x1800
[    9.322873]  [<ffffffff816fc1db>] ? _raw_spin_unlock+0x2b/0x40
[    9.322873]  [<ffffffff81223fb9>] ? wbc_attach_and_unlock_inode+0x179/0x290
[    9.322873]  [<ffffffff8116eb43>] do_writepages+0x23/0x40
[    9.322873]  [<ffffffff8115c6a5>] __filemap_fdatawrite_range+0xb5/0x100
[    9.322873]  [<ffffffff810b0908>] ? add_lock_to_list.clone.0+0x78/0xb0
[    9.322873]  [<ffffffff8115c8fc>] filemap_flush+0x1c/0x20
[    9.322873]  [<ffffffff81293bf6>] ext4_alloc_da_blocks+0x56/0x160
[    9.322873]  [<ffffffff812a7655>] ext4_rename+0x665/0x900
[    9.322873]  [<ffffffff8120c18f>] ? lock_two_nondirectories+0x7f/0x90
[    9.322873]  [<ffffffff8120c18f>] ? lock_two_nondirectories+0x7f/0x90
[    9.322873]  [<ffffffff812a7912>] ext4_rename2+0x22/0x40
[    9.322873]  [<ffffffff811fa5ec>] vfs_rename+0x27c/0x5e0
[    9.322873]  [<ffffffff811fea71>] SyS_renameat2+0x431/0x4c0
[    9.322873]  [<ffffffff811f88ce>] SyS_rename+0x1e/0x20
[    9.322873]  [<ffffffff816fc365>] entry_SYSCALL_64_fastpath+0x18/0xa8
[    9.322873]  [<ffffffff816fc31a>] ? entry_SYSCALL_64_after_swapgs+0x17/0x4a

Line 358 is lock_buffer

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Sleeping function called in invalid context
  2016-08-03  7:22 Sleeping function called in invalid context Nikolay Borisov
@ 2016-08-04 16:05 ` Jan Kara
  2016-08-04 20:58   ` Theodore Ts'o
  0 siblings, 1 reply; 7+ messages in thread
From: Jan Kara @ 2016-08-04 16:05 UTC (permalink / raw)
  To: Nikolay Borisov; +Cc: Theodore Ts'o, Jan Kara, linux-ext4

On Wed 03-08-16 10:22:03, Nikolay Borisov wrote:
> While doing some testing on today's checkout of Linus' master branch I
> got the following: 

> 
> [    9.302725] BUG: sleeping function called from invalid context at ./include/linux/buffer_head.h:358
> [    9.304403] in_atomic(): 1, irqs_disabled(): 0, pid: 1718, name: mount
> [    9.305633] 8 locks held by mount/1718:

Yeah, this looks like a regression cause by commit 4743f83990614af "ext4:
Fix WARN_ON_ONCE in ext4_commit_super()". Arguably that cure is worse than
the disease but OTOH calling ext4_commit_super() from an atomic context
(like __ext4_grp_locked_error() does) sucks as well.

I'm not sure what the right fix is here. The cleanest would probably be to
always drop group lock in __ext4_grp_locked_error() and make sure we always
properly bail out of mballoc code on such error. But that's a non-trivial
amount of work. Not sure if other ext4 people have opinion on this?

								Honza

-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Sleeping function called in invalid context
  2016-08-04 16:05 ` Jan Kara
@ 2016-08-04 20:58   ` Theodore Ts'o
  2016-08-05  6:29     ` Nikolay Borisov
  0 siblings, 1 reply; 7+ messages in thread
From: Theodore Ts'o @ 2016-08-04 20:58 UTC (permalink / raw)
  To: Jan Kara; +Cc: Nikolay Borisov, Jan Kara, linux-ext4

On Thu, Aug 04, 2016 at 06:05:50PM +0200, Jan Kara wrote:
> On Wed 03-08-16 10:22:03, Nikolay Borisov wrote:
> > While doing some testing on today's checkout of Linus' master branch I
> > got the following: 
> 
> > 
> > [    9.302725] BUG: sleeping function called from invalid context at ./include/linux/buffer_head.h:358
> > [    9.304403] in_atomic(): 1, irqs_disabled(): 0, pid: 1718, name: mount
> > [    9.305633] 8 locks held by mount/1718:
> 
> Yeah, this looks like a regression cause by commit 4743f83990614af "ext4:
> Fix WARN_ON_ONCE in ext4_commit_super()". Arguably that cure is worse than
> the disease but OTOH calling ext4_commit_super() from an atomic context
> (like __ext4_grp_locked_error() does) sucks as well.
> 
> I'm not sure what the right fix is here. The cleanest would probably be to
> always drop group lock in __ext4_grp_locked_error() and make sure we always
> properly bail out of mballoc code on such error. But that's a non-trivial
> amount of work. Not sure if other ext4 people have opinion on this?

The easist way to fix this is defer the ext4_commit_super() to a
workqueue.  We only need this in the errors=continue case, and in that
scenario we're not in a hurry when the superblock gets written out.

In fact, we probably want to be doing this for all of the
errors=continue cases when we want to save the error state to the
superblock, so we can do the update properly using the journal,
instead of calling ext4_commit_super() which just force writes the
block.

(Of course, if the journal is aborted we'll need to fall back to using
ext4_commit_super, of course.)

						 - Ted

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Sleeping function called in invalid context
  2016-08-04 20:58   ` Theodore Ts'o
@ 2016-08-05  6:29     ` Nikolay Borisov
  2016-08-05 14:56       ` Theodore Ts'o
  0 siblings, 1 reply; 7+ messages in thread
From: Nikolay Borisov @ 2016-08-05  6:29 UTC (permalink / raw)
  To: Theodore Ts'o, Jan Kara; +Cc: Nikolay Borisov, Jan Kara, linux-ext4



On 08/04/2016 11:58 PM, Theodore Ts'o wrote:
> On Thu, Aug 04, 2016 at 06:05:50PM +0200, Jan Kara wrote:
>> On Wed 03-08-16 10:22:03, Nikolay Borisov wrote:
>>> While doing some testing on today's checkout of Linus' master branch I
>>> got the following: 
>>
>>>
>>> [    9.302725] BUG: sleeping function called from invalid context at ./include/linux/buffer_head.h:358
>>> [    9.304403] in_atomic(): 1, irqs_disabled(): 0, pid: 1718, name: mount
>>> [    9.305633] 8 locks held by mount/1718:
>>
>> Yeah, this looks like a regression cause by commit 4743f83990614af "ext4:
>> Fix WARN_ON_ONCE in ext4_commit_super()". Arguably that cure is worse than
>> the disease but OTOH calling ext4_commit_super() from an atomic context
>> (like __ext4_grp_locked_error() does) sucks as well.
>>
>> I'm not sure what the right fix is here. The cleanest would probably be to
>> always drop group lock in __ext4_grp_locked_error() and make sure we always
>> properly bail out of mballoc code on such error. But that's a non-trivial
>> amount of work. Not sure if other ext4 people have opinion on this?
> 
> The easist way to fix this is defer the ext4_commit_super() to a
> workqueue.  We only need this in the errors=continue case, and in that
> scenario we're not in a hurry when the superblock gets written out.

Is errors=continue the default option if nothing specifically is
specified at mount time, since I don't have this set explicitly:

/dev/vda / ext4 rw,relatime,data=ordered 0 0

> 
> In fact, we probably want to be doing this for all of the
> errors=continue cases when we want to save the error state to the
> superblock, so we can do the update properly using the journal,
> instead of calling ext4_commit_super() which just force writes the
> block.
> 
> (Of course, if the journal is aborted we'll need to fall back to using
> ext4_commit_super, of course.)
> 
> 						 - Ted
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Sleeping function called in invalid context
  2016-08-05  6:29     ` Nikolay Borisov
@ 2016-08-05 14:56       ` Theodore Ts'o
  2016-08-05 17:06         ` Darrick J. Wong
  2016-08-11 19:52         ` Andreas Dilger
  0 siblings, 2 replies; 7+ messages in thread
From: Theodore Ts'o @ 2016-08-05 14:56 UTC (permalink / raw)
  To: Nikolay Borisov; +Cc: Jan Kara, Jan Kara, linux-ext4

On Fri, Aug 05, 2016 at 09:29:59AM +0300, Nikolay Borisov wrote:
> > The easist way to fix this is defer the ext4_commit_super() to a
> > workqueue.  We only need this in the errors=continue case, and in that
> > scenario we're not in a hurry when the superblock gets written out.
> 
> Is errors=continue the default option if nothing specifically is
> specified at mount time, since I don't have this set explicitly:
> 
> /dev/vda / ext4 rw,relatime,data=ordered 0 0

Yes, it's the default.  I keep wondering whether we should change the
default to remount-ro or even panic, since people sometimes don't
notice that the "file system has been corrupted" messages, and then
they can end up losing a lot more detail if we forced them to address
the issue right away.

						- Ted

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Sleeping function called in invalid context
  2016-08-05 14:56       ` Theodore Ts'o
@ 2016-08-05 17:06         ` Darrick J. Wong
  2016-08-11 19:52         ` Andreas Dilger
  1 sibling, 0 replies; 7+ messages in thread
From: Darrick J. Wong @ 2016-08-05 17:06 UTC (permalink / raw)
  To: Theodore Ts'o; +Cc: Nikolay Borisov, Jan Kara, Jan Kara, linux-ext4

On Fri, Aug 05, 2016 at 10:56:04AM -0400, Theodore Ts'o wrote:
> On Fri, Aug 05, 2016 at 09:29:59AM +0300, Nikolay Borisov wrote:
> > > The easist way to fix this is defer the ext4_commit_super() to a
> > > workqueue.  We only need this in the errors=continue case, and in that
> > > scenario we're not in a hurry when the superblock gets written out.
> > 
> > Is errors=continue the default option if nothing specifically is
> > specified at mount time, since I don't have this set explicitly:
> > 
> > /dev/vda / ext4 rw,relatime,data=ordered 0 0
> 
> Yes, it's the default.  I keep wondering whether we should change the
> default to remount-ro or even panic, since people sometimes don't
> notice that the "file system has been corrupted" messages, and then
> they can end up losing a lot more detail if we forced them to address
> the issue right away.

Yes, please. :)

(At the moment I'm also trying to sort out some ext3 bug where jbd
flushes some dirty pages to disk ahead of writing out a transaction
but the dirty page writeback fails.  Apparently the userspace program
is long gone by the time this happens (I haven't been able to verify
that they're actually calling fsync) and the FS doesn't shut down,
but a later umount/mount/reread shows old file contents.  Hence I
now know about data_err=abort.)

--D

> 
> 						- Ted
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Sleeping function called in invalid context
  2016-08-05 14:56       ` Theodore Ts'o
  2016-08-05 17:06         ` Darrick J. Wong
@ 2016-08-11 19:52         ` Andreas Dilger
  1 sibling, 0 replies; 7+ messages in thread
From: Andreas Dilger @ 2016-08-11 19:52 UTC (permalink / raw)
  To: Theodore Ts'o
  Cc: Nikolay Borisov, Jan Kara, Wang Shilong, Jan Kara, linux-ext4


[-- Attachment #1.1: Type: text/plain, Size: 1479 bytes --]


> On Aug 5, 2016, at 8:56 AM, Theodore Ts'o <tytso@mit.edu> wrote:
> 
> On Fri, Aug 05, 2016 at 09:29:59AM +0300, Nikolay Borisov wrote:
>>> The easist way to fix this is defer the ext4_commit_super() to a
>>> workqueue.  We only need this in the errors=continue case, and in that
>>> scenario we're not in a hurry when the superblock gets written out.
>> 
>> Is errors=continue the default option if nothing specifically is
>> specified at mount time, since I don't have this set explicitly:
>> 
>> /dev/vda / ext4 rw,relatime,data=ordered 0 0
> 
> Yes, it's the default.  I keep wondering whether we should change the
> default to remount-ro or even panic, since people sometimes don't
> notice that the "file system has been corrupted" messages, and then
> they can end up losing a lot more detail if we forced them to address
> the issue right away.

I'd definitely be in favour of making the default "errors=remount-ro".
We've been setting that explicitly for years, since otherwise people
may not notice their ongoing problems until the filesystem completely
explodes.

Related to that, there is a Lustre patch to handle inconsistencies
between group descriptors and block/inode bitmaps by marking only the
group as unusable for new allocations, instead of marking the whole
filesystem in error.  Is that something that is of interest to a wider
audience?

Patch against RHEL7 is attached, but could be updated for newer kernels
if there is interest.

Cheers, Andreas





[-- Attachment #1.2: ext4-corrupted-inode-block-bitmaps-handling-patches.patch --]
[-- Type: application/octet-stream, Size: 15540 bytes --]

Since we could skip corrupt block groups, this patch
use ext4_warning() intead of ext4_error() to make FS not
emount RO in default, also fix a leftover from upstream
commit 163a203ddb36c36d4a1c942
---
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e069155..692b5e4 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -185,25 +185,17 @@ static int ext4_init_block_bitmap(struct super_block *sb,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_fsblk_t start, tmp;
 	int flex_bg = 0;
-	struct ext4_group_info *grp;
 
 	J_ASSERT_BH(bh, buffer_locked(bh));
 
 	/* If checksum is bad mark all blocks used to prevent allocation
 	 * essentially implementing a per-group read-only flag. */
 	if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-		grp = ext4_get_group_info(sb, block_group);
-		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-			percpu_counter_sub(&sbi->s_freeclusters_counter,
-					   grp->bb_free);
-		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-		if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-			int count;
-			count = ext4_free_inodes_count(sb, gdp);
-			percpu_counter_sub(&sbi->s_freeinodes_counter,
-					   count);
-		}
-		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
+		ext4_corrupted_block_group(sb, block_group,
+				EXT4_GROUP_INFO_BBITMAP_CORRUPT |
+				EXT4_GROUP_INFO_IBITMAP_CORRUPT,
+				"Checksum bad for group %u",
+				block_group);
 		return -EIO;
 	}
 	memset(bh->b_data, 0, sb->s_blocksize);
@@ -367,8 +359,6 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
 				       struct buffer_head *bh)
 {
 	ext4_fsblk_t	blk;
-	struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (buffer_verified(bh))
 		return;
@@ -377,22 +367,19 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
 	blk = ext4_valid_block_bitmap(sb, desc, block_group, bh);
 	if (unlikely(blk != 0)) {
 		ext4_unlock_group(sb, block_group);
-		ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
-			   block_group, blk);
-		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-			percpu_counter_sub(&sbi->s_freeclusters_counter,
-					   grp->bb_free);
-		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+		ext4_corrupted_block_group(sb, block_group,
+				EXT4_GROUP_INFO_BBITMAP_CORRUPT,
+				"bg %u: block %llu: invalid block bitmap",
+				block_group, blk);
 		return;
 	}
 	if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
 			desc, bh))) {
 		ext4_unlock_group(sb, block_group);
-		ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
-		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-			percpu_counter_sub(&sbi->s_freeclusters_counter,
-					   grp->bb_free);
-		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+		ext4_corrupted_block_group(sb, block_group,
+				EXT4_GROUP_INFO_BBITMAP_CORRUPT,
+				"bg %u: bad block bitmap checksum",
+				block_group);
 		return;
 	}
 	set_buffer_verified(bh);
@@ -445,8 +432,6 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
 		set_buffer_uptodate(bh);
 		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
-		if (err)
-			ext4_error(sb, "Checksum bad for grp %u", block_group);
 		return bh;
 	}
 	ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3c41773..63a63b6 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -91,6 +91,17 @@ typedef __u32 ext4_lblk_t;
 /* data type for block group number */
 typedef unsigned int ext4_group_t;
 
+void __ext4_corrupted_block_group(struct super_block *sb,
+				  ext4_group_t group, unsigned int flags,
+				  const char *function, unsigned int line);
+
+#define ext4_corrupted_block_group(sb, group, flags, fmt, ...)		\
+	do {								\
+		__ext4_warning(sb, __func__, __LINE__, fmt,		\
+				##__VA_ARGS__);				\
+		__ext4_corrupted_block_group(sb, group, flags,		\
+					__func__, __LINE__);		\
+	} while (0)
 /*
  * Flags used in mballoc's allocation_context flags field.
  *
@@ -2673,7 +2684,11 @@ struct ext4_group_info {
 #define EXT4_GROUP_INFO_NEED_INIT_BIT		0
 #define EXT4_GROUP_INFO_WAS_TRIMMED_BIT		1
 #define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT	2
+#define EXT4_GROUP_INFO_BBITMAP_CORRUPT		\
+	(1 << EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT)
 #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT	3
+#define EXT4_GROUP_INFO_IBITMAP_CORRUPT		\
+	(1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
 
 #define EXT4_MB_GRP_NEED_INIT(grp)	\
 	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index fc65310..92bcc8d 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -70,26 +70,15 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
 				       ext4_group_t block_group,
 				       struct ext4_group_desc *gdp)
 {
-	struct ext4_group_info *grp;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	J_ASSERT_BH(bh, buffer_locked(bh));
 
 	/* If checksum is bad mark all blocks and inodes use to prevent
 	 * allocation, essentially implementing a per-group read-only flag. */
 	if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-		ext4_error(sb, "Checksum bad for group %u", block_group);
-		grp = ext4_get_group_info(sb, block_group);
-		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-			percpu_counter_sub(&sbi->s_freeclusters_counter,
-					   grp->bb_free);
-		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-		if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-			int count;
-			count = ext4_free_inodes_count(sb, gdp);
-			percpu_counter_sub(&sbi->s_freeinodes_counter,
-					   count);
-		}
-		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
+		ext4_corrupted_block_group(sb, block_group,
+				EXT4_GROUP_INFO_BBITMAP_CORRUPT |
+				EXT4_GROUP_INFO_IBITMAP_CORRUPT,
+				"Checksum bad for group %u", block_group);
 		return 0;
 	}
 
@@ -125,8 +114,6 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 	struct ext4_group_desc *desc;
 	struct buffer_head *bh = NULL;
 	ext4_fsblk_t bitmap_blk;
-	struct ext4_group_info *grp;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	desc = ext4_get_group_desc(sb, block_group, NULL);
 	if (!desc)
@@ -193,16 +180,10 @@ verify:
 					   EXT4_INODES_PER_GROUP(sb) / 8)) {
 		ext4_unlock_group(sb, block_group);
 		put_bh(bh);
-		ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
-			   "inode_bitmap = %llu", block_group, bitmap_blk);
-		grp = ext4_get_group_info(sb, block_group);
-		if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-			int count;
-			count = ext4_free_inodes_count(sb, desc);
-			percpu_counter_sub(&sbi->s_freeinodes_counter,
-					   count);
-		}
-		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
+		ext4_corrupted_block_group(sb, block_group,
+				EXT4_GROUP_INFO_IBITMAP_CORRUPT,
+				"Corrupt inode bitmap - block_group = %u, inode_bitmap = %llu",
+				block_group, bitmap_blk);
 		return NULL;
 	}
 	ext4_unlock_group(sb, block_group);
@@ -337,14 +318,9 @@ out:
 		if (!fatal)
 			fatal = err;
 	} else {
-		ext4_error(sb, "bit already cleared for inode %lu", ino);
-		if (gdp && !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-			int count;
-			count = ext4_free_inodes_count(sb, gdp);
-			percpu_counter_sub(&sbi->s_freeinodes_counter,
-					   count);
-		}
-		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
+		ext4_corrupted_block_group(sb, block_group,
+				EXT4_GROUP_INFO_IBITMAP_CORRUPT,
+				"bit already cleared for inode %lu", ino);
 	}
 
 error_return:
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 7282d07..e6805e6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -752,10 +752,18 @@ int ext4_mb_generate_buddy(struct super_block *sb,
 	if (free != grp->bb_free) {
 		struct ext4_group_desc *gdp;
 		gdp = ext4_get_group_desc(sb, group, NULL);
-		ext4_error(sb, "group %lu: %u blocks in bitmap, %u in bb, "
-			"%u in gd, %lu pa's\n", (long unsigned int)group,
-			free, grp->bb_free, ext4_free_group_clusters(sb, gdp),
-			grp->bb_prealloc_nr);
+
+		ext4_corrupted_block_group(sb, group,
+				EXT4_GROUP_INFO_BBITMAP_CORRUPT,
+				"group %lu: %u blocks in bitmap, %u in bb, %u in gd, %lu pa's block bitmap corrupt",
+				(unsigned long int)group, free, grp->bb_free,
+				ext4_free_group_clusters(sb, gdp),
+				grp->bb_prealloc_nr);
+		/*
+		 * If we intend to continue, we consider group descriptor
+		 * corrupt and update bb_free using bitmap value
+		 */
+		grp->bb_free = free;
 		return -EIO;
 	}
 	mb_set_largest_free_order(sb, grp);
@@ -1101,7 +1109,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 	int block;
 	int pnum;
 	int poff;
-	struct page *page;
+	struct page *page = NULL;
 	int ret;
 	struct ext4_group_info *grp;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1127,7 +1135,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 		 */
 		ret = ext4_mb_init_group(sb, group);
 		if (ret)
-			return ret;
+			goto err;
 	}
 
 	/*
@@ -1227,6 +1235,7 @@ err:
 		page_cache_release(e4b->bd_buddy_page);
 	e4b->bd_buddy = NULL;
 	e4b->bd_bitmap = NULL;
+	ext4_warning(sb, "Error loading buddy information for %u", group);
 	return ret;
 }
 
@@ -3599,9 +3608,11 @@ int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
 	}
 
 	if (free != ext4_free_group_clusters(sb, gdp)) {
-		ext4_error(sb, "on-disk bitmap for group %d"
-			"corrupted: %u blocks free in bitmap, %u - in gd\n",
-			group, free, ext4_free_group_clusters(sb, gdp));
+		ext4_corrupted_block_group(sb, group,
+				EXT4_GROUP_INFO_BBITMAP_CORRUPT,
+				"on-disk bitmap for group %d corrupted: %u blocks free in bitmap, %u - in gd\n",
+				group, free,
+				ext4_free_group_clusters(sb, gdp));
 		return -EIO;
 	}
 	return 0;
@@ -3962,16 +3973,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 	/* "free < pa->pa_free" means we maybe double alloc the same blocks,
 	 * otherwise maybe leave some free blocks unavailable, no need to BUG.*/
 	if ((free > pa->pa_free && !pa->pa_error) || (free < pa->pa_free)) {
-		ext4_error(sb, "pa free mismatch: [pa %p] "
-				"[phy %lu] [logic %lu] [len %u] [free %u] "
-				"[error %u] [inode %lu] [freed %u]", pa,
-				(unsigned long)pa->pa_pstart,
-				(unsigned long)pa->pa_lstart,
-				(unsigned)pa->pa_len, (unsigned)pa->pa_free,
-				(unsigned)pa->pa_error, pa->pa_inode->i_ino,
-				free);
 		ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
-					free, pa->pa_free);
+				      free, pa->pa_free);
 		/*
 		 * pa is already deleted so we use the value obtained
 		 * from the bitmap and continue.
@@ -4031,14 +4034,11 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
 		return 0;
 
 	bitmap_bh = ext4_read_block_bitmap(sb, group);
-	if (bitmap_bh == NULL) {
-		ext4_error(sb, "Error reading block bitmap for %u", group);
+	if (bitmap_bh == NULL)
 		return 0;
-	}
 
 	err = ext4_mb_load_buddy(sb, group, &e4b);
 	if (err) {
-		ext4_error(sb, "Error loading buddy information for %u", group);
 		put_bh(bitmap_bh);
 		return 0;
 	}
@@ -4198,16 +4198,11 @@ repeat:
 		group = ext4_get_group_number(sb, pa->pa_pstart);
 
 		err = ext4_mb_load_buddy(sb, group, &e4b);
-		if (err) {
-			ext4_error(sb, "Error loading buddy information for %u",
-					group);
+		if (err)
 			return;
-		}
 
 		bitmap_bh = ext4_read_block_bitmap(sb, group);
 		if (bitmap_bh == NULL) {
-			ext4_error(sb, "Error reading block bitmap for %u",
-					group);
 			ext4_mb_unload_buddy(&e4b);
 			continue;
 		}
@@ -4467,11 +4462,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
 	list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
 
 		group = ext4_get_group_number(sb, pa->pa_pstart);
-		if (ext4_mb_load_buddy(sb, group, &e4b)) {
-			ext4_error(sb, "Error loading buddy information for %u",
-					group);
+		if (ext4_mb_load_buddy(sb, group, &e4b))
 			continue;
-		}
 		ext4_lock_group(sb, group);
 		list_del(&pa->pa_group_list);
 		ext4_get_group_info(sb, group)->bb_prealloc_nr--;
@@ -4742,17 +4734,18 @@ errout:
 			 * been updated or not when fail case. So can
 			 * not revert pa_free back, just mark pa_error*/
 			pa->pa_error++;
-			ext4_error(sb,
-				"Updating bitmap error: [err %d] "
-				"[pa %p] [phy %lu] [logic %lu] "
-				"[len %u] [free %u] [error %u] "
-				"[inode %lu]", *errp, pa,
-				(unsigned long)pa->pa_pstart,
-				(unsigned long)pa->pa_lstart,
-				(unsigned)pa->pa_len,
-				(unsigned)pa->pa_free,
-				(unsigned)pa->pa_error,
-				pa->pa_inode ? pa->pa_inode->i_ino : 0);
+			ext4_corrupted_block_group(sb, 0, 0,
+					"Updating bitmap error: [err %d] "
+					"[pa %p] [phy %lu] [logic %lu] "
+					"[len %u] [free %u] [error %u] "
+					"[inode %lu]", *errp, pa,
+					(unsigned long)pa->pa_pstart,
+					(unsigned long)pa->pa_lstart,
+					(unsigned)pa->pa_len,
+					(unsigned)pa->pa_free,
+					(unsigned)pa->pa_error,
+					pa->pa_inode ?
+					pa->pa_inode->i_ino : 0);
 		}
 	}
 	ext4_mb_release_context(ac);
@@ -5037,7 +5030,7 @@ do_more:
 
 	err = ext4_mb_load_buddy(sb, block_group, &e4b);
 	if (err)
-		goto error_return;
+		goto error_brelse;
 
 	if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
 		struct ext4_free_data *new_entry;
@@ -5119,8 +5112,9 @@ do_more:
 		goto do_more;
 	}
 error_return:
-	brelse(bitmap_bh);
 	ext4_std_error(sb, err);
+error_brelse:
+	brelse(bitmap_bh);
 	return;
 }
 
@@ -5216,7 +5210,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 
 	err = ext4_mb_load_buddy(sb, block_group, &e4b);
 	if (err)
-		goto error_return;
+		goto error_brelse;
 
 	/*
 	 * need to update group_info->bb_free and bitmap
@@ -5253,8 +5247,9 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 		err = ret;
 
 error_return:
-	brelse(bitmap_bh);
 	ext4_std_error(sb, err);
+error_brelse:
+	brelse(bitmap_bh);
 	return err;
 }
 
@@ -5329,11 +5324,9 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
 	trace_ext4_trim_all_free(sb, group, start, max);
 
 	ret = ext4_mb_load_buddy(sb, group, &e4b);
-	if (ret) {
-		ext4_error(sb, "Error in loading buddy "
-				"information for %u", group);
+	if (ret)
 		return ret;
-	}
+
 	bitmap = e4b.bd_bitmap;
 
 	ext4_lock_group(sb, group);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c625960..0de22f2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -633,6 +633,37 @@ void __ext4_warning(struct super_block *sb, const char *function,
 	va_end(args);
 }
 
+void __ext4_corrupted_block_group(struct super_block *sb, ext4_group_t group,
+				  unsigned int flags, const char *function,
+				  unsigned int line)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+
+	if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT &&
+	    !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) {
+		percpu_counter_sub(&sbi->s_freeclusters_counter,
+					grp->bb_free);
+		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
+			&grp->bb_state);
+	}
+
+	if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT &&
+	    !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+		if (gdp) {
+			int count;
+
+			count = ext4_free_inodes_count(sb, gdp);
+			percpu_counter_sub(&sbi->s_freeinodes_counter,
+					   count);
+		}
+		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
+			&grp->bb_state);
+	}
+	save_error_info(sb, function, line);
+}
+
 void __ext4_grp_locked_error(const char *function, unsigned int line,
 			     struct super_block *sb, ext4_group_t grp,
 			     unsigned long ino, ext4_fsblk_t block,

[-- Attachment #2: Message signed with OpenPGP using GPGMail --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-08-11 19:52 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-03  7:22 Sleeping function called in invalid context Nikolay Borisov
2016-08-04 16:05 ` Jan Kara
2016-08-04 20:58   ` Theodore Ts'o
2016-08-05  6:29     ` Nikolay Borisov
2016-08-05 14:56       ` Theodore Ts'o
2016-08-05 17:06         ` Darrick J. Wong
2016-08-11 19:52         ` Andreas Dilger

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.