All of lore.kernel.org
 help / color / mirror / Atom feed
* [dm-devel] [PATCH] dm: fix NULL pointer issue when free bio
@ 2021-09-29 11:59 Jiazi Li
  2021-10-06 14:43 ` [dm-devel] " Mike Snitzer
  0 siblings, 1 reply; 2+ messages in thread
From: Jiazi Li @ 2021-09-29 11:59 UTC (permalink / raw)
  To: Alasdair Kergon, Mike Snitzer; +Cc: Jiazi Li, dm-devel

dm_io_dec_pending() calls end_io_acct() first, which decrements the md's
in-flight pending count. If another task is swapping the table at the same
time, the following race can occur:
task1                             task2
do_resume
 ->do_suspend
  ->dm_wait_for_completion
                                  bio_endio
				   ->clone_endio
				    ->dm_io_dec_pending
				     ->end_io_acct
				      ->wakeup task1
 ->dm_swap_table
  ->__bind
   ->__bind_mempools
    ->bioset_exit
     ->mempool_exit
                                     ->free_io
By the time free_io runs, mempool->elements is NULL, which leads to the
following crash:
[ 67.330330] Unable to handle kernel NULL pointer dereference at virtual
address 0000000000000000
......
[ 67.330494] pstate: 80400085 (Nzcv daIf +PAN -UAO)
[ 67.330510] pc : mempool_free+0x70/0xa0
[ 67.330515] lr : mempool_free+0x4c/0xa0
[ 67.330520] sp : ffffff8008013b20
[ 67.330524] x29: ffffff8008013b20 x28: 0000000000000004
[ 67.330530] x27: ffffffa8c2ff40a0 x26: 00000000ffff1cc8
[ 67.330535] x25: 0000000000000000 x24: ffffffdada34c800
[ 67.330541] x23: 0000000000000000 x22: ffffffdada34c800
[ 67.330547] x21: 00000000ffff1cc8 x20: ffffffd9a1304d80
[ 67.330552] x19: ffffffdada34c970 x18: 000000b312625d9c
[ 67.330558] x17: 00000000002dcfbf x16: 00000000000006dd
[ 67.330563] x15: 000000000093b41e x14: 0000000000000010
[ 67.330569] x13: 0000000000007f7a x12: 0000000034155555
[ 67.330574] x11: 0000000000000001 x10: 0000000000000001
[ 67.330579] x9 : 0000000000000000 x8 : 0000000000000000
[ 67.330585] x7 : 0000000000000000 x6 : ffffff80148b5c1a
[ 67.330590] x5 : ffffff8008013ae0 x4 : 0000000000000001
[ 67.330596] x3 : ffffff80080139c8 x2 : ffffff801083bab8
[ 67.330601] x1 : 0000000000000000 x0 : ffffffdada34c970
[ 67.330609] Call trace:
[ 67.330616] mempool_free+0x70/0xa0
[ 67.330627] bio_put+0xf8/0x110
[ 67.330638] dec_pending+0x13c/0x230
[ 67.330644] clone_endio+0x90/0x180
[ 67.330649] bio_endio+0x198/0x1b8
[ 67.330655] dec_pending+0x190/0x230
[ 67.330660] clone_endio+0x90/0x180
[ 67.330665] bio_endio+0x198/0x1b8
[ 67.330673] blk_update_request+0x214/0x428
[ 67.330683] scsi_end_request+0x2c/0x300
[ 67.330688] scsi_io_completion+0xa0/0x710
[ 67.330695] scsi_finish_command+0xd8/0x110
[ 67.330700] scsi_softirq_done+0x114/0x148
[ 67.330708] blk_done_softirq+0x74/0xd0
[ 67.330716] __do_softirq+0x18c/0x374
[ 67.330724] irq_exit+0xb4/0xb8
[ 67.330732] __handle_domain_irq+0x84/0xc0
[ 67.330737] gic_handle_irq+0x148/0x1b0
[ 67.330744] el1_irq+0xe8/0x190
[ 67.330753] lpm_cpuidle_enter+0x4f8/0x538
[ 67.330759] cpuidle_enter_state+0x1fc/0x398
[ 67.330764] cpuidle_enter+0x18/0x20
[ 67.330772] do_idle+0x1b4/0x290
[ 67.330778] cpu_startup_entry+0x20/0x28
[ 67.330786] secondary_start_kernel+0x160/0x170

Move end_io_acct after free_io to fix this issue.

Signed-off-by: Jiazi Li <lijiazi@xiaomi.com>
---
 drivers/md/dm.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index a011d09..245fa41 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -496,18 +496,17 @@ static void start_io_acct(struct dm_io *io)
 				    false, 0, &io->stats_aux);
 }
 
-static void end_io_acct(struct dm_io *io)
+static void end_io_acct(struct mapped_device *md, struct bio *bio,
+		unsigned long start_time, struct dm_stats_aux *stats_aux)
 {
-	struct mapped_device *md = io->md;
-	struct bio *bio = io->orig_bio;
-	unsigned long duration = jiffies - io->start_time;
+	unsigned long duration = jiffies - start_time;
 
-	bio_end_io_acct(bio, io->start_time);
+	bio_end_io_acct(bio, start_time);
 
 	if (unlikely(dm_stats_used(&md->stats)))
 		dm_stats_account_io(&md->stats, bio_data_dir(bio),
 				    bio->bi_iter.bi_sector, bio_sectors(bio),
-				    true, duration, &io->stats_aux);
+				    true, duration, stats_aux);
 
 	/* nudge anyone waiting on suspend queue */
 	if (unlikely(wq_has_sleeper(&md->wait)))
@@ -790,6 +789,8 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
 	blk_status_t io_error;
 	struct bio *bio;
 	struct mapped_device *md = io->md;
+	unsigned long start_time = 0;
+	struct dm_stats_aux stats_aux;
 
 	/* Push-back supersedes any I/O errors */
 	if (unlikely(error)) {
@@ -821,8 +822,10 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
 		}
 
 		io_error = io->status;
-		end_io_acct(io);
+		start_time = io->start_time;
+		stats_aux = io->stats_aux;
 		free_io(md, io);
+		end_io_acct(md, bio, start_time, &stats_aux);
 
 		if (io_error == BLK_STS_DM_REQUEUE)
 			return;
-- 
2.7.4

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [dm-devel] dm: fix NULL pointer issue when free bio
  2021-09-29 11:59 [dm-devel] [PATCH] dm: fix NULL pointer issue when free bio Jiazi Li
@ 2021-10-06 14:43 ` Mike Snitzer
  0 siblings, 0 replies; 2+ messages in thread
From: Mike Snitzer @ 2021-10-06 14:43 UTC (permalink / raw)
  To: Jiazi Li; +Cc: Jiazi Li, dm-devel, Alasdair Kergon

On Wed, Sep 29 2021 at  7:59P -0400,
Jiazi Li <jqqlijiazi@gmail.com> wrote:

> dm_io_dec_pending call end_io_acct first, will dec md in-flight
> pending count. If a task is swapping table at same time.
> task1                             task2
> do_resume
>  ->do_suspend
>   ->dm_wait_for_completion
>                                   bio_endio
> 				   ->clone_endio
> 				    ->dm_io_dec_pending
> 				     ->end_io_acct
> 				      ->wakeup task1
>  ->dm_swap_table
>   ->__bind
>    ->__bind_mempools
>     ->bioset_exit
>      ->mempool_exit
>                                      ->free_io
> mempool->elements is NULL, and lead to following crash:
> [ 67.330330] Unable to handle kernel NULL pointer dereference at virtual
> address 0000000000000000
> ......
> [ 67.330494] pstate: 80400085 (Nzcv daIf +PAN -UAO)
> [ 67.330510] pc : mempool_free+0x70/0xa0
> [ 67.330515] lr : mempool_free+0x4c/0xa0
> [ 67.330520] sp : ffffff8008013b20
> [ 67.330524] x29: ffffff8008013b20 x28: 0000000000000004
> [ 67.330530] x27: ffffffa8c2ff40a0 x26: 00000000ffff1cc8
> [ 67.330535] x25: 0000000000000000 x24: ffffffdada34c800
> [ 67.330541] x23: 0000000000000000 x22: ffffffdada34c800
> [ 67.330547] x21: 00000000ffff1cc8 x20: ffffffd9a1304d80
> [ 67.330552] x19: ffffffdada34c970 x18: 000000b312625d9c
> [ 67.330558] x17: 00000000002dcfbf x16: 00000000000006dd
> [ 67.330563] x15: 000000000093b41e x14: 0000000000000010
> [ 67.330569] x13: 0000000000007f7a x12: 0000000034155555
> [ 67.330574] x11: 0000000000000001 x10: 0000000000000001
> [ 67.330579] x9 : 0000000000000000 x8 : 0000000000000000
> [ 67.330585] x7 : 0000000000000000 x6 : ffffff80148b5c1a
> [ 67.330590] x5 : ffffff8008013ae0 x4 : 0000000000000001
> [ 67.330596] x3 : ffffff80080139c8 x2 : ffffff801083bab8
> [ 67.330601] x1 : 0000000000000000 x0 : ffffffdada34c970
> [ 67.330609] Call trace:
> [ 67.330616] mempool_free+0x70/0xa0
> [ 67.330627] bio_put+0xf8/0x110
> [ 67.330638] dec_pending+0x13c/0x230
> [ 67.330644] clone_endio+0x90/0x180
> [ 67.330649] bio_endio+0x198/0x1b8
> [ 67.330655] dec_pending+0x190/0x230
> [ 67.330660] clone_endio+0x90/0x180
> [ 67.330665] bio_endio+0x198/0x1b8
> [ 67.330673] blk_update_request+0x214/0x428
> [ 67.330683] scsi_end_request+0x2c/0x300
> [ 67.330688] scsi_io_completion+0xa0/0x710
> [ 67.330695] scsi_finish_command+0xd8/0x110
> [ 67.330700] scsi_softirq_done+0x114/0x148
> [ 67.330708] blk_done_softirq+0x74/0xd0
> [ 67.330716] __do_softirq+0x18c/0x374
> [ 67.330724] irq_exit+0xb4/0xb8
> [ 67.330732] __handle_domain_irq+0x84/0xc0
> [ 67.330737] gic_handle_irq+0x148/0x1b0
> [ 67.330744] el1_irq+0xe8/0x190
> [ 67.330753] lpm_cpuidle_enter+0x4f8/0x538
> [ 67.330759] cpuidle_enter_state+0x1fc/0x398
> [ 67.330764] cpuidle_enter+0x18/0x20
> [ 67.330772] do_idle+0x1b4/0x290
> [ 67.330778] cpu_startup_entry+0x20/0x28
> [ 67.330786] secondary_start_kernel+0x160/0x170
> 
> Move end_io_acct after free_io to fix this issue.
> 
> Signed-off-by: Jiazi Li <lijiazi@xiaomi.com>

Thanks very much for this.  You did a wonderful job analyzing and
fixing this race.

I've tweaked the header slightly to improve clarity and made one
whitespace indentation adjustment.  I've now marked this for stable@
and queued this up.

Mike

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-10-06 14:48 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-29 11:59 [dm-devel] [PATCH] dm: fix NULL pointer issue when free bio Jiazi Li
2021-10-06 14:43 ` [dm-devel] " Mike Snitzer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.