>From f5038c6e7a3d1a4a91879187b92ede8c868988ac Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 11 Jun 2018 10:56:04 +0200 Subject: [PATCH] bdi: Fix another oops in wb_workfn() syzbot is reporting NULL pointer dereference at wb_workfn() [1] due to wb->bdi->dev being NULL. And Dmitry confirmed that wb->state was WB_shutting_down after wb->bdi->dev became NULL. This indicates that unregister_bdi() failed to call wb_shutdown() on one of wb objects. The problem is in cgwb_bdi_unregister() which does cgwb_kill() and thus drops bdi's reference to wb structures before going through the list of wbs again and calling wb_shutdown() on each of them. This way the loop iterating through all wbs can easily miss a wb if that wb has already passed through cgwb_remove_from_bdi_list() called from wb_shutdown() from cgwb_release_workfn() and as a result fully shutdown bdi although wb_workfn() for this wb structure is still running. In fact there are also other ways cgwb_bdi_unregister() can race with cgwb_release_workfn() leading e.g. to use-after-free issues: CPU1 CPU2 cgwb_bdi_unregister() cgwb_kill(*slot); cgwb_release() queue_work(cgwb_release_wq, &wb->release_work); cgwb_release_workfn() wb = list_first_entry(&bdi->wb_list, ...) spin_unlock_irq(&cgwb_lock); wb_shutdown(wb); ... kfree_rcu(wb, rcu); wb_shutdown(wb); -> oops use-after-free We solve all these issues by making cgwb_bdi_unregister() wait for shutdown of all wb structures instead of going through them and trying to actively shut them down. [1] https://syzkaller.appspot.com/bug?id=e0818ccb7e46190b3f1038b0c794299208ed4206 Cc: Dmitry Vyukov Cc: Tejun Heo Reported-and-analyzed-by: Tetsuo Handa Reported-by: syzbot Signed-off-by: Jan Kara --- mm/backing-dev.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 347cc834c04a..39fa5f4ddd5c 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -370,7 +370,6 @@ static void wb_shutdown(struct bdi_writeback *wb) set_bit(WB_shutting_down, &wb->state); spin_unlock_bh(&wb->work_lock); - cgwb_remove_from_bdi_list(wb); /* * Drain work list and shutdown the delayed_work. !WB_registered * tells wb_workfn() that @wb is dying and its work_list needs to @@ -379,6 +378,7 @@ static void wb_shutdown(struct bdi_writeback *wb) mod_delayed_work(bdi_wq, &wb->dwork, 0); flush_delayed_work(&wb->dwork); WARN_ON(!list_empty(&wb->work_list)); + cgwb_remove_from_bdi_list(wb); /* * Make sure bit gets cleared after shutdown is finished. Matches with * the barrier provided by test_and_clear_bit() above. @@ -541,6 +541,9 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb) spin_lock_irq(&cgwb_lock); list_del_rcu(&wb->bdi_node); spin_unlock_irq(&cgwb_lock); + /* Last wb of the bdi? Wake up waiters for shutdown of all wbs. */ + if (list_empty(&wb->bdi->wb_list)) + wake_up_all(&wb->bdi->wb_waitq); } static int cgwb_create(struct backing_dev_info *bdi, @@ -710,22 +713,16 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { struct radix_tree_iter iter; void **slot; - struct bdi_writeback *wb; WARN_ON(test_bit(WB_registered, &bdi->wb.state)); spin_lock_irq(&cgwb_lock); radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) cgwb_kill(*slot); - - while (!list_empty(&bdi->wb_list)) { - wb = list_first_entry(&bdi->wb_list, struct bdi_writeback, - bdi_node); - spin_unlock_irq(&cgwb_lock); - wb_shutdown(wb); - spin_lock_irq(&cgwb_lock); - } spin_unlock_irq(&cgwb_lock); + + /* Wait for all writeback structures to shutdown */ + wait_event(bdi->wb_waitq, list_empty(&bdi->wb_list)); } /** -- 2.13.7