>From 73254eae63786aca0af10e42e5b41465c90d8da8 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 7 Jul 2016 11:03:30 +0200 Subject: [PATCH] block: generic_make_request() recursive bios: process deepest levels first By providing each q->make_request_fn() with an empty "recursion" bio_list, then merging any recursively submitted bios to the head of the "remainder" list, we can make the recursion-to-iteration logic in generic_make_request() process deepest level bios first. --- As suggested by Neil Brown while discussing [RFC] block: fix blk_queue_split() resource exhaustion https://lkml.org/lkml/2016/7/7/27 Stack: qA -> qB -> qC -> qD === Without this patch: generic_make_request(bio_orig to qA) recursion: empty, remainder: empty qA->make_request_fn(bio_orig) potential call to blk_queue_split() result: bio_S, bio_R recursion: empty, remainder: bio_R bio_S generic_make_request(bio_S to qB) recursion: bio_S, remainder: bio_R <- return pop: recursion: empty, remainder: bio_R qB->make_request_fn(bio_S) remap, maybe many clones because of striping generic_make_request(clones to qC) recursion: bio_C1, bio_C2, bio_C3 remainder: bio_R <- return pop: recursion: bio_C2, bio_C3, remainder: bio_R qC->make_request_fn(bio_C1) remap, ... generic_make_request(clones to qD) recursion: bio_C2, bio_C3, bio_D1_1, bio_D1_2 remainder: bio_R <- return pop: recursion: bio_C3, bio_D1_1, bio_D1_2 remainder: bio_R qC->make_request_fn(bio_C2) recursion: bio_C3, bio_D1_1, bio_D1_2, bio_D2_1, bio_D2_2 remainder: bio_R <- return pop: recursion: bio_D1_1, bio_D1_2, bio_D2_1, bio_D2_2 remainder: bio_R qC->make_request_fn(bio_C3) ... 
=== With this patch: generic_make_request(bio_orig to qA) recursion: empty, remainder: empty qA->make_request_fn(bio_orig) potential call to blk_queue_split() result: bio_S, bio_R recursion: empty, remainder: bio_R bio_S generic_make_request(bio_S to qB) recursion: bio_S, remainder: bio_R <- return merge_head: recursion: empty, remainder: bio_S, bio_R pop: recursion: empty, remainder: bio_R qB->make_request_fn(bio_S) remap, maybe many clones because of striping generic_make_request(clones to qC) recursion: bio_C1, bio_C2, bio_C3 remainder: bio_R <- return merge_head: recursion: empty remainder: bio_C1, bio_C2, bio_C3, bio_R pop: remainder: bio_C2, bio_C3, bio_R qC->make_request_fn(bio_C1) remap, ... generic_make_request(clones to qD) recursion: bio_D1_1, bio_D1_2 remainder: bio_C2, bio_C3, bio_R <- return merge_head: recursion: empty remainder: bio_D1_1, bio_D1_2, bio_C2, bio_C3, bio_R pop qC->make_request_fn(bio_D1_1) remainder: bio_D1_2, bio_C2, bio_C3, bio_R ... --- block/bio.c | 17 ++++++++++++++--- block/blk-core.c | 10 +++++----- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/block/bio.c b/block/bio.c index 2ffcea0..92733ce 100644 --- a/block/bio.c +++ b/block/bio.c @@ -366,13 +366,17 @@ static void punt_bios_to_rescuer(struct bio_set *bs) */ bio_list_init(&punt); - bio_list_init(&nopunt); + bio_list_init(&nopunt); while ((bio = bio_list_pop(&current->bio_lists->recursion))) bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); - current->bio_lists->recursion = nopunt; + bio_list_init(&nopunt); + while ((bio = bio_list_pop(&current->bio_lists->remainder))) + bio_list_add(bio->bi_pool == bs ? 
&punt : &nopunt, bio); + current->bio_lists->remainder = nopunt; + spin_lock(&bs->rescue_lock); bio_list_merge(&bs->rescue_list, &punt); spin_unlock(&bs->rescue_lock); @@ -380,6 +384,13 @@ static void punt_bios_to_rescuer(struct bio_set *bs) queue_work(bs->rescue_workqueue, &bs->rescue_work); } +static bool current_has_pending_bios(void) +{ + return current->bio_lists && + (!bio_list_empty(&current->bio_lists->recursion) || + !bio_list_empty(&current->bio_lists->remainder)); +} + /** * bio_alloc_bioset - allocate a bio for I/O * @gfp_mask: the GFP_ mask given to the slab allocator @@ -459,7 +470,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) * workqueue before we retry with the original gfp_flags. */ - if (current->bio_lists && !bio_list_empty(&current->bio_lists->recursion)) + if (current_has_pending_bios()) gfp_mask &= ~__GFP_DIRECT_RECLAIM; p = mempool_alloc(bs->bio_pool, gfp_mask); diff --git a/block/blk-core.c b/block/blk-core.c index f03ff4c..675131b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2070,22 +2070,22 @@ blk_qc_t generic_make_request(struct bio *bio) * bio_list, and call into ->make_request() again. */ BUG_ON(bio->bi_next); - bio_list_init(&bio_lists_on_stack.recursion); bio_list_init(&bio_lists_on_stack.remainder); current->bio_lists = &bio_lists_on_stack; do { struct request_queue *q = bdev_get_queue(bio->bi_bdev); if (likely(blk_queue_enter(q, false) == 0)) { + bio_list_init(&bio_lists_on_stack.recursion); ret = q->make_request_fn(q, bio); - blk_queue_exit(q); + bio_list_merge_head(&bio_lists_on_stack.remainder, + &bio_lists_on_stack.recursion); + /* XXX bio_list_init(&bio_lists_on_stack.recursion); */ } else { bio_io_error(bio); } - bio = bio_list_pop(&current->bio_lists->recursion); - if (!bio) - bio = bio_list_pop(&current->bio_lists->remainder); + bio = bio_list_pop(&current->bio_lists->remainder); } while (bio); current->bio_lists = NULL; /* deactivate */ -- 1.9.1