From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:58888) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1buju1-0004kR-Et for qemu-devel@nongnu.org; Thu, 13 Oct 2016 13:34:51 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1bujty-00043N-LH for qemu-devel@nongnu.org; Thu, 13 Oct 2016 13:34:44 -0400 From: Paolo Bonzini Date: Thu, 13 Oct 2016 19:34:08 +0200 Message-Id: <1476380062-18001-5-git-send-email-pbonzini@redhat.com> In-Reply-To: <1476380062-18001-1-git-send-email-pbonzini@redhat.com> References: <1476380062-18001-1-git-send-email-pbonzini@redhat.com> Subject: [Qemu-devel] [PATCH 04/18] block: add BDS field to count in-flight requests List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: qemu-block@nongnu.org, famz@redhat.com, kwolf@redhat.com, stefanha@redhat.com Unlike tracked_requests, this field also counts throttled requests, and remains non-zero if an AIO operation needs a BH to be "really" completed. With this change, it is no longer necessary to have a dummy BdrvTrackedRequest for requests that are never serialising, and it is no longer necessary to poll the AioContext once after bdrv_requests_pending(bs) returns false. Signed-off-by: Paolo Bonzini --- block/block-backend.c | 23 +++++++++++--- block/io.c | 81 ++++++++++++++++++++++++++++++++--------------- include/block/block_int.h | 10 +++--- 3 files changed, 81 insertions(+), 33 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 1a724a8..234df1e 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -799,20 +799,25 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, BdrvRequestFlags flags) { int ret; + BlockDriverState *bs = blk_bs(blk); - trace_blk_co_preadv(blk, blk_bs(blk), offset, bytes, flags); + trace_blk_co_preadv(blk, bs, offset, bytes, flags); ret = blk_check_byte_request(blk, offset, bytes); if (ret < 0) { return ret; } + bdrv_inc_in_flight(bs); + /* throttling disk I/O */ if (blk->public.throttle_state) { throttle_group_co_io_limits_intercept(blk, bytes, false); } - return bdrv_co_preadv(blk->root, offset, bytes, qiov, flags); + ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags); + bdrv_dec_in_flight(bs); + return ret; } int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, @@ -820,14 +825,17 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, BdrvRequestFlags flags) { int ret; + BlockDriverState *bs = blk_bs(blk); - trace_blk_co_pwritev(blk, blk_bs(blk), offset, bytes, flags); + trace_blk_co_pwritev(blk, bs, offset, bytes, flags); ret = blk_check_byte_request(blk, offset, bytes); if (ret < 0) { return ret; } + bdrv_inc_in_flight(bs); + /* throttling disk I/O */ if (blk->public.throttle_state) { throttle_group_co_io_limits_intercept(blk, bytes, true); @@ -837,7 +845,9 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, flags |= BDRV_REQ_FUA; } - return bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags); + ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags); + bdrv_dec_in_flight(bs); + return ret; } typedef struct BlkRwCo { @@ -930,6 +940,8 @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags) static void error_callback_bh(void *opaque) { struct BlockBackendAIOCB *acb = opaque; + + bdrv_dec_in_flight(acb->common.bs); acb->common.cb(acb->common.opaque, acb->ret); qemu_aio_unref(acb); } @@ -940,6 +952,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, { struct BlockBackendAIOCB *acb; + bdrv_inc_in_flight(blk_bs(blk)); acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque); acb->blk = blk; acb->ret = ret; @@ -962,6 +975,7 @@ static const AIOCBInfo blk_aio_em_aiocb_info = { static void blk_aio_complete(BlkAioEmAIOCB *acb) { if (acb->has_returned) { + bdrv_dec_in_flight(acb->common.bs); acb->common.cb(acb->common.opaque, acb->rwco.ret); qemu_aio_unref(acb); } @@ -983,6 +997,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, BlkAioEmAIOCB *acb; Coroutine *co; + bdrv_inc_in_flight(blk_bs(blk)); acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); acb->rwco = (BlkRwCo) { .blk = blk, diff --git a/block/io.c b/block/io.c index b136c89..8d46d8b 100644 --- a/block/io.c +++ b/block/io.c @@ -143,7 +143,7 @@ bool bdrv_requests_pending(BlockDriverState *bs) { BdrvChild *child; - if (!QLIST_EMPTY(&bs->tracked_requests)) { + if (atomic_read(&bs->in_flight)) { return true; } @@ -176,12 +176,9 @@ typedef struct { static void bdrv_drain_poll(BlockDriverState *bs) { - bool busy = true; - - while (busy) { + while (bdrv_requests_pending(bs)) { /* Keep iterating */ - busy = bdrv_requests_pending(bs); - busy |= aio_poll(bdrv_get_aio_context(bs), busy); + aio_poll(bdrv_get_aio_context(bs), true); } } @@ -189,8 +186,10 @@ static void bdrv_co_drain_bh_cb(void *opaque) { BdrvCoDrainData *data = opaque; Coroutine *co = data->co; + BlockDriverState *bs = data->bs; - bdrv_drain_poll(data->bs); + bdrv_dec_in_flight(bs); + bdrv_drain_poll(bs); data->done = true; qemu_coroutine_enter(co); } @@ -209,6 +208,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs) .bs = bs, .done = false, }; + bdrv_inc_in_flight(bs); aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), bdrv_co_drain_bh_cb, &data); @@ -279,7 +279,7 @@ void bdrv_drain(BlockDriverState *bs) void bdrv_drain_all(void) { /* Always run first iteration so any pending completion BHs run */ - bool busy = true; + bool waited = true; BlockDriverState *bs; BdrvNextIterator it; BlockJob *job = NULL; @@ -313,8 +313,8 @@ void bdrv_drain_all(void) * request completion. Therefore we must keep looping until there was no * more activity rather than simply draining each device independently. */ - while (busy) { - busy = false; + while (waited) { + waited = false; for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { AioContext *aio_context = ctx->data; @@ -323,12 +323,11 @@ void bdrv_drain_all(void) for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { if (aio_context == bdrv_get_aio_context(bs)) { if (bdrv_requests_pending(bs)) { - busy = true; - aio_poll(aio_context, busy); + aio_poll(aio_context, true); + waited = true; } } } - busy |= aio_poll(aio_context, false); aio_context_release(aio_context); } } @@ -476,6 +475,16 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req, return true; } +void bdrv_inc_in_flight(BlockDriverState *bs) +{ + atomic_inc(&bs->in_flight); +} + +void bdrv_dec_in_flight(BlockDriverState *bs) +{ + atomic_dec(&bs->in_flight); +} + static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) { BlockDriverState *bs = self->bs; @@ -1097,6 +1106,8 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, return ret; } + bdrv_inc_in_flight(bs); + /* Don't do copy-on-read if we read data before write operation */ if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) { flags |= BDRV_REQ_COPY_ON_READ; @@ -1132,6 +1143,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, use_local_qiov ? &local_qiov : qiov, flags); tracked_request_end(&req); + bdrv_dec_in_flight(bs); if (use_local_qiov) { qemu_iovec_destroy(&local_qiov); @@ -1480,6 +1492,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, return ret; } + bdrv_inc_in_flight(bs); /* * Align write if necessary by performing a read-modify-write cycle. * Pad qiov with the read parts and be sure to have a tracked request not @@ -1581,6 +1594,7 @@ fail: qemu_vfree(tail_buf); out: tracked_request_end(&req); + bdrv_dec_in_flight(bs); return ret; } @@ -1705,17 +1719,19 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, } *file = NULL; + bdrv_inc_in_flight(bs); ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum, file); if (ret < 0) { *pnum = 0; - return ret; + goto out; } if (ret & BDRV_BLOCK_RAW) { assert(ret & BDRV_BLOCK_OFFSET_VALID); - return bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS, - *pnum, pnum, file); + ret = bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS, + *pnum, pnum, file); + goto out; } if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) { @@ -1757,6 +1773,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, } } +out: + bdrv_dec_in_flight(bs); return ret; } @@ -2102,6 +2120,7 @@ static const AIOCBInfo bdrv_em_co_aiocb_info = { static void bdrv_co_complete(BlockAIOCBCoroutine *acb) { if (!acb->need_bh) { + bdrv_dec_in_flight(acb->common.bs); acb->common.cb(acb->common.opaque, acb->req.error); qemu_aio_unref(acb); } @@ -2152,6 +2171,9 @@ static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child, Coroutine *co; BlockAIOCBCoroutine *acb; + /* Matched by bdrv_co_complete's bdrv_dec_in_flight. */ + bdrv_inc_in_flight(child->bs); + acb = qemu_aio_get(&bdrv_em_co_aiocb_info, child->bs, cb, opaque); acb->child = child; acb->need_bh = true; @@ -2185,6 +2207,9 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, Coroutine *co; BlockAIOCBCoroutine *acb; + /* Matched by bdrv_co_complete's bdrv_dec_in_flight. */ + bdrv_inc_in_flight(bs); + acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); acb->need_bh = true; acb->req.error = -EINPROGRESS; @@ -2213,6 +2238,9 @@ BlockAIOCB *bdrv_aio_pdiscard(BlockDriverState *bs, int64_t offset, int count, trace_bdrv_aio_pdiscard(bs, offset, count, opaque); + /* Matched by bdrv_co_complete's bdrv_dec_in_flight. */ + bdrv_inc_in_flight(bs); + acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); acb->need_bh = true; acb->req.error = -EINPROGRESS; @@ -2273,23 +2301,22 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque) int coroutine_fn bdrv_co_flush(BlockDriverState *bs) { int ret; - BdrvTrackedRequest req; if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) || bdrv_is_sg(bs)) { return 0; } - tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH); + bdrv_inc_in_flight(bs); int current_gen = bs->write_gen; /* Wait until any previous flushes are completed */ - while (bs->active_flush_req != NULL) { + while (bs->active_flush_req) { qemu_co_queue_wait(&bs->flush_queue); } - bs->active_flush_req = &req; + bs->active_flush_req = true; /* Write back all layers by calling one driver function */ if (bs->drv->bdrv_co_flush) { @@ -2359,11 +2386,11 @@ flush_parent: out: /* Notify any pending flushes that we have completed */ bs->flushed_gen = current_gen; - bs->active_flush_req = NULL; + bs->active_flush_req = false; /* Return value is ignored - it's ok if wait queue is empty */ qemu_co_queue_next(&bs->flush_queue); - tracked_request_end(&req); + bdrv_dec_in_flight(bs); return ret; } @@ -2446,6 +2473,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, return 0; } + bdrv_inc_in_flight(bs); tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req); @@ -2492,6 +2520,7 @@ out: bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS, req.bytes >> BDRV_SECTOR_BITS); tracked_request_end(&req); + bdrv_dec_in_flight(bs); return ret; } @@ -2524,13 +2553,12 @@ int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count) static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf) { BlockDriver *drv = bs->drv; - BdrvTrackedRequest tracked_req; CoroutineIOCompletion co = { .coroutine = qemu_coroutine_self(), }; BlockAIOCB *acb; - tracked_request_begin(&tracked_req, bs, 0, 0, BDRV_TRACKED_IOCTL); + bdrv_inc_in_flight(bs); if (!drv || !drv->bdrv_aio_ioctl) { co.ret = -ENOTSUP; goto out; @@ -2543,7 +2571,7 @@ static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf) } qemu_coroutine_yield(); out: - tracked_request_end(&tracked_req); + bdrv_dec_in_flight(bs); return co.ret; } @@ -2600,6 +2628,9 @@ BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, bs, cb, opaque); Coroutine *co; + /* Matched by bdrv_co_complete's bdrv_dec_in_flight. */ + bdrv_inc_in_flight(bs); + acb->need_bh = true; acb->req.error = -EINPROGRESS; acb->req.req = req; diff --git a/include/block/block_int.h b/include/block/block_int.h index 3e79228..5a7308b 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -62,8 +62,6 @@ enum BdrvTrackedRequestType { BDRV_TRACKED_READ, BDRV_TRACKED_WRITE, - BDRV_TRACKED_FLUSH, - BDRV_TRACKED_IOCTL, BDRV_TRACKED_DISCARD, }; @@ -443,7 +441,7 @@ struct BlockDriverState { note this is a reference count */ CoQueue flush_queue; /* Serializing flush queue */ - BdrvTrackedRequest *active_flush_req; /* Flush request in flight */ + bool active_flush_req; /* Flush request in flight? */ unsigned int write_gen; /* Current data generation */ unsigned int flushed_gen; /* Flushed write generation */ @@ -471,7 +469,8 @@ struct BlockDriverState { /* Callback before write request is processed */ NotifierWithReturnList before_write_notifiers; - /* number of in-flight serialising requests */ + /* number of in-flight requests; overall and serialising */ + unsigned int in_flight; unsigned int serialising_in_flight; /* Offset after the highest byte written to */ @@ -785,6 +784,9 @@ bool bdrv_requests_pending(BlockDriverState *bs); void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); +void bdrv_inc_in_flight(BlockDriverState *bs); +void bdrv_dec_in_flight(BlockDriverState *bs); + void blockdev_close_all_bdrv_states(void); #endif /* BLOCK_INT_H */ -- 2.7.4