From mboxrd@z Thu Jan 1 00:00:00 1970 From: Zhi Yong Wu Subject: Re: [PATCH v2 1/1] The codes V2 for QEMU disk I/O limits. Date: Wed, 27 Jul 2011 18:17:15 +0800 Message-ID: References: <1311670746-20498-1-git-send-email-wuzhy@linux.vnet.ibm.com> <1311670746-20498-2-git-send-email-wuzhy@linux.vnet.ibm.com> <20110726192618.GA8126@amt.cnet> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: QUOTED-PRINTABLE Cc: Zhi Yong Wu , qemu-devel@nongnu.org, kvm@vger.kernel.org, aliguori@us.ibm.com, stefanha@linux.vnet.ibm.com, ryanh@us.ibm.com, kwolf@redhat.com, vgoyal@redhat.com To: Marcelo Tosatti Return-path: Received: from mail-gw0-f46.google.com ([74.125.83.46]:63645 "EHLO mail-gw0-f46.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753077Ab1G0KRS convert rfc822-to-8bit (ORCPT ); Wed, 27 Jul 2011 06:17:18 -0400 Received: by gwaa18 with SMTP id a18so905623gwa.19 for ; Wed, 27 Jul 2011 03:17:17 -0700 (PDT) In-Reply-To: <20110726192618.GA8126@amt.cnet> Sender: kvm-owner@vger.kernel.org List-ID: On Wed, Jul 27, 2011 at 3:26 AM, Marcelo Tosatti = wrote: > On Tue, Jul 26, 2011 at 04:59:06PM +0800, Zhi Yong Wu wrote: >> Welcome to give me your comments, thanks. 
>> >> Signed-off-by: Zhi Yong Wu >> --- >> =A0Makefile.objs =A0 =A0 | =A0 =A02 +- >> =A0block.c =A0 =A0 =A0 =A0 =A0 | =A0288 ++++++++++++++++++++++++++++= +++++++++++++++++++++++-- >> =A0block.h =A0 =A0 =A0 =A0 =A0 | =A0 =A01 - >> =A0block/blk-queue.c | =A0116 +++++++++++++++++++++ >> =A0block/blk-queue.h | =A0 70 +++++++++++++ >> =A0block_int.h =A0 =A0 =A0 | =A0 28 +++++ >> =A0blockdev.c =A0 =A0 =A0 =A0| =A0 21 ++++ >> =A0qemu-config.c =A0 =A0 | =A0 24 +++++ >> =A0qemu-option.c =A0 =A0 | =A0 17 +++ >> =A0qemu-option.h =A0 =A0 | =A0 =A01 + >> =A0qemu-options.hx =A0 | =A0 =A01 + >> =A011 files changed, 559 insertions(+), 10 deletions(-) >> =A0create mode 100644 block/blk-queue.c >> =A0create mode 100644 block/blk-queue.h >> >> diff --git a/Makefile.objs b/Makefile.objs >> index 9f99ed4..06f2033 100644 >> --- a/Makefile.objs >> +++ b/Makefile.objs >> @@ -23,7 +23,7 @@ block-nested-y +=3D raw.o cow.o qcow.o vdi.o vmdk.= o cloop.o dmg.o bochs.o vpc.o vv >> =A0block-nested-y +=3D qcow2.o qcow2-refcount.o qcow2-cluster.o qcow= 2-snapshot.o qcow2-cache.o >> =A0block-nested-y +=3D qed.o qed-gencb.o qed-l2-cache.o qed-table.o = qed-cluster.o >> =A0block-nested-y +=3D qed-check.o >> -block-nested-y +=3D parallels.o nbd.o blkdebug.o sheepdog.o blkveri= fy.o >> +block-nested-y +=3D parallels.o nbd.o blkdebug.o sheepdog.o blkveri= fy.o blk-queue.o >> =A0block-nested-$(CONFIG_WIN32) +=3D raw-win32.o >> =A0block-nested-$(CONFIG_POSIX) +=3D raw-posix.o >> =A0block-nested-$(CONFIG_CURL) +=3D curl.o >> diff --git a/block.c b/block.c >> index 24a25d5..e54e59c 100644 >> --- a/block.c >> +++ b/block.c >> @@ -29,6 +29,9 @@ >> =A0#include "module.h" >> =A0#include "qemu-objects.h" >> >> +#include "qemu-timer.h" >> +#include "block/blk-queue.h" >> + >> =A0#ifdef CONFIG_BSD >> =A0#include >> =A0#include >> @@ -58,6 +61,13 @@ static int bdrv_read_em(BlockDriverState *bs, int= 64_t sector_num, >> =A0static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num= , >> =A0 =A0 =A0 =A0 =A0 
=A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 const uint8_t *b= uf, int nb_sectors); >> >> +static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sec= tors, >> + =A0 =A0 =A0 =A0bool is_write, double elapsed_time, uint64_t *wait)= ; >> +static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_w= rite, >> + =A0 =A0 =A0 =A0double elapsed_time, uint64_t *wait); >> +static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sect= ors, >> + =A0 =A0 =A0 =A0bool is_write, uint64_t *wait); >> + >> =A0static QTAILQ_HEAD(, BlockDriverState) bdrv_states =3D >> =A0 =A0 =A0QTAILQ_HEAD_INITIALIZER(bdrv_states); >> >> @@ -90,6 +100,20 @@ int is_windows_drive(const char *filename) >> =A0} >> =A0#endif >> >> +static int bdrv_io_limits_enable(BlockIOLimit *io_limits) >> +{ >> + =A0 =A0if ((io_limits->bps[0] =3D=3D 0) >> + =A0 =A0 =A0 =A0 && (io_limits->bps[1] =3D=3D 0) >> + =A0 =A0 =A0 =A0 && (io_limits->bps[2] =3D=3D 0) >> + =A0 =A0 =A0 =A0 && (io_limits->iops[0] =3D=3D 0) >> + =A0 =A0 =A0 =A0 && (io_limits->iops[1] =3D=3D 0) >> + =A0 =A0 =A0 =A0 && (io_limits->iops[2] =3D=3D 0)) { >> + =A0 =A0 =A0 =A0return 0; >> + =A0 =A0} >> + >> + =A0 =A0return 1; >> +} >> + >> =A0/* check if the path starts with ":" */ >> =A0static int path_has_protocol(const char *path) >> =A0{ >> @@ -167,6 +191,28 @@ void path_combine(char *dest, int dest_size, >> =A0 =A0 =A0} >> =A0} >> >> +static void bdrv_block_timer(void *opaque) >> +{ >> + =A0 =A0BlockDriverState *bs =3D opaque; >> + =A0 =A0BlockQueue *queue =3D bs->block_queue; >> + >> + =A0 =A0while (!QTAILQ_EMPTY(&queue->requests)) { >> + =A0 =A0 =A0 =A0BlockIORequest *request; >> + =A0 =A0 =A0 =A0int ret; >> + >> + =A0 =A0 =A0 =A0request =3D QTAILQ_FIRST(&queue->requests); >> + =A0 =A0 =A0 =A0QTAILQ_REMOVE(&queue->requests, request, entry); >> + >> + =A0 =A0 =A0 =A0ret =3D qemu_block_queue_handler(request); >> + =A0 =A0 =A0 =A0if (ret =3D=3D 0) { >> + =A0 =A0 =A0 =A0 =A0 =A0QTAILQ_INSERT_HEAD(&queue->requests, reques= t, entry); >> + =A0 =A0 
=A0 =A0 =A0 =A0break; >> + =A0 =A0 =A0 =A0} >> + >> + =A0 =A0 =A0 =A0qemu_free(request); >> + =A0 =A0} >> +} >> + >> =A0void bdrv_register(BlockDriver *bdrv) >> =A0{ >> =A0 =A0 =A0if (!bdrv->bdrv_aio_readv) { >> @@ -642,6 +688,15 @@ int bdrv_open(BlockDriverState *bs, const char = *filename, int flags, >> =A0 =A0 =A0 =A0 =A0 =A0 =A0bs->change_cb(bs->change_opaque, CHANGE_M= EDIA); >> =A0 =A0 =A0} >> >> + =A0 =A0/* throttling disk I/O limits */ >> + =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0bs->block_queue =3D qemu_new_block_queue(); >> + =A0 =A0 =A0 =A0bs->block_timer =3D qemu_new_timer_ns(vm_clock, bdr= v_block_timer, bs); >> + >> + =A0 =A0 =A0 =A0bs->slice_start[0] =3D qemu_get_clock_ns(vm_clock); >> + =A0 =A0 =A0 =A0bs->slice_start[1] =3D qemu_get_clock_ns(vm_clock); >> + =A0 =A0} >> + > > It should be possible to tune the limits on the flight, please introd= uce > QMP commands for that. Yeah, I am working on this. > >> =A0 =A0 =A0return 0; >> >> =A0unlink_and_fail: >> @@ -680,6 +735,16 @@ void bdrv_close(BlockDriverState *bs) >> =A0 =A0 =A0 =A0 =A0if (bs->change_cb) >> =A0 =A0 =A0 =A0 =A0 =A0 =A0bs->change_cb(bs->change_opaque, CHANGE_M= EDIA); >> =A0 =A0 =A0} >> + >> + =A0 =A0/* throttling disk I/O limits */ >> + =A0 =A0if (bs->block_queue) { >> + =A0 =A0 =A0 =A0qemu_del_block_queue(bs->block_queue); >> + =A0 =A0} >> + >> + =A0 =A0if (bs->block_timer) { >> + =A0 =A0 =A0 =A0qemu_del_timer(bs->block_timer); >> + =A0 =A0 =A0 =A0qemu_free_timer(bs->block_timer); >> + =A0 =A0} >> =A0} >> >> =A0void bdrv_close_all(void) >> @@ -1312,6 +1377,14 @@ void bdrv_get_geometry_hint(BlockDriverState = *bs, >> =A0 =A0 =A0*psecs =3D bs->secs; >> =A0} >> >> +/* throttling disk io limits */ >> +void bdrv_set_io_limits(BlockDriverState *bs, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 BlockIOLimit *io_l= imits) >> +{ >> + =A0 =A0memset(&bs->io_limits, 0, sizeof(BlockIOLimit)); >> + =A0 =A0bs->io_limits =3D *io_limits; >> +} >> + >> =A0/* 
Recognize floppy formats */ >> =A0typedef struct FDFormat { >> =A0 =A0 =A0FDriveType drive; >> @@ -2111,6 +2184,155 @@ char *bdrv_snapshot_dump(char *buf, int buf_= size, QEMUSnapshotInfo *sn) >> =A0 =A0 =A0return buf; >> =A0} >> >> +static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sec= tors, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 bool is_write, double elapsed_time= , uint64_t *wait) { >> + =A0 =A0uint64_t bps_limit =3D 0; >> + =A0 =A0double =A0 bytes_limit, bytes_disp, bytes_res; >> + =A0 =A0double =A0 slice_time =3D 0.1, wait_time; >> + >> + =A0 =A0if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { >> + =A0 =A0 =A0 =A0bps_limit =3D bs->io_limits.bps[BLOCK_IO_LIMIT_TOTA= L]; >> + =A0 =A0} else if (bs->io_limits.bps[is_write]) { >> + =A0 =A0 =A0 =A0bps_limit =3D bs->io_limits.bps[is_write]; >> + =A0 =A0} else { >> + =A0 =A0 =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0 =A0 =A0*wait =3D 0; >> + =A0 =A0 =A0 =A0} >> + >> + =A0 =A0 =A0 =A0return false; >> + =A0 =A0} >> + >> + =A0 =A0bytes_limit =A0 =A0 =A0=3D bps_limit * slice_time; >> + =A0 =A0bytes_disp =A0=3D bs->io_disps.bytes[is_write]; >> + =A0 =A0if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { >> + =A0 =A0 =A0 =A0bytes_disp +=3D bs->io_disps.bytes[!is_write]; >> + =A0 =A0} >> + >> + =A0 =A0bytes_res =A0 =3D (unsigned) nb_sectors * BDRV_SECTOR_SIZE; > > Virtio can submit requests of 512 sectors or more... does not play > well with 1MB/sec limit. Yeah, thanks for your catch. I will fix this. 
> >> + =A0 =A0if (bytes_disp + bytes_res <=3D bytes_limit) { >> + =A0 =A0 =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0 =A0 =A0*wait =3D 0; >> + =A0 =A0 =A0 =A0} >> + >> + =A0 =A0 =A0 =A0return false; >> + =A0 =A0} >> + >> + =A0 =A0/* Calc approx time to dispatch */ >> + =A0 =A0wait_time =3D (bytes_disp + bytes_res - bytes_limit) / bps_= limit; >> + =A0 =A0if (!wait_time) { >> + =A0 =A0 =A0 =A0wait_time =3D 1; >> + =A0 =A0} >> + >> + =A0 =A0wait_time =3D wait_time + (slice_time - elapsed_time); >> + =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0*wait =3D wait_time * BLOCK_IO_SLICE_TIME * 10 + 1; >> + =A0 =A0} > > The guest can keep submitting requests where "wait_time =3D 1" above, > and the timer will be rearmed continuously in the future. Can't you > simply arm the timer to the next slice start? _Some_ data must be > transfered by then, anyway (and nothing can be transfered earlier tha= n > that). Sorry, I have not got what you mean. Can you elaborate in more detail? > > Same for iops calculation below. 
> >> + >> + =A0 =A0return true; >> +} >> + >> +static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_w= rite, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 double ela= psed_time, uint64_t *wait) { >> + =A0 =A0uint64_t iops_limit =3D 0; >> + =A0 =A0double =A0 ios_limit, ios_disp; >> + =A0 =A0double =A0 slice_time =3D 0.1, wait_time; >> + >> + =A0 =A0if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { >> + =A0 =A0 =A0 =A0iops_limit =3D bs->io_limits.iops[BLOCK_IO_LIMIT_TO= TAL]; >> + =A0 =A0} else if (bs->io_limits.iops[is_write]) { >> + =A0 =A0 =A0 =A0iops_limit =3D bs->io_limits.iops[is_write]; >> + =A0 =A0} else { >> + =A0 =A0 =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0 =A0 =A0*wait =3D 0; >> + =A0 =A0 =A0 =A0} >> + >> + =A0 =A0 =A0 =A0return false; >> + =A0 =A0} >> + >> + =A0 =A0ios_limit =3D iops_limit * slice_time; >> + =A0 =A0ios_disp =A0=3D bs->io_disps.ios[is_write]; >> + =A0 =A0if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { >> + =A0 =A0 =A0 =A0ios_disp +=3D bs->io_disps.ios[!is_write]; >> + =A0 =A0} >> + >> + =A0 =A0if (ios_disp + 1 <=3D ios_limit) { >> + =A0 =A0 if (wait) { >> + =A0 =A0 =A0 =A0 *wait =3D 0; >> + =A0 =A0 } >> + >> + =A0 =A0 =A0 =A0return false; >> + =A0 =A0} >> + >> + =A0 =A0/* Calc approx time to dispatch */ >> + =A0 =A0wait_time =3D (ios_disp + 1) / iops_limit; >> + =A0 =A0if (wait_time > elapsed_time) { >> + =A0 =A0 wait_time =3D wait_time - elapsed_time; >> + =A0 =A0} else { >> + =A0 =A0 wait_time =3D 0; >> + =A0 =A0} >> + >> + =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0*wait =3D wait_time * BLOCK_IO_SLICE_TIME * 10 + 1; >> + =A0 =A0} >> + >> + =A0 =A0return true; >> +} >> + >> +static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sect= ors, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 bool is_write,= uint64_t *wait) { >> + =A0 =A0int64_t =A0real_time; >> + =A0 =A0uint64_t bps_wait =3D 0, iops_wait =3D 0, max_wait; >> + =A0 =A0double =A0 elapsed_time; >> + =A0 =A0int =A0 =A0 =A0bps_ret, iops_ret; >> 
+ >> + =A0 =A0real_time =3D qemu_get_clock_ns(vm_clock); >> + =A0 =A0if (bs->slice_start[is_write] + BLOCK_IO_SLICE_TIME <=3D re= al_time) { >> + =A0 =A0 =A0 =A0bs->slice_start[is_write] =3D real_time; >> + >> + =A0 =A0 =A0 =A0bs->io_disps.bytes[is_write] =A0 =3D 0; >> + =A0 =A0 =A0 =A0bs->io_disps.bytes[!is_write] =A0=3D 0; >> + >> + =A0 =A0 =A0 =A0bs->io_disps.ios[is_write] =A0 =A0 =3D 0; >> + =A0 =A0 =A0 =A0bs->io_disps.ios[!is_write] =A0 =A0=3D 0; >> + =A0 =A0} >> + >> + =A0 =A0/* If a limit was exceeded, immediately queue this request = */ >> + =A0 =A0if ((bs->req_from_queue =3D=3D false) >> + =A0 =A0 =A0 =A0&& !QTAILQ_EMPTY(&bs->block_queue->requests)) { >> + =A0 =A0 =A0 =A0if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL] >> + =A0 =A0 =A0 =A0 =A0 =A0|| bs->io_limits.bps[is_write] || bs->io_li= mits.iops[is_write] >> + =A0 =A0 =A0 =A0 =A0 =A0|| bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]= ) { >> + =A0 =A0 =A0 =A0 =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0*wait =3D 0; >> + =A0 =A0 =A0 =A0 =A0 =A0} >> + >> + =A0 =A0 =A0 =A0 =A0 =A0return true; >> + =A0 =A0 =A0 =A0} >> + =A0 =A0} >> + >> + =A0 =A0elapsed_time =A0=3D real_time - bs->slice_start[is_write]; >> + =A0 =A0elapsed_time =A0/=3D (BLOCK_IO_SLICE_TIME * 10.0); >> + >> + =A0 =A0bps_ret =A0=3D bdrv_exceed_bps_limits(bs, nb_sectors, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0= =A0 =A0is_write, elapsed_time, &bps_wait); >> + =A0 =A0iops_ret =3D bdrv_exceed_iops_limits(bs, is_write, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0= =A0 =A0elapsed_time, &iops_wait); >> + =A0 =A0if (bps_ret || iops_ret) { >> + =A0 =A0 =A0 =A0max_wait =3D bps_wait > iops_wait ? 
bps_wait : iops= _wait; >> + =A0 =A0 =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0 =A0 =A0*wait =3D max_wait; >> + =A0 =A0 =A0 =A0} >> + >> + =A0 =A0 =A0 =A0return true; >> + =A0 =A0} >> + >> + =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0*wait =3D 0; >> + =A0 =A0} >> + >> + =A0 =A0return false; >> +} >> >> =A0/**************************************************************/ >> =A0/* async I/Os */ >> @@ -2121,13 +2343,28 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDriver= State *bs, int64_t sector_num, >> =A0{ >> =A0 =A0 =A0BlockDriver *drv =3D bs->drv; >> =A0 =A0 =A0BlockDriverAIOCB *ret; >> + =A0 =A0uint64_t wait_time =3D 0; >> >> =A0 =A0 =A0trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque); >> >> - =A0 =A0if (!drv) >> - =A0 =A0 =A0 =A0return NULL; >> - =A0 =A0if (bdrv_check_request(bs, sector_num, nb_sectors)) >> + =A0 =A0if (!drv || bdrv_check_request(bs, sector_num, nb_sectors))= { >> + =A0 =A0 =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0 =A0 =A0bs->req_from_queue =3D false; >> + =A0 =A0 =A0 =A0} >> =A0 =A0 =A0 =A0 =A0return NULL; >> + =A0 =A0} >> + >> + =A0 =A0/* throttling disk read I/O */ >> + =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0if (bdrv_exceed_io_limits(bs, nb_sectors, false, &w= ait_time)) { >> + =A0 =A0 =A0 =A0 =A0 =A0ret =3D qemu_block_queue_enqueue(bs->block_= queue, bs, bdrv_aio_readv, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0sector_= num, qiov, nb_sectors, cb, opaque); >> + =A0 =A0 =A0 =A0 =A0 =A0qemu_mod_timer(bs->block_timer, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 wait_time + qemu_get_c= lock_ns(vm_clock)); >> + =A0 =A0 =A0 =A0 =A0 =A0bs->req_from_queue =3D false; >> + =A0 =A0 =A0 =A0 =A0 =A0return ret; >> + =A0 =A0 =A0 =A0} >> + =A0 =A0} >> >> =A0 =A0 =A0ret =3D drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sect= ors, >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0cb, o= paque); >> @@ -2136,6 +2373,16 @@ BlockDriverAIOCB 
*bdrv_aio_readv(BlockDriverS= tate *bs, int64_t sector_num, >> =A0 =A0 =A0 /* Update stats even though technically transfer has not= happened. */ >> =A0 =A0 =A0 bs->rd_bytes +=3D (unsigned) nb_sectors * BDRV_SECTOR_SI= ZE; >> =A0 =A0 =A0 bs->rd_ops ++; >> + >> + =A0 =A0 =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0 =A0 =A0bs->io_disps.bytes[BLOCK_IO_LIMIT_READ] +=3D >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0(unsign= ed) nb_sectors * BDRV_SECTOR_SIZE; >> + =A0 =A0 =A0 =A0 =A0 =A0bs->io_disps.ios[BLOCK_IO_LIMIT_READ]++; >> + =A0 =A0 =A0 =A0} >> + =A0 =A0} >> + >> + =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0bs->req_from_queue =3D false; >> =A0 =A0 =A0} >> >> =A0 =A0 =A0return ret; >> @@ -2184,15 +2431,18 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDrive= rState *bs, int64_t sector_num, >> =A0 =A0 =A0BlockDriver *drv =3D bs->drv; >> =A0 =A0 =A0BlockDriverAIOCB *ret; >> =A0 =A0 =A0BlockCompleteData *blk_cb_data; >> + =A0 =A0uint64_t wait_time =3D 0; >> >> =A0 =A0 =A0trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque)= ; >> >> - =A0 =A0if (!drv) >> - =A0 =A0 =A0 =A0return NULL; >> - =A0 =A0if (bs->read_only) >> - =A0 =A0 =A0 =A0return NULL; >> - =A0 =A0if (bdrv_check_request(bs, sector_num, nb_sectors)) >> + =A0 =A0if (!drv || bs->read_only >> + =A0 =A0 =A0 =A0|| bdrv_check_request(bs, sector_num, nb_sectors)) = { >> + =A0 =A0 =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0 =A0 =A0bs->req_from_queue =3D false; >> + =A0 =A0 =A0 =A0} >> + >> =A0 =A0 =A0 =A0 =A0return NULL; >> + =A0 =A0} >> >> =A0 =A0 =A0if (bs->dirty_bitmap) { >> =A0 =A0 =A0 =A0 =A0blk_cb_data =3D blk_dirty_cb_alloc(bs, sector_num= , nb_sectors, cb, >> @@ -2201,6 +2451,18 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriver= State *bs, int64_t sector_num, >> =A0 =A0 =A0 =A0 =A0opaque =3D blk_cb_data; >> =A0 =A0 =A0} >> >> + =A0 =A0/* throttling disk write I/O */ >> + =A0 =A0if 
(bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0if (bdrv_exceed_io_limits(bs, nb_sectors, true, &wa= it_time)) { >> + =A0 =A0 =A0 =A0 =A0 =A0ret =3D qemu_block_queue_enqueue(bs->block_= queue, bs, bdrv_aio_writev, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0= sector_num, qiov, nb_sectors, cb, opaque); >> + =A0 =A0 =A0 =A0 =A0 =A0qemu_mod_timer(bs->block_timer, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0= wait_time + qemu_get_clock_ns(vm_clock)); >> + =A0 =A0 =A0 =A0 =A0 =A0bs->req_from_queue =3D false; >> + =A0 =A0 =A0 =A0 =A0 =A0return ret; >> + =A0 =A0 =A0 =A0} >> + =A0 =A0} >> + >> =A0 =A0 =A0ret =3D drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sec= tors, >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 cb, = opaque); >> >> @@ -2211,6 +2473,16 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriver= State *bs, int64_t sector_num, >> =A0 =A0 =A0 =A0 =A0if (bs->wr_highest_sector < sector_num + nb_secto= rs - 1) { >> =A0 =A0 =A0 =A0 =A0 =A0 =A0bs->wr_highest_sector =3D sector_num + nb= _sectors - 1; >> =A0 =A0 =A0 =A0 =A0} >> + >> + =A0 =A0 =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0 =A0 =A0bs->io_disps.bytes[BLOCK_IO_LIMIT_WRITE] +=3D >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 (unsig= ned) nb_sectors * BDRV_SECTOR_SIZE; >> + =A0 =A0 =A0 =A0 =A0 =A0bs->io_disps.ios[BLOCK_IO_LIMIT_WRITE]++; >> + =A0 =A0 =A0 =A0} >> + =A0 =A0} >> + >> + =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0bs->req_from_queue =3D false; >> =A0 =A0 =A0} >> >> =A0 =A0 =A0return ret; >> diff --git a/block.h b/block.h >> index 859d1d9..f0dac62 100644 >> --- a/block.h >> +++ b/block.h >> @@ -97,7 +97,6 @@ int bdrv_change_backing_file(BlockDriverState *bs, >> =A0 =A0 =A0const char *backing_file, const char *backing_fmt); >> =A0void bdrv_register(BlockDriver *bdrv); >> >> - >> =A0typedef struct BdrvCheckResult { >> =A0 =A0 =A0int corruptions; 
>> =A0 =A0 =A0int leaks; >> diff --git a/block/blk-queue.c b/block/blk-queue.c >> new file mode 100644 >> index 0000000..09fcfe9 >> --- /dev/null >> +++ b/block/blk-queue.c >> @@ -0,0 +1,116 @@ >> +/* >> + * QEMU System Emulator queue definition for block layer >> + * >> + * Copyright (c) 2011 Zhi Yong Wu =A0 >> + * >> + * Permission is hereby granted, free of charge, to any person obta= ining a copy >> + * of this software and associated documentation files (the "Softwa= re"), to deal >> + * in the Software without restriction, including without limitatio= n the rights >> + * to use, copy, modify, merge, publish, distribute, sublicense, an= d/or sell >> + * copies of the Software, and to permit persons to whom the Softwa= re is >> + * furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be i= ncluded in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, = EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANT= ABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVEN= T SHALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGE= S OR OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, = ARISING FROM, >> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DE= ALINGS IN >> + * THE SOFTWARE. >> + */ >> + >> +#include "block_int.h" >> +#include "qemu-queue.h" >> +#include "block/blk-queue.h" >> + >> +/* The APIs for block request queue on qemu block layer. 
>> + */ >> + >> +static void qemu_block_queue_cancel(BlockDriverAIOCB *acb) >> +{ >> + =A0 =A0qemu_aio_release(acb); >> +} >> + >> +static AIOPool block_queue_pool =3D { >> + =A0 =A0.aiocb_size =A0 =A0 =A0 =A0 =3D sizeof(struct BlockDriverAI= OCB), >> + =A0 =A0.cancel =A0 =A0 =A0 =A0 =A0 =A0 =3D qemu_block_queue_cancel= , >> +}; >> + >> +static void qemu_block_queue_callback(void *opaque, int ret) >> +{ >> + =A0 =A0BlockDriverAIOCB *acb =3D opaque; >> + >> + =A0 =A0qemu_aio_release(acb); >> +} >> + >> +BlockQueue *qemu_new_block_queue(void) >> +{ >> + =A0 =A0BlockQueue *queue; >> + >> + =A0 =A0queue =3D qemu_mallocz(sizeof(BlockQueue)); >> + >> + =A0 =A0QTAILQ_INIT(&queue->requests); >> + >> + =A0 =A0return queue; >> +} >> + >> +void qemu_del_block_queue(BlockQueue *queue) >> +{ >> + =A0 =A0BlockIORequest *request, *next; >> + >> + =A0 =A0QTAILQ_FOREACH_SAFE(request, &queue->requests, entry, next)= { >> + =A0 =A0 =A0 =A0QTAILQ_REMOVE(&queue->requests, request, entry); >> + =A0 =A0 =A0 =A0qemu_free(request); >> + =A0 =A0} >> + >> + =A0 =A0qemu_free(queue); >> +} >> + >> +BlockDriverAIOCB *qemu_block_queue_enqueue(BlockQueue *queue, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 BlockDriverState *bs, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 BlockRequestHandler *handl= er, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 int64_t sector_num, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 QEMUIOVector *qiov, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 int nb_sectors, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 BlockDriverCompletionFunc = *cb, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 void *opaque) >> +{ >> + =A0 =A0BlockIORequest *request; >> + =A0 =A0BlockDriverAIOCB *acb; >> + >> + =A0 =A0request =3D qemu_malloc(sizeof(BlockIORequest)); >> + =A0 =A0request->bs =3D bs; >> + =A0 =A0request->handler =3D handler; >> + =A0 =A0request->sector_num =3D sector_num; >> + =A0 =A0request->qiov =3D qiov; >> + =A0 =A0request->nb_sectors =3D nb_sectors; >> + =A0 =A0request->cb =3D 
cb; >> + =A0 =A0request->opaque =3D opaque; >> + >> + =A0 =A0QTAILQ_INSERT_TAIL(&queue->requests, request, entry); >> + >> + =A0 =A0acb =3D qemu_aio_get(&block_queue_pool, bs, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 qemu_block_queue_callb= ack, opaque); >> + >> + =A0 =A0return acb; >> +} >> + >> +int qemu_block_queue_handler(BlockIORequest *request) >> +{ >> + =A0 =A0int ret; >> + =A0 =A0BlockDriverAIOCB *res; >> + >> + =A0 =A0/* indicate this req is from block queue */ >> + =A0 =A0request->bs->req_from_queue =3D true; >> + >> + =A0 =A0res =3D request->handler(request->bs, request->sector_num, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 request->q= iov, request->nb_sectors, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 request->c= b, request->opaque); >> + >> + =A0 =A0ret =3D (res =3D=3D NULL) ? 0 : 1; >> + >> + =A0 =A0return ret; >> +} >> diff --git a/block/blk-queue.h b/block/blk-queue.h >> new file mode 100644 >> index 0000000..47f8a36 >> --- /dev/null >> +++ b/block/blk-queue.h >> @@ -0,0 +1,70 @@ >> +/* >> + * QEMU System Emulator queue declaration for block layer >> + * >> + * Copyright (c) 2011 Zhi Yong Wu =A0 >> + * >> + * Permission is hereby granted, free of charge, to any person obta= ining a copy >> + * of this software and associated documentation files (the "Softwa= re"), to deal >> + * in the Software without restriction, including without limitatio= n the rights >> + * to use, copy, modify, merge, publish, distribute, sublicense, an= d/or sell >> + * copies of the Software, and to permit persons to whom the Softwa= re is >> + * furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be i= ncluded in >> + * all copies or substantial portions of the Software. 
>> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, = EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANT= ABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVEN= T SHALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGE= S OR OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, = ARISING FROM, >> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DE= ALINGS IN >> + * THE SOFTWARE. >> + */ >> + >> +#ifndef QEMU_BLOCK_QUEUE_H >> +#define QEMU_BLOCK_QUEUE_H >> + >> +#include "block.h" >> +#include "qemu-queue.h" >> +#include "qemu-common.h" >> + >> +typedef BlockDriverAIOCB* (BlockRequestHandler) (BlockDriverState *= bs, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0int= 64_t sector_num, QEMUIOVector *qiov, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0int= nb_sectors, BlockDriverCompletionFunc *cb, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0voi= d *opaque); >> + >> +struct BlockIORequest { >> + =A0 =A0QTAILQ_ENTRY(BlockIORequest) entry; >> + =A0 =A0BlockDriverState *bs; >> + =A0 =A0BlockRequestHandler *handler; >> + =A0 =A0int64_t sector_num; >> + =A0 =A0QEMUIOVector *qiov; >> + =A0 =A0int nb_sectors; >> + =A0 =A0BlockDriverCompletionFunc *cb; >> + =A0 =A0void *opaque; >> +}; >> + >> +typedef struct BlockIORequest BlockIORequest; >> + >> +struct BlockQueue { >> + =A0 =A0QTAILQ_HEAD(requests, BlockIORequest) requests; >> +}; >> + >> +typedef struct BlockQueue BlockQueue; >> + >> +BlockQueue *qemu_new_block_queue(void); >> + >> +void qemu_del_block_queue(BlockQueue *queue); >> + >> +BlockDriverAIOCB *qemu_block_queue_enqueue(BlockQueue *queue, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0BlockDriverState *b= s, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0BlockRequestHandler= *handler, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0int64_t 
sector_num, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0QEMUIOVector *qiov, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0int nb_sectors, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0BlockDriverCompleti= onFunc *cb, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0void *opaque); >> + >> +int qemu_block_queue_handler(BlockIORequest *request); >> +#endif /* QEMU_BLOCK_QUEUE_H */ >> diff --git a/block_int.h b/block_int.h >> index 1e265d2..1587171 100644 >> --- a/block_int.h >> +++ b/block_int.h >> @@ -27,10 +27,17 @@ >> =A0#include "block.h" >> =A0#include "qemu-option.h" >> =A0#include "qemu-queue.h" >> +#include "block/blk-queue.h" >> >> =A0#define BLOCK_FLAG_ENCRYPT =A0 1 >> =A0#define BLOCK_FLAG_COMPAT6 =A0 4 >> >> +#define BLOCK_IO_LIMIT_READ =A0 =A0 0 >> +#define BLOCK_IO_LIMIT_WRITE =A0 =A01 >> +#define BLOCK_IO_LIMIT_TOTAL =A0 =A02 >> + >> +#define BLOCK_IO_SLICE_TIME =A0 =A0 100000000 >> + >> =A0#define BLOCK_OPT_SIZE =A0 =A0 =A0 =A0 =A0"size" >> =A0#define BLOCK_OPT_ENCRYPT =A0 =A0 =A0 "encryption" >> =A0#define BLOCK_OPT_COMPAT6 =A0 =A0 =A0 "compat6" >> @@ -46,6 +53,16 @@ typedef struct AIOPool { >> =A0 =A0 =A0BlockDriverAIOCB *free_aiocb; >> =A0} AIOPool; >> >> +typedef struct BlockIOLimit { >> + =A0 =A0uint64_t bps[3]; >> + =A0 =A0uint64_t iops[3]; >> +} BlockIOLimit; >> + >> +typedef struct BlockIODisp { >> + =A0 =A0uint64_t bytes[2]; >> + =A0 =A0uint64_t ios[2]; >> +} BlockIODisp; >> + >> =A0struct BlockDriver { >> =A0 =A0 =A0const char *format_name; >> =A0 =A0 =A0int instance_size; >> @@ -175,6 +192,14 @@ struct BlockDriverState { >> >> =A0 =A0 =A0void *sync_aiocb; >> >> + =A0 =A0/* the time for latest disk I/O */ >> + =A0 =A0int64_t slice_start[2]; >> + =A0 =A0BlockIOLimit io_limits; >> + =A0 =A0BlockIODisp =A0io_disps; >> + =A0 =A0BlockQueue =A0 *block_queue; >> + =A0 =A0QEMUTimer =A0 =A0*block_timer; >> + =A0 =A0bool =A0 =A0 =A0 =A0 req_from_queue; >> + >> =A0 =A0 =A0/* I/O stats (display with "info blockstats"). 
*/ >> =A0 =A0 =A0uint64_t rd_bytes; >> =A0 =A0 =A0uint64_t wr_bytes; >> @@ -222,6 +247,9 @@ void qemu_aio_release(void *p); >> >> =A0void *qemu_blockalign(BlockDriverState *bs, size_t size); >> >> +void bdrv_set_io_limits(BlockDriverState *bs, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0BlockIOLimi= t *io_limits); >> + >> =A0#ifdef _WIN32 >> =A0int is_windows_drive(const char *filename); >> =A0#endif >> diff --git a/blockdev.c b/blockdev.c >> index c263663..45602f4 100644 >> --- a/blockdev.c >> +++ b/blockdev.c >> @@ -238,6 +238,9 @@ DriveInfo *drive_init(QemuOpts *opts, int defaul= t_to_scsi) >> =A0 =A0 =A0int on_read_error, on_write_error; >> =A0 =A0 =A0const char *devaddr; >> =A0 =A0 =A0DriveInfo *dinfo; >> + =A0 =A0BlockIOLimit io_limits; >> + =A0 =A0bool iol_flag =3D false; >> + =A0 =A0const char *iol_opts[7] =3D {"bps", "bps_rd", "bps_wr", "io= ps", "iops_rd", "iops_wr"}; >> =A0 =A0 =A0int is_extboot =3D 0; >> =A0 =A0 =A0int snapshot =3D 0; >> =A0 =A0 =A0int ret; >> @@ -372,6 +375,19 @@ DriveInfo *drive_init(QemuOpts *opts, int defau= lt_to_scsi) >> =A0 =A0 =A0 =A0 =A0return NULL; >> =A0 =A0 =A0} >> >> + =A0 =A0/* disk io limits */ >> + =A0 =A0iol_flag =3D qemu_opt_io_limits_enable_flag(opts, iol_opts)= ; >> + =A0 =A0if (iol_flag) { >> + =A0 =A0 =A0 =A0memset(&io_limits, 0, sizeof(BlockIOLimit)); >> + >> + =A0 =A0 =A0 =A0io_limits.bps[2] =A0=3D qemu_opt_get_number(opts, "= bps", 0); >> + =A0 =A0 =A0 =A0io_limits.bps[0] =A0=3D qemu_opt_get_number(opts, "= bps_rd", 0); >> + =A0 =A0 =A0 =A0io_limits.bps[1] =A0=3D qemu_opt_get_number(opts, "= bps_wr", 0); >> + =A0 =A0 =A0 =A0io_limits.iops[2] =3D qemu_opt_get_number(opts, "io= ps", 0); >> + =A0 =A0 =A0 =A0io_limits.iops[0] =3D qemu_opt_get_number(opts, "io= ps_rd", 0); >> + =A0 =A0 =A0 =A0io_limits.iops[1] =3D qemu_opt_get_number(opts, "io= ps_wr", 0); >> + =A0 =A0} >> + >> =A0 =A0 =A0on_write_error =3D BLOCK_ERR_STOP_ENOSPC; >> =A0 =A0 =A0if ((buf =3D qemu_opt_get(opts, "werror")) !=3D NULL) { >> 
=A0 =A0 =A0 =A0 =A0if (type !=3D IF_IDE && type !=3D IF_SCSI && type= !=3D IF_VIRTIO && type !=3D IF_NONE) { >> @@ -483,6 +499,11 @@ DriveInfo *drive_init(QemuOpts *opts, int defau= lt_to_scsi) >> >> =A0 =A0 =A0bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_er= ror); >> >> + =A0 =A0/* throttling disk io limits */ >> + =A0 =A0if (iol_flag) { >> + =A0 =A0 =A0 =A0bdrv_set_io_limits(dinfo->bdrv, &io_limits); >> + =A0 =A0} >> + >> =A0 =A0 =A0switch(type) { >> =A0 =A0 =A0case IF_IDE: >> =A0 =A0 =A0case IF_SCSI: >> diff --git a/qemu-config.c b/qemu-config.c >> index efa892c..9232bbb 100644 >> --- a/qemu-config.c >> +++ b/qemu-config.c >> @@ -82,6 +82,30 @@ static QemuOptsList qemu_drive_opts =3D { >> =A0 =A0 =A0 =A0 =A0 =A0 =A0.name =3D "boot", >> =A0 =A0 =A0 =A0 =A0 =A0 =A0.type =3D QEMU_OPT_BOOL, >> =A0 =A0 =A0 =A0 =A0 =A0 =A0.help =3D "make this a boot drive", >> + =A0 =A0 =A0 =A0},{ >> + =A0 =A0 =A0 =A0 =A0 =A0.name =3D "iops", >> + =A0 =A0 =A0 =A0 =A0 =A0.type =3D QEMU_OPT_NUMBER, >> + =A0 =A0 =A0 =A0 =A0 =A0.help =3D "limit total I/O operations per s= econd", >> + =A0 =A0 =A0 =A0},{ >> + =A0 =A0 =A0 =A0 =A0 =A0.name =3D "iops_rd", >> + =A0 =A0 =A0 =A0 =A0 =A0.type =3D QEMU_OPT_NUMBER, >> + =A0 =A0 =A0 =A0 =A0 =A0.help =3D "limit read operations per second= ", >> + =A0 =A0 =A0 =A0},{ >> + =A0 =A0 =A0 =A0 =A0 =A0.name =3D "iops_wr", >> + =A0 =A0 =A0 =A0 =A0 =A0.type =3D QEMU_OPT_NUMBER, >> + =A0 =A0 =A0 =A0 =A0 =A0.help =3D "limit write operations per secon= d", >> + =A0 =A0 =A0 =A0},{ >> + =A0 =A0 =A0 =A0 =A0 =A0.name =3D "bps", >> + =A0 =A0 =A0 =A0 =A0 =A0.type =3D QEMU_OPT_NUMBER, >> + =A0 =A0 =A0 =A0 =A0 =A0.help =3D "limit total bytes per second", >> + =A0 =A0 =A0 =A0},{ >> + =A0 =A0 =A0 =A0 =A0 =A0.name =3D "bps_rd", >> + =A0 =A0 =A0 =A0 =A0 =A0.type =3D QEMU_OPT_NUMBER, >> + =A0 =A0 =A0 =A0 =A0 =A0.help =3D "limit read bytes per second", >> + =A0 =A0 =A0 =A0},{ >> + =A0 =A0 =A0 =A0 =A0 =A0.name =3D "bps_wr", >> + =A0 =A0 =A0 =A0 =A0 =A0.type =3D 
QEMU_OPT_NUMBER, >> + =A0 =A0 =A0 =A0 =A0 =A0.help =3D "limit write bytes per second", >> =A0 =A0 =A0 =A0 =A0}, >> =A0 =A0 =A0 =A0 =A0{ /* end of list */ } >> =A0 =A0 =A0}, >> diff --git a/qemu-option.c b/qemu-option.c >> index 65db542..9fe234d 100644 >> --- a/qemu-option.c >> +++ b/qemu-option.c >> @@ -562,6 +562,23 @@ uint64_t qemu_opt_get_number(QemuOpts *opts, co= nst char *name, uint64_t defval) >> =A0 =A0 =A0return opt->value.uint; >> =A0} >> >> +bool qemu_opt_io_limits_enable_flag(QemuOpts *opts, const char **io= l_opts) >> +{ >> + =A0 =A0 int i; >> + =A0 =A0 uint64_t opt_val =A0 =3D 0; >> + =A0 =A0 bool iol_flag =3D false; >> + >> + =A0 =A0 for (i =3D 0; iol_opts[i]; i++) { >> + =A0 =A0 =A0opt_val =3D qemu_opt_get_number(opts, iol_opts[i], 0); >> + =A0 =A0 =A0if (opt_val !=3D 0) { >> + =A0 =A0 =A0 =A0 =A0iol_flag =3D true; >> + =A0 =A0 =A0 =A0 =A0break; >> + =A0 =A0 =A0} >> + =A0 =A0 } >> + >> + =A0 =A0 return iol_flag; >> +} >> + >> =A0uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint= 64_t defval) >> =A0{ >> =A0 =A0 =A0QemuOpt *opt =3D qemu_opt_find(opts, name); >> diff --git a/qemu-option.h b/qemu-option.h >> index b515813..fc909f9 100644 >> --- a/qemu-option.h >> +++ b/qemu-option.h >> @@ -107,6 +107,7 @@ struct QemuOptsList { >> =A0const char *qemu_opt_get(QemuOpts *opts, const char *name); >> =A0int qemu_opt_get_bool(QemuOpts *opts, const char *name, int defva= l); >> =A0uint64_t qemu_opt_get_number(QemuOpts *opts, const char *name, ui= nt64_t defval); >> +bool qemu_opt_io_limits_enable_flag(QemuOpts *opts, const char **io= l_opts); >> =A0uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint= 64_t defval); >> =A0int qemu_opt_set(QemuOpts *opts, const char *name, const char *va= lue); >> =A0typedef int (*qemu_opt_loopfunc)(const char *name, const char *va= lue, void *opaque); >> diff --git a/qemu-options.hx b/qemu-options.hx >> index cb3347e..ae219f5 100644 >> --- a/qemu-options.hx >> +++ b/qemu-options.hx >> @@ -121,6 
+121,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive, >> =A0 =A0 =A0" =A0 =A0 =A0 [,cache=3Dwritethrough|writeback|none|unsaf= e][,format=3Df]\n" >> =A0 =A0 =A0" =A0 =A0 =A0 [,serial=3Ds][,addr=3DA][,id=3Dname][,aio=3D= threads|native]\n" >> =A0 =A0 =A0" =A0 =A0 =A0 [,readonly=3Don|off][,boot=3Don|off]\n" >> + =A0 =A0" =A0 =A0 =A0 [[,bps=3Db]|[[,bps_rd=3Dr][,bps_wr=3Dw]]][[,i= ops=3Di]|[[,iops_rd=3Dr][,iops_wr=3Dw]]\n" >> =A0 =A0 =A0" =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0use 'file' as a drive im= age\n", QEMU_ARCH_ALL) >> =A0STEXI >> =A0@item -drive @var{option}[,@var{option}[,@var{option}[,...]]] >> -- >> 1.7.2.3 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe kvm" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at =A0http://vger.kernel.org/majordomo-info.html > --=20 Regards, Zhi Yong Wu From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([140.186.70.92]:48560) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Qm1B3-00085s-3O for qemu-devel@nongnu.org; Wed, 27 Jul 2011 06:17:23 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Qm1B0-0002wO-8g for qemu-devel@nongnu.org; Wed, 27 Jul 2011 06:17:21 -0400 Received: from mail-gw0-f45.google.com ([74.125.83.45]:59444) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Qm1Az-0002wF-Q6 for qemu-devel@nongnu.org; Wed, 27 Jul 2011 06:17:18 -0400 Received: by gwb19 with SMTP id 19so1102353gwb.4 for ; Wed, 27 Jul 2011 03:17:17 -0700 (PDT) MIME-Version: 1.0 In-Reply-To: <20110726192618.GA8126@amt.cnet> References: <1311670746-20498-1-git-send-email-wuzhy@linux.vnet.ibm.com> <1311670746-20498-2-git-send-email-wuzhy@linux.vnet.ibm.com> <20110726192618.GA8126@amt.cnet> Date: Wed, 27 Jul 2011 18:17:15 +0800 Message-ID: From: Zhi Yong Wu Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH v2 1/1] The codes V2 for QEMU disk I/O limits. 
List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Marcelo Tosatti Cc: kwolf@redhat.com, aliguori@us.ibm.com, stefanha@linux.vnet.ibm.com, kvm@vger.kernel.org, Zhi Yong Wu , qemu-devel@nongnu.org, ryanh@us.ibm.com, vgoyal@redhat.com On Wed, Jul 27, 2011 at 3:26 AM, Marcelo Tosatti wrot= e: > On Tue, Jul 26, 2011 at 04:59:06PM +0800, Zhi Yong Wu wrote: >> Welcome to give me your comments, thanks. >> >> Signed-off-by: Zhi Yong Wu >> --- >> =A0Makefile.objs =A0 =A0 | =A0 =A02 +- >> =A0block.c =A0 =A0 =A0 =A0 =A0 | =A0288 ++++++++++++++++++++++++++++++++= +++++++++++++++++++-- >> =A0block.h =A0 =A0 =A0 =A0 =A0 | =A0 =A01 - >> =A0block/blk-queue.c | =A0116 +++++++++++++++++++++ >> =A0block/blk-queue.h | =A0 70 +++++++++++++ >> =A0block_int.h =A0 =A0 =A0 | =A0 28 +++++ >> =A0blockdev.c =A0 =A0 =A0 =A0| =A0 21 ++++ >> =A0qemu-config.c =A0 =A0 | =A0 24 +++++ >> =A0qemu-option.c =A0 =A0 | =A0 17 +++ >> =A0qemu-option.h =A0 =A0 | =A0 =A01 + >> =A0qemu-options.hx =A0 | =A0 =A01 + >> =A011 files changed, 559 insertions(+), 10 deletions(-) >> =A0create mode 100644 block/blk-queue.c >> =A0create mode 100644 block/blk-queue.h >> >> diff --git a/Makefile.objs b/Makefile.objs >> index 9f99ed4..06f2033 100644 >> --- a/Makefile.objs >> +++ b/Makefile.objs >> @@ -23,7 +23,7 @@ block-nested-y +=3D raw.o cow.o qcow.o vdi.o vmdk.o cl= oop.o dmg.o bochs.o vpc.o vv >> =A0block-nested-y +=3D qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-sn= apshot.o qcow2-cache.o >> =A0block-nested-y +=3D qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-= cluster.o >> =A0block-nested-y +=3D qed-check.o >> -block-nested-y +=3D parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o >> +block-nested-y +=3D parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o= blk-queue.o >> =A0block-nested-$(CONFIG_WIN32) +=3D raw-win32.o >> =A0block-nested-$(CONFIG_POSIX) +=3D raw-posix.o >> =A0block-nested-$(CONFIG_CURL) +=3D curl.o >> diff --git a/block.c b/block.c >> index 24a25d5..e54e59c 
100644 >> --- a/block.c >> +++ b/block.c >> @@ -29,6 +29,9 @@ >> =A0#include "module.h" >> =A0#include "qemu-objects.h" >> >> +#include "qemu-timer.h" >> +#include "block/blk-queue.h" >> + >> =A0#ifdef CONFIG_BSD >> =A0#include >> =A0#include >> @@ -58,6 +61,13 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t= sector_num, >> =A0static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 const uint8_t *buf, = int nb_sectors); >> >> +static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors= , >> + =A0 =A0 =A0 =A0bool is_write, double elapsed_time, uint64_t *wait); >> +static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write= , >> + =A0 =A0 =A0 =A0double elapsed_time, uint64_t *wait); >> +static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, >> + =A0 =A0 =A0 =A0bool is_write, uint64_t *wait); >> + >> =A0static QTAILQ_HEAD(, BlockDriverState) bdrv_states =3D >> =A0 =A0 =A0QTAILQ_HEAD_INITIALIZER(bdrv_states); >> >> @@ -90,6 +100,20 @@ int is_windows_drive(const char *filename) >> =A0} >> =A0#endif >> >> +static int bdrv_io_limits_enable(BlockIOLimit *io_limits) >> +{ >> + =A0 =A0if ((io_limits->bps[0] =3D=3D 0) >> + =A0 =A0 =A0 =A0 && (io_limits->bps[1] =3D=3D 0) >> + =A0 =A0 =A0 =A0 && (io_limits->bps[2] =3D=3D 0) >> + =A0 =A0 =A0 =A0 && (io_limits->iops[0] =3D=3D 0) >> + =A0 =A0 =A0 =A0 && (io_limits->iops[1] =3D=3D 0) >> + =A0 =A0 =A0 =A0 && (io_limits->iops[2] =3D=3D 0)) { >> + =A0 =A0 =A0 =A0return 0; >> + =A0 =A0} >> + >> + =A0 =A0return 1; >> +} >> + >> =A0/* check if the path starts with ":" */ >> =A0static int path_has_protocol(const char *path) >> =A0{ >> @@ -167,6 +191,28 @@ void path_combine(char *dest, int dest_size, >> =A0 =A0 =A0} >> =A0} >> >> +static void bdrv_block_timer(void *opaque) >> +{ >> + =A0 =A0BlockDriverState *bs =3D opaque; >> + =A0 =A0BlockQueue *queue =3D bs->block_queue; >> + >> + =A0 =A0while 
(!QTAILQ_EMPTY(&queue->requests)) { >> + =A0 =A0 =A0 =A0BlockIORequest *request; >> + =A0 =A0 =A0 =A0int ret; >> + >> + =A0 =A0 =A0 =A0request =3D QTAILQ_FIRST(&queue->requests); >> + =A0 =A0 =A0 =A0QTAILQ_REMOVE(&queue->requests, request, entry); >> + >> + =A0 =A0 =A0 =A0ret =3D qemu_block_queue_handler(request); >> + =A0 =A0 =A0 =A0if (ret =3D=3D 0) { >> + =A0 =A0 =A0 =A0 =A0 =A0QTAILQ_INSERT_HEAD(&queue->requests, request, e= ntry); >> + =A0 =A0 =A0 =A0 =A0 =A0break; >> + =A0 =A0 =A0 =A0} >> + >> + =A0 =A0 =A0 =A0qemu_free(request); >> + =A0 =A0} >> +} >> + >> =A0void bdrv_register(BlockDriver *bdrv) >> =A0{ >> =A0 =A0 =A0if (!bdrv->bdrv_aio_readv) { >> @@ -642,6 +688,15 @@ int bdrv_open(BlockDriverState *bs, const char *fil= ename, int flags, >> =A0 =A0 =A0 =A0 =A0 =A0 =A0bs->change_cb(bs->change_opaque, CHANGE_MEDIA= ); >> =A0 =A0 =A0} >> >> + =A0 =A0/* throttling disk I/O limits */ >> + =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0bs->block_queue =3D qemu_new_block_queue(); >> + =A0 =A0 =A0 =A0bs->block_timer =3D qemu_new_timer_ns(vm_clock, bdrv_bl= ock_timer, bs); >> + >> + =A0 =A0 =A0 =A0bs->slice_start[0] =3D qemu_get_clock_ns(vm_clock); >> + =A0 =A0 =A0 =A0bs->slice_start[1] =3D qemu_get_clock_ns(vm_clock); >> + =A0 =A0} >> + > > It should be possible to tune the limits on the flight, please introduce > QMP commands for that. Yeah, I am working on this. 
> >> =A0 =A0 =A0return 0; >> >> =A0unlink_and_fail: >> @@ -680,6 +735,16 @@ void bdrv_close(BlockDriverState *bs) >> =A0 =A0 =A0 =A0 =A0if (bs->change_cb) >> =A0 =A0 =A0 =A0 =A0 =A0 =A0bs->change_cb(bs->change_opaque, CHANGE_MEDIA= ); >> =A0 =A0 =A0} >> + >> + =A0 =A0/* throttling disk I/O limits */ >> + =A0 =A0if (bs->block_queue) { >> + =A0 =A0 =A0 =A0qemu_del_block_queue(bs->block_queue); >> + =A0 =A0} >> + >> + =A0 =A0if (bs->block_timer) { >> + =A0 =A0 =A0 =A0qemu_del_timer(bs->block_timer); >> + =A0 =A0 =A0 =A0qemu_free_timer(bs->block_timer); >> + =A0 =A0} >> =A0} >> >> =A0void bdrv_close_all(void) >> @@ -1312,6 +1377,14 @@ void bdrv_get_geometry_hint(BlockDriverState *bs, >> =A0 =A0 =A0*psecs =3D bs->secs; >> =A0} >> >> +/* throttling disk io limits */ >> +void bdrv_set_io_limits(BlockDriverState *bs, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 BlockIOLimit *io_limit= s) >> +{ >> + =A0 =A0memset(&bs->io_limits, 0, sizeof(BlockIOLimit)); >> + =A0 =A0bs->io_limits =3D *io_limits; >> +} >> + >> =A0/* Recognize floppy formats */ >> =A0typedef struct FDFormat { >> =A0 =A0 =A0FDriveType drive; >> @@ -2111,6 +2184,155 @@ char *bdrv_snapshot_dump(char *buf, int buf_size= , QEMUSnapshotInfo *sn) >> =A0 =A0 =A0return buf; >> =A0} >> >> +static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors= , >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 bool is_write, double elapsed_time, ui= nt64_t *wait) { >> + =A0 =A0uint64_t bps_limit =3D 0; >> + =A0 =A0double =A0 bytes_limit, bytes_disp, bytes_res; >> + =A0 =A0double =A0 slice_time =3D 0.1, wait_time; >> + >> + =A0 =A0if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { >> + =A0 =A0 =A0 =A0bps_limit =3D bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]; >> + =A0 =A0} else if (bs->io_limits.bps[is_write]) { >> + =A0 =A0 =A0 =A0bps_limit =3D bs->io_limits.bps[is_write]; >> + =A0 =A0} else { >> + =A0 =A0 =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0 =A0 =A0*wait =3D 0; >> + =A0 =A0 =A0 =A0} >> + >> + =A0 =A0 =A0 =A0return false; >> + 
=A0 =A0} >> + >> + =A0 =A0bytes_limit =A0 =A0 =A0=3D bps_limit * slice_time; >> + =A0 =A0bytes_disp =A0=3D bs->io_disps.bytes[is_write]; >> + =A0 =A0if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { >> + =A0 =A0 =A0 =A0bytes_disp +=3D bs->io_disps.bytes[!is_write]; >> + =A0 =A0} >> + >> + =A0 =A0bytes_res =A0 =3D (unsigned) nb_sectors * BDRV_SECTOR_SIZE; > > Virtio can submit requests of 512 sectors or more... does not play > well with 1MB/sec limit. Yeah, thanks for your catch. I will fix this. > >> + =A0 =A0if (bytes_disp + bytes_res <=3D bytes_limit) { >> + =A0 =A0 =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0 =A0 =A0*wait =3D 0; >> + =A0 =A0 =A0 =A0} >> + >> + =A0 =A0 =A0 =A0return false; >> + =A0 =A0} >> + >> + =A0 =A0/* Calc approx time to dispatch */ >> + =A0 =A0wait_time =3D (bytes_disp + bytes_res - bytes_limit) / bps_limi= t; >> + =A0 =A0if (!wait_time) { >> + =A0 =A0 =A0 =A0wait_time =3D 1; >> + =A0 =A0} >> + >> + =A0 =A0wait_time =3D wait_time + (slice_time - elapsed_time); >> + =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0*wait =3D wait_time * BLOCK_IO_SLICE_TIME * 10 + 1; >> + =A0 =A0} > > The guest can keep submitting requests where "wait_time =3D 1" above, > and the timer will be rearmed continuously in the future. Can't you > simply arm the timer to the next slice start? _Some_ data must be > transfered by then, anyway (and nothing can be transfered earlier than > that). Sorry, I haven't got what you mean. Can you elaborate in more detail? > > Same for iops calculation below. 
> >> + >> + =A0 =A0return true; >> +} >> + >> +static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write= , >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 double elapsed= _time, uint64_t *wait) { >> + =A0 =A0uint64_t iops_limit =3D 0; >> + =A0 =A0double =A0 ios_limit, ios_disp; >> + =A0 =A0double =A0 slice_time =3D 0.1, wait_time; >> + >> + =A0 =A0if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { >> + =A0 =A0 =A0 =A0iops_limit =3D bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]= ; >> + =A0 =A0} else if (bs->io_limits.iops[is_write]) { >> + =A0 =A0 =A0 =A0iops_limit =3D bs->io_limits.iops[is_write]; >> + =A0 =A0} else { >> + =A0 =A0 =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0 =A0 =A0*wait =3D 0; >> + =A0 =A0 =A0 =A0} >> + >> + =A0 =A0 =A0 =A0return false; >> + =A0 =A0} >> + >> + =A0 =A0ios_limit =3D iops_limit * slice_time; >> + =A0 =A0ios_disp =A0=3D bs->io_disps.ios[is_write]; >> + =A0 =A0if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { >> + =A0 =A0 =A0 =A0ios_disp +=3D bs->io_disps.ios[!is_write]; >> + =A0 =A0} >> + >> + =A0 =A0if (ios_disp + 1 <=3D ios_limit) { >> + =A0 =A0 if (wait) { >> + =A0 =A0 =A0 =A0 *wait =3D 0; >> + =A0 =A0 } >> + >> + =A0 =A0 =A0 =A0return false; >> + =A0 =A0} >> + >> + =A0 =A0/* Calc approx time to dispatch */ >> + =A0 =A0wait_time =3D (ios_disp + 1) / iops_limit; >> + =A0 =A0if (wait_time > elapsed_time) { >> + =A0 =A0 wait_time =3D wait_time - elapsed_time; >> + =A0 =A0} else { >> + =A0 =A0 wait_time =3D 0; >> + =A0 =A0} >> + >> + =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0*wait =3D wait_time * BLOCK_IO_SLICE_TIME * 10 + 1; >> + =A0 =A0} >> + >> + =A0 =A0return true; >> +} >> + >> +static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 bool is_write, uin= t64_t *wait) { >> + =A0 =A0int64_t =A0real_time; >> + =A0 =A0uint64_t bps_wait =3D 0, iops_wait =3D 0, max_wait; >> + =A0 =A0double =A0 elapsed_time; >> + =A0 =A0int =A0 =A0 =A0bps_ret, iops_ret; >> 
+ >> + =A0 =A0real_time =3D qemu_get_clock_ns(vm_clock); >> + =A0 =A0if (bs->slice_start[is_write] + BLOCK_IO_SLICE_TIME <=3D real_t= ime) { >> + =A0 =A0 =A0 =A0bs->slice_start[is_write] =3D real_time; >> + >> + =A0 =A0 =A0 =A0bs->io_disps.bytes[is_write] =A0 =3D 0; >> + =A0 =A0 =A0 =A0bs->io_disps.bytes[!is_write] =A0=3D 0; >> + >> + =A0 =A0 =A0 =A0bs->io_disps.ios[is_write] =A0 =A0 =3D 0; >> + =A0 =A0 =A0 =A0bs->io_disps.ios[!is_write] =A0 =A0=3D 0; >> + =A0 =A0} >> + >> + =A0 =A0/* If a limit was exceeded, immediately queue this request */ >> + =A0 =A0if ((bs->req_from_queue =3D=3D false) >> + =A0 =A0 =A0 =A0&& !QTAILQ_EMPTY(&bs->block_queue->requests)) { >> + =A0 =A0 =A0 =A0if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL] >> + =A0 =A0 =A0 =A0 =A0 =A0|| bs->io_limits.bps[is_write] || bs->io_limits= .iops[is_write] >> + =A0 =A0 =A0 =A0 =A0 =A0|| bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { >> + =A0 =A0 =A0 =A0 =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0*wait =3D 0; >> + =A0 =A0 =A0 =A0 =A0 =A0} >> + >> + =A0 =A0 =A0 =A0 =A0 =A0return true; >> + =A0 =A0 =A0 =A0} >> + =A0 =A0} >> + >> + =A0 =A0elapsed_time =A0=3D real_time - bs->slice_start[is_write]; >> + =A0 =A0elapsed_time =A0/=3D (BLOCK_IO_SLICE_TIME * 10.0); >> + >> + =A0 =A0bps_ret =A0=3D bdrv_exceed_bps_limits(bs, nb_sectors, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 = =A0 =A0is_write, elapsed_time, &bps_wait); >> + =A0 =A0iops_ret =3D bdrv_exceed_iops_limits(bs, is_write, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 = =A0 =A0elapsed_time, &iops_wait); >> + =A0 =A0if (bps_ret || iops_ret) { >> + =A0 =A0 =A0 =A0max_wait =3D bps_wait > iops_wait ? 
bps_wait : iops_wai= t; >> + =A0 =A0 =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0 =A0 =A0*wait =3D max_wait; >> + =A0 =A0 =A0 =A0} >> + >> + =A0 =A0 =A0 =A0return true; >> + =A0 =A0} >> + >> + =A0 =A0if (wait) { >> + =A0 =A0 =A0 =A0*wait =3D 0; >> + =A0 =A0} >> + >> + =A0 =A0return false; >> +} >> >> =A0/**************************************************************/ >> =A0/* async I/Os */ >> @@ -2121,13 +2343,28 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDriverStat= e *bs, int64_t sector_num, >> =A0{ >> =A0 =A0 =A0BlockDriver *drv =3D bs->drv; >> =A0 =A0 =A0BlockDriverAIOCB *ret; >> + =A0 =A0uint64_t wait_time =3D 0; >> >> =A0 =A0 =A0trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque); >> >> - =A0 =A0if (!drv) >> - =A0 =A0 =A0 =A0return NULL; >> - =A0 =A0if (bdrv_check_request(bs, sector_num, nb_sectors)) >> + =A0 =A0if (!drv || bdrv_check_request(bs, sector_num, nb_sectors)) { >> + =A0 =A0 =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0 =A0 =A0bs->req_from_queue =3D false; >> + =A0 =A0 =A0 =A0} >> =A0 =A0 =A0 =A0 =A0return NULL; >> + =A0 =A0} >> + >> + =A0 =A0/* throttling disk read I/O */ >> + =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0if (bdrv_exceed_io_limits(bs, nb_sectors, false, &wait_= time)) { >> + =A0 =A0 =A0 =A0 =A0 =A0ret =3D qemu_block_queue_enqueue(bs->block_queu= e, bs, bdrv_aio_readv, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0sector_num,= qiov, nb_sectors, cb, opaque); >> + =A0 =A0 =A0 =A0 =A0 =A0qemu_mod_timer(bs->block_timer, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 wait_time + qemu_get_clock= _ns(vm_clock)); >> + =A0 =A0 =A0 =A0 =A0 =A0bs->req_from_queue =3D false; >> + =A0 =A0 =A0 =A0 =A0 =A0return ret; >> + =A0 =A0 =A0 =A0} >> + =A0 =A0} >> >> =A0 =A0 =A0ret =3D drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors, >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0cb, opaqu= e); >> @@ -2136,6 +2373,16 @@ BlockDriverAIOCB 
*bdrv_aio_readv(BlockDriverState= *bs, int64_t sector_num, >> =A0 =A0 =A0 /* Update stats even though technically transfer has not hap= pened. */ >> =A0 =A0 =A0 bs->rd_bytes +=3D (unsigned) nb_sectors * BDRV_SECTOR_SIZE; >> =A0 =A0 =A0 bs->rd_ops ++; >> + >> + =A0 =A0 =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0 =A0 =A0bs->io_disps.bytes[BLOCK_IO_LIMIT_READ] +=3D >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0(unsigned) = nb_sectors * BDRV_SECTOR_SIZE; >> + =A0 =A0 =A0 =A0 =A0 =A0bs->io_disps.ios[BLOCK_IO_LIMIT_READ]++; >> + =A0 =A0 =A0 =A0} >> + =A0 =A0} >> + >> + =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0bs->req_from_queue =3D false; >> =A0 =A0 =A0} >> >> =A0 =A0 =A0return ret; >> @@ -2184,15 +2431,18 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverSta= te *bs, int64_t sector_num, >> =A0 =A0 =A0BlockDriver *drv =3D bs->drv; >> =A0 =A0 =A0BlockDriverAIOCB *ret; >> =A0 =A0 =A0BlockCompleteData *blk_cb_data; >> + =A0 =A0uint64_t wait_time =3D 0; >> >> =A0 =A0 =A0trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque); >> >> - =A0 =A0if (!drv) >> - =A0 =A0 =A0 =A0return NULL; >> - =A0 =A0if (bs->read_only) >> - =A0 =A0 =A0 =A0return NULL; >> - =A0 =A0if (bdrv_check_request(bs, sector_num, nb_sectors)) >> + =A0 =A0if (!drv || bs->read_only >> + =A0 =A0 =A0 =A0|| bdrv_check_request(bs, sector_num, nb_sectors)) { >> + =A0 =A0 =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0 =A0 =A0bs->req_from_queue =3D false; >> + =A0 =A0 =A0 =A0} >> + >> =A0 =A0 =A0 =A0 =A0return NULL; >> + =A0 =A0} >> >> =A0 =A0 =A0if (bs->dirty_bitmap) { >> =A0 =A0 =A0 =A0 =A0blk_cb_data =3D blk_dirty_cb_alloc(bs, sector_num, nb= _sectors, cb, >> @@ -2201,6 +2451,18 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverStat= e *bs, int64_t sector_num, >> =A0 =A0 =A0 =A0 =A0opaque =3D blk_cb_data; >> =A0 =A0 =A0} >> >> + =A0 =A0/* throttling disk write I/O */ >> + =A0 =A0if 
(bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0if (bdrv_exceed_io_limits(bs, nb_sectors, true, &wait_t= ime)) { >> + =A0 =A0 =A0 =A0 =A0 =A0ret =3D qemu_block_queue_enqueue(bs->block_queu= e, bs, bdrv_aio_writev, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0sec= tor_num, qiov, nb_sectors, cb, opaque); >> + =A0 =A0 =A0 =A0 =A0 =A0qemu_mod_timer(bs->block_timer, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0wai= t_time + qemu_get_clock_ns(vm_clock)); >> + =A0 =A0 =A0 =A0 =A0 =A0bs->req_from_queue =3D false; >> + =A0 =A0 =A0 =A0 =A0 =A0return ret; >> + =A0 =A0 =A0 =A0} >> + =A0 =A0} >> + >> =A0 =A0 =A0ret =3D drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors= , >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 cb, opaq= ue); >> >> @@ -2211,6 +2473,16 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverStat= e *bs, int64_t sector_num, >> =A0 =A0 =A0 =A0 =A0if (bs->wr_highest_sector < sector_num + nb_sectors -= 1) { >> =A0 =A0 =A0 =A0 =A0 =A0 =A0bs->wr_highest_sector =3D sector_num + nb_sec= tors - 1; >> =A0 =A0 =A0 =A0 =A0} >> + >> + =A0 =A0 =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0 =A0 =A0bs->io_disps.bytes[BLOCK_IO_LIMIT_WRITE] +=3D >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 (unsigned)= nb_sectors * BDRV_SECTOR_SIZE; >> + =A0 =A0 =A0 =A0 =A0 =A0bs->io_disps.ios[BLOCK_IO_LIMIT_WRITE]++; >> + =A0 =A0 =A0 =A0} >> + =A0 =A0} >> + >> + =A0 =A0if (bdrv_io_limits_enable(&bs->io_limits)) { >> + =A0 =A0 =A0 =A0bs->req_from_queue =3D false; >> =A0 =A0 =A0} >> >> =A0 =A0 =A0return ret; >> diff --git a/block.h b/block.h >> index 859d1d9..f0dac62 100644 >> --- a/block.h >> +++ b/block.h >> @@ -97,7 +97,6 @@ int bdrv_change_backing_file(BlockDriverState *bs, >> =A0 =A0 =A0const char *backing_file, const char *backing_fmt); >> =A0void bdrv_register(BlockDriver *bdrv); >> >> - >> =A0typedef struct BdrvCheckResult { >> =A0 =A0 =A0int corruptions; 
>> =A0 =A0 =A0int leaks; >> diff --git a/block/blk-queue.c b/block/blk-queue.c >> new file mode 100644 >> index 0000000..09fcfe9 >> --- /dev/null >> +++ b/block/blk-queue.c >> @@ -0,0 +1,116 @@ >> +/* >> + * QEMU System Emulator queue definition for block layer >> + * >> + * Copyright (c) 2011 Zhi Yong Wu =A0 >> + * >> + * Permission is hereby granted, free of charge, to any person obtainin= g a copy >> + * of this software and associated documentation files (the "Software")= , to deal >> + * in the Software without restriction, including without limitation th= e rights >> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or= sell >> + * copies of the Software, and to permit persons to whom the Software i= s >> + * furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be inclu= ded in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPR= ESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL= ITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SH= ALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR= OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARIS= ING FROM, >> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALIN= GS IN >> + * THE SOFTWARE. >> + */ >> + >> +#include "block_int.h" >> +#include "qemu-queue.h" >> +#include "block/blk-queue.h" >> + >> +/* The APIs for block request queue on qemu block layer. 
>> + */ >> + >> +static void qemu_block_queue_cancel(BlockDriverAIOCB *acb) >> +{ >> + =A0 =A0qemu_aio_release(acb); >> +} >> + >> +static AIOPool block_queue_pool =3D { >> + =A0 =A0.aiocb_size =A0 =A0 =A0 =A0 =3D sizeof(struct BlockDriverAIOCB)= , >> + =A0 =A0.cancel =A0 =A0 =A0 =A0 =A0 =A0 =3D qemu_block_queue_cancel, >> +}; >> + >> +static void qemu_block_queue_callback(void *opaque, int ret) >> +{ >> + =A0 =A0BlockDriverAIOCB *acb =3D opaque; >> + >> + =A0 =A0qemu_aio_release(acb); >> +} >> + >> +BlockQueue *qemu_new_block_queue(void) >> +{ >> + =A0 =A0BlockQueue *queue; >> + >> + =A0 =A0queue =3D qemu_mallocz(sizeof(BlockQueue)); >> + >> + =A0 =A0QTAILQ_INIT(&queue->requests); >> + >> + =A0 =A0return queue; >> +} >> + >> +void qemu_del_block_queue(BlockQueue *queue) >> +{ >> + =A0 =A0BlockIORequest *request, *next; >> + >> + =A0 =A0QTAILQ_FOREACH_SAFE(request, &queue->requests, entry, next) { >> + =A0 =A0 =A0 =A0QTAILQ_REMOVE(&queue->requests, request, entry); >> + =A0 =A0 =A0 =A0qemu_free(request); >> + =A0 =A0} >> + >> + =A0 =A0qemu_free(queue); >> +} >> + >> +BlockDriverAIOCB *qemu_block_queue_enqueue(BlockQueue *queue, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 BlockDriverState *bs, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 BlockRequestHandler *handler, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 int64_t sector_num, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 QEMUIOVector *qiov, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 int nb_sectors, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 BlockDriverCompletionFunc *cb, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 void *opaque) >> +{ >> + =A0 =A0BlockIORequest *request; >> + =A0 =A0BlockDriverAIOCB *acb; >> + >> + =A0 =A0request =3D qemu_malloc(sizeof(BlockIORequest)); >> + =A0 =A0request->bs =3D bs; >> + =A0 =A0request->handler =3D handler; >> + =A0 =A0request->sector_num =3D sector_num; >> + =A0 =A0request->qiov =3D qiov; >> + =A0 =A0request->nb_sectors =3D nb_sectors; >> + =A0 =A0request->cb =3D cb; >> 
+ =A0 =A0request->opaque =3D opaque; >> + >> + =A0 =A0QTAILQ_INSERT_TAIL(&queue->requests, request, entry); >> + >> + =A0 =A0acb =3D qemu_aio_get(&block_queue_pool, bs, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 qemu_block_queue_callback,= opaque); >> + >> + =A0 =A0return acb; >> +} >> + >> +int qemu_block_queue_handler(BlockIORequest *request) >> +{ >> + =A0 =A0int ret; >> + =A0 =A0BlockDriverAIOCB *res; >> + >> + =A0 =A0/* indicate this req is from block queue */ >> + =A0 =A0request->bs->req_from_queue =3D true; >> + >> + =A0 =A0res =3D request->handler(request->bs, request->sector_num, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 request->qiov,= request->nb_sectors, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 request->cb, r= equest->opaque); >> + >> + =A0 =A0ret =3D (res =3D=3D NULL) ? 0 : 1; >> + >> + =A0 =A0return ret; >> +} >> diff --git a/block/blk-queue.h b/block/blk-queue.h >> new file mode 100644 >> index 0000000..47f8a36 >> --- /dev/null >> +++ b/block/blk-queue.h >> @@ -0,0 +1,70 @@ >> +/* >> + * QEMU System Emulator queue declaration for block layer >> + * >> + * Copyright (c) 2011 Zhi Yong Wu =A0 >> + * >> + * Permission is hereby granted, free of charge, to any person obtainin= g a copy >> + * of this software and associated documentation files (the "Software")= , to deal >> + * in the Software without restriction, including without limitation th= e rights >> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or= sell >> + * copies of the Software, and to permit persons to whom the Software i= s >> + * furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be inclu= ded in >> + * all copies or substantial portions of the Software. 
>> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPR= ESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL= ITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SH= ALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR= OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARIS= ING FROM, >> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALIN= GS IN >> + * THE SOFTWARE. >> + */ >> + >> +#ifndef QEMU_BLOCK_QUEUE_H >> +#define QEMU_BLOCK_QUEUE_H >> + >> +#include "block.h" >> +#include "qemu-queue.h" >> +#include "qemu-common.h" >> + >> +typedef BlockDriverAIOCB* (BlockRequestHandler) (BlockDriverState *bs, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0int64_t= sector_num, QEMUIOVector *qiov, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0int nb_= sectors, BlockDriverCompletionFunc *cb, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0void *o= paque); >> + >> +struct BlockIORequest { >> + =A0 =A0QTAILQ_ENTRY(BlockIORequest) entry; >> + =A0 =A0BlockDriverState *bs; >> + =A0 =A0BlockRequestHandler *handler; >> + =A0 =A0int64_t sector_num; >> + =A0 =A0QEMUIOVector *qiov; >> + =A0 =A0int nb_sectors; >> + =A0 =A0BlockDriverCompletionFunc *cb; >> + =A0 =A0void *opaque; >> +}; >> + >> +typedef struct BlockIORequest BlockIORequest; >> + >> +struct BlockQueue { >> + =A0 =A0QTAILQ_HEAD(requests, BlockIORequest) requests; >> +}; >> + >> +typedef struct BlockQueue BlockQueue; >> + >> +BlockQueue *qemu_new_block_queue(void); >> + >> +void qemu_del_block_queue(BlockQueue *queue); >> + >> +BlockDriverAIOCB *qemu_block_queue_enqueue(BlockQueue *queue, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0BlockDriverState *bs, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0BlockRequestHandler *ha= ndler, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0int64_t 
sector_num, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0QEMUIOVector *qiov, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0int nb_sectors, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0BlockDriverCompletionFu= nc *cb, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0void *opaque); >> + >> +int qemu_block_queue_handler(BlockIORequest *request); >> +#endif /* QEMU_BLOCK_QUEUE_H */ >> diff --git a/block_int.h b/block_int.h >> index 1e265d2..1587171 100644 >> --- a/block_int.h >> +++ b/block_int.h >> @@ -27,10 +27,17 @@ >> =A0#include "block.h" >> =A0#include "qemu-option.h" >> =A0#include "qemu-queue.h" >> +#include "block/blk-queue.h" >> >> =A0#define BLOCK_FLAG_ENCRYPT =A0 1 >> =A0#define BLOCK_FLAG_COMPAT6 =A0 4 >> >> +#define BLOCK_IO_LIMIT_READ =A0 =A0 0 >> +#define BLOCK_IO_LIMIT_WRITE =A0 =A01 >> +#define BLOCK_IO_LIMIT_TOTAL =A0 =A02 >> + >> +#define BLOCK_IO_SLICE_TIME =A0 =A0 100000000 >> + >> =A0#define BLOCK_OPT_SIZE =A0 =A0 =A0 =A0 =A0"size" >> =A0#define BLOCK_OPT_ENCRYPT =A0 =A0 =A0 "encryption" >> =A0#define BLOCK_OPT_COMPAT6 =A0 =A0 =A0 "compat6" >> @@ -46,6 +53,16 @@ typedef struct AIOPool { >> =A0 =A0 =A0BlockDriverAIOCB *free_aiocb; >> =A0} AIOPool; >> >> +typedef struct BlockIOLimit { >> + =A0 =A0uint64_t bps[3]; >> + =A0 =A0uint64_t iops[3]; >> +} BlockIOLimit; >> + >> +typedef struct BlockIODisp { >> + =A0 =A0uint64_t bytes[2]; >> + =A0 =A0uint64_t ios[2]; >> +} BlockIODisp; >> + >> =A0struct BlockDriver { >> =A0 =A0 =A0const char *format_name; >> =A0 =A0 =A0int instance_size; >> @@ -175,6 +192,14 @@ struct BlockDriverState { >> >> =A0 =A0 =A0void *sync_aiocb; >> >> + =A0 =A0/* the time for latest disk I/O */ >> + =A0 =A0int64_t slice_start[2]; >> + =A0 =A0BlockIOLimit io_limits; >> + =A0 =A0BlockIODisp =A0io_disps; >> + =A0 =A0BlockQueue =A0 *block_queue; >> + =A0 =A0QEMUTimer =A0 =A0*block_timer; >> + =A0 =A0bool =A0 =A0 =A0 =A0 req_from_queue; >> + >> =A0 =A0 =A0/* I/O stats (display with "info blockstats"). 
*/ >> =A0 =A0 =A0uint64_t rd_bytes; >> =A0 =A0 =A0uint64_t wr_bytes; >> @@ -222,6 +247,9 @@ void qemu_aio_release(void *p); >> >> =A0void *qemu_blockalign(BlockDriverState *bs, size_t size); >> >> +void bdrv_set_io_limits(BlockDriverState *bs, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0BlockIOLimit *i= o_limits); >> + >> =A0#ifdef _WIN32 >> =A0int is_windows_drive(const char *filename); >> =A0#endif >> diff --git a/blockdev.c b/blockdev.c >> index c263663..45602f4 100644 >> --- a/blockdev.c >> +++ b/blockdev.c >> @@ -238,6 +238,9 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to= _scsi) >> =A0 =A0 =A0int on_read_error, on_write_error; >> =A0 =A0 =A0const char *devaddr; >> =A0 =A0 =A0DriveInfo *dinfo; >> + =A0 =A0BlockIOLimit io_limits; >> + =A0 =A0bool iol_flag =3D false; >> + =A0 =A0const char *iol_opts[7] =3D {"bps", "bps_rd", "bps_wr", "iops",= "iops_rd", "iops_wr"}; >> =A0 =A0 =A0int is_extboot =3D 0; >> =A0 =A0 =A0int snapshot =3D 0; >> =A0 =A0 =A0int ret; >> @@ -372,6 +375,19 @@ DriveInfo *drive_init(QemuOpts *opts, int default_t= o_scsi) >> =A0 =A0 =A0 =A0 =A0return NULL; >> =A0 =A0 =A0} >> >> + =A0 =A0/* disk io limits */ >> + =A0 =A0iol_flag =3D qemu_opt_io_limits_enable_flag(opts, iol_opts); >> + =A0 =A0if (iol_flag) { >> + =A0 =A0 =A0 =A0memset(&io_limits, 0, sizeof(BlockIOLimit)); >> + >> + =A0 =A0 =A0 =A0io_limits.bps[2] =A0=3D qemu_opt_get_number(opts, "bps"= , 0); >> + =A0 =A0 =A0 =A0io_limits.bps[0] =A0=3D qemu_opt_get_number(opts, "bps_= rd", 0); >> + =A0 =A0 =A0 =A0io_limits.bps[1] =A0=3D qemu_opt_get_number(opts, "bps_= wr", 0); >> + =A0 =A0 =A0 =A0io_limits.iops[2] =3D qemu_opt_get_number(opts, "iops",= 0); >> + =A0 =A0 =A0 =A0io_limits.iops[0] =3D qemu_opt_get_number(opts, "iops_r= d", 0); >> + =A0 =A0 =A0 =A0io_limits.iops[1] =3D qemu_opt_get_number(opts, "iops_w= r", 0); >> + =A0 =A0} >> + >> =A0 =A0 =A0on_write_error =3D BLOCK_ERR_STOP_ENOSPC; >> =A0 =A0 =A0if ((buf =3D qemu_opt_get(opts, "werror")) !=3D NULL) { >> =A0 
=A0 =A0 =A0 =A0if (type !=3D IF_IDE && type !=3D IF_SCSI && type != =3D IF_VIRTIO && type !=3D IF_NONE) { >> @@ -483,6 +499,11 @@ DriveInfo *drive_init(QemuOpts *opts, int default_t= o_scsi) >> >> =A0 =A0 =A0bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error)= ; >> >> + =A0 =A0/* throttling disk io limits */ >> + =A0 =A0if (iol_flag) { >> + =A0 =A0 =A0 =A0bdrv_set_io_limits(dinfo->bdrv, &io_limits); >> + =A0 =A0} >> + >> =A0 =A0 =A0switch(type) { >> =A0 =A0 =A0case IF_IDE: >> =A0 =A0 =A0case IF_SCSI: >> diff --git a/qemu-config.c b/qemu-config.c >> index efa892c..9232bbb 100644 >> --- a/qemu-config.c >> +++ b/qemu-config.c >> @@ -82,6 +82,30 @@ static QemuOptsList qemu_drive_opts =3D { >> =A0 =A0 =A0 =A0 =A0 =A0 =A0.name =3D "boot", >> =A0 =A0 =A0 =A0 =A0 =A0 =A0.type =3D QEMU_OPT_BOOL, >> =A0 =A0 =A0 =A0 =A0 =A0 =A0.help =3D "make this a boot drive", >> + =A0 =A0 =A0 =A0},{ >> + =A0 =A0 =A0 =A0 =A0 =A0.name =3D "iops", >> + =A0 =A0 =A0 =A0 =A0 =A0.type =3D QEMU_OPT_NUMBER, >> + =A0 =A0 =A0 =A0 =A0 =A0.help =3D "limit total I/O operations per secon= d", >> + =A0 =A0 =A0 =A0},{ >> + =A0 =A0 =A0 =A0 =A0 =A0.name =3D "iops_rd", >> + =A0 =A0 =A0 =A0 =A0 =A0.type =3D QEMU_OPT_NUMBER, >> + =A0 =A0 =A0 =A0 =A0 =A0.help =3D "limit read operations per second", >> + =A0 =A0 =A0 =A0},{ >> + =A0 =A0 =A0 =A0 =A0 =A0.name =3D "iops_wr", >> + =A0 =A0 =A0 =A0 =A0 =A0.type =3D QEMU_OPT_NUMBER, >> + =A0 =A0 =A0 =A0 =A0 =A0.help =3D "limit write operations per second", >> + =A0 =A0 =A0 =A0},{ >> + =A0 =A0 =A0 =A0 =A0 =A0.name =3D "bps", >> + =A0 =A0 =A0 =A0 =A0 =A0.type =3D QEMU_OPT_NUMBER, >> + =A0 =A0 =A0 =A0 =A0 =A0.help =3D "limit total bytes per second", >> + =A0 =A0 =A0 =A0},{ >> + =A0 =A0 =A0 =A0 =A0 =A0.name =3D "bps_rd", >> + =A0 =A0 =A0 =A0 =A0 =A0.type =3D QEMU_OPT_NUMBER, >> + =A0 =A0 =A0 =A0 =A0 =A0.help =3D "limit read bytes per second", >> + =A0 =A0 =A0 =A0},{ >> + =A0 =A0 =A0 =A0 =A0 =A0.name =3D "bps_wr", >> + =A0 =A0 =A0 =A0 =A0 =A0.type =3D 
QEMU_OPT_NUMBER, >> + =A0 =A0 =A0 =A0 =A0 =A0.help =3D "limit write bytes per second", >> =A0 =A0 =A0 =A0 =A0}, >> =A0 =A0 =A0 =A0 =A0{ /* end of list */ } >> =A0 =A0 =A0}, >> diff --git a/qemu-option.c b/qemu-option.c >> index 65db542..9fe234d 100644 >> --- a/qemu-option.c >> +++ b/qemu-option.c >> @@ -562,6 +562,23 @@ uint64_t qemu_opt_get_number(QemuOpts *opts, const = char *name, uint64_t defval) >> =A0 =A0 =A0return opt->value.uint; >> =A0} >> >> +bool qemu_opt_io_limits_enable_flag(QemuOpts *opts, const char **iol_op= ts) >> +{ >> + =A0 =A0 int i; >> + =A0 =A0 uint64_t opt_val =A0 =3D 0; >> + =A0 =A0 bool iol_flag =3D false; >> + >> + =A0 =A0 for (i =3D 0; iol_opts[i]; i++) { >> + =A0 =A0 =A0opt_val =3D qemu_opt_get_number(opts, iol_opts[i], 0); >> + =A0 =A0 =A0if (opt_val !=3D 0) { >> + =A0 =A0 =A0 =A0 =A0iol_flag =3D true; >> + =A0 =A0 =A0 =A0 =A0break; >> + =A0 =A0 =A0} >> + =A0 =A0 } >> + >> + =A0 =A0 return iol_flag; >> +} >> + >> =A0uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint64_t= defval) >> =A0{ >> =A0 =A0 =A0QemuOpt *opt =3D qemu_opt_find(opts, name); >> diff --git a/qemu-option.h b/qemu-option.h >> index b515813..fc909f9 100644 >> --- a/qemu-option.h >> +++ b/qemu-option.h >> @@ -107,6 +107,7 @@ struct QemuOptsList { >> =A0const char *qemu_opt_get(QemuOpts *opts, const char *name); >> =A0int qemu_opt_get_bool(QemuOpts *opts, const char *name, int defval); >> =A0uint64_t qemu_opt_get_number(QemuOpts *opts, const char *name, uint64= _t defval); >> +bool qemu_opt_io_limits_enable_flag(QemuOpts *opts, const char **iol_op= ts); >> =A0uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint64_t= defval); >> =A0int qemu_opt_set(QemuOpts *opts, const char *name, const char *value)= ; >> =A0typedef int (*qemu_opt_loopfunc)(const char *name, const char *value,= void *opaque); >> diff --git a/qemu-options.hx b/qemu-options.hx >> index cb3347e..ae219f5 100644 >> --- a/qemu-options.hx >> +++ b/qemu-options.hx >> @@ -121,6 +121,7 @@ 
DEF("drive", HAS_ARG, QEMU_OPTION_drive, >> =A0 =A0 =A0" =A0 =A0 =A0 [,cache=3Dwritethrough|writeback|none|unsafe][,= format=3Df]\n" >> =A0 =A0 =A0" =A0 =A0 =A0 [,serial=3Ds][,addr=3DA][,id=3Dname][,aio=3Dthr= eads|native]\n" >> =A0 =A0 =A0" =A0 =A0 =A0 [,readonly=3Don|off][,boot=3Don|off]\n" >> + =A0 =A0" =A0 =A0 =A0 [[,bps=3Db]|[[,bps_rd=3Dr][,bps_wr=3Dw]]][[,iops= =3Di]|[[,iops_rd=3Dr][,iops_wr=3Dw]]\n" >> =A0 =A0 =A0" =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0use 'file' as a drive image\= n", QEMU_ARCH_ALL) >> =A0STEXI >> =A0@item -drive @var{option}[,@var{option}[,@var{option}[,...]]] >> -- >> 1.7.2.3 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe kvm" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at =A0http://vger.kernel.org/majordomo-info.html > --=20 Regards, Zhi Yong Wu