From mboxrd@z Thu Jan 1 00:00:00 1970 From: Alexandre DERUMIER Subject: Re: [RFC v5] RBD: Add support readv,writev for rbd Date: Thu, 16 Feb 2017 16:13:41 +0100 (CET) Message-ID: <887093105.8854266.1487258021988.JavaMail.zimbra@oxygem.tv> References: <20170216084318.11122-1-jazeltq@gmail.com> <20170216090002.12511-1-jazeltq@gmail.com> Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8BIT Return-path: Received: from mailpro.odiso.net ([89.248.211.110]:41612 "EHLO mailpro.odiso.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932188AbdBPPW3 (ORCPT ); Thu, 16 Feb 2017 10:22:29 -0500 In-Reply-To: Sender: ceph-devel-owner@vger.kernel.org List-ID: To: jazeltq Cc: Tiger Hu , Josh Durgin , Jeff Cody , dillaman , Kevin Wolf , mreitz@redhat.com, qemu-block@nongnu.org, qemu-devel , ceph-devel , tianqing >>Not yet. I just test on one qemu-kvm vm. It works fine. >>The performance may need more time. >>Any one can test on this patch if you do fast.... Hi, I would like to bench it with small 4k read/write. On the ceph side, do we need this PR ? : https://github.com/ceph/ceph/pull/13447 ----- Mail original ----- De: "jazeltq" À: "Tiger Hu" Cc: "Josh Durgin" , "Jeff Cody" , "dillaman" , "Kevin Wolf" , mreitz@redhat.com, qemu-block@nongnu.org, "qemu-devel" , "ceph-devel" , "tianqing" Envoyé: Jeudi 16 Février 2017 15:03:52 Objet: Re: [RFC v5] RBD: Add support readv,writev for rbd Not yet. I just test on one qemu-kvm vm. It works fine. The performance may need more time. Any one can test on this patch if you do fast.... 2017-02-16 20:07 GMT+08:00 Tiger Hu : > Tianqing, > > Do we have any performance data for this patch? Thanks. > > Tiger >> 在 2017年2月16日,下午5:00,jazeltq@gmail.com 写道: >> >> From: tianqing >> >> Rbd can do readv and writev directly, so we do not need to transform >> iov to buf or vice versa any more. 
>> >> Signed-off-by: tianqing >> --- >> block/rbd.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- >> 1 file changed, 42 insertions(+), 7 deletions(-) >> >> diff --git a/block/rbd.c b/block/rbd.c >> index a57b3e3..75ae1d6 100644 >> --- a/block/rbd.c >> +++ b/block/rbd.c >> @@ -47,7 +47,7 @@ >> */ >> >> /* rbd_aio_discard added in 0.1.2 */ >> -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2) >> +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(12, 0, 0) >> #define LIBRBD_SUPPORTS_DISCARD >> #else >> #undef LIBRBD_SUPPORTS_DISCARD >> @@ -73,7 +73,12 @@ typedef struct RBDAIOCB { >> BlockAIOCB common; >> int64_t ret; >> QEMUIOVector *qiov; >> +/* Note: >> + * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h. >> + */ >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> char *bounce; >> +#endif >> RBDAIOCmd cmd; >> int error; >> struct BDRVRBDState *s; >> @@ -83,7 +88,9 @@ typedef struct RADOSCB { >> RBDAIOCB *acb; >> struct BDRVRBDState *s; >> int64_t size; >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> char *buf; >> +#endif >> int64_t ret; >> } RADOSCB; >> >> @@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) >> } >> } else { >> if (r < 0) { >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> memset(rcb->buf, 0, rcb->size); >> +#else >> + iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size); >> +#endif >> acb->ret = r; >> acb->error = 1; >> } else if (r < rcb->size) { >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> memset(rcb->buf + r, 0, rcb->size - r); >> +#else >> + iov_memset(acb->qiov->iov, acb->qiov->niov, >> + r, 0, acb->qiov->size - r); >> +#endif >> + >> if (!acb->error) { >> acb->ret = rcb->size; >> } >> @@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) >> >> g_free(rcb); >> >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> if (acb->cmd == RBD_AIO_READ) { >> qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); >> } >> qemu_vfree(acb->bounce); >> +#endif >> acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 
0 : acb->ret)); >> >> qemu_aio_unref(acb); >> @@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> RBDAIOCB *acb; >> RADOSCB *rcb = NULL; >> rbd_completion_t c; >> - char *buf; >> int r; >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> + char *buf = NULL; >> +#endif >> >> BDRVRBDState *s = bs->opaque; >> >> @@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> acb->cmd = cmd; >> acb->qiov = qiov; >> assert(!qiov || qiov->size == size); >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> + >> if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { >> acb->bounce = NULL; >> } else { >> @@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> goto failed; >> } >> } >> - acb->ret = 0; >> - acb->error = 0; >> - acb->s = s; >> - >> if (cmd == RBD_AIO_WRITE) { >> qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); >> } >> - >> buf = acb->bounce; >> +#endif >> + acb->ret = 0; >> + acb->error = 0; >> + acb->s = s; >> >> rcb = g_new(RADOSCB, 1); >> + >> rcb->acb = acb; >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> rcb->buf = buf; >> +#endif >> rcb->s = acb->s; >> rcb->size = size; >> r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); >> @@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, >> >> switch (cmd) { >> case RBD_AIO_WRITE: >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> r = rbd_aio_write(s->image, off, size, buf, c); >> +#else >> + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); >> +#endif >> break; >> case RBD_AIO_READ: >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> r = rbd_aio_read(s->image, off, size, buf, c); >> +#else >> + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); >> +#endif >> break; >> case RBD_AIO_DISCARD: >> r = rbd_aio_discard_wrapper(s->image, off, size, c); >> @@ -719,7 +752,9 @@ failed_completion: >> rbd_aio_release(c); >> failed: >> g_free(rcb); >> +#ifndef LIBRBD_SUPPORTS_IOVEC >> qemu_vfree(acb->bounce); >> +#endif >> qemu_aio_unref(acb); >> return 
NULL; >> } >> -- >> 2.10.2 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > -- 谦谦君子 -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:34676) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ceNlN-0003ho-DJ for qemu-devel@nongnu.org; Thu, 16 Feb 2017 10:14:32 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ceNlH-000189-3W for qemu-devel@nongnu.org; Thu, 16 Feb 2017 10:14:29 -0500 Date: Thu, 16 Feb 2017 16:13:41 +0100 (CET) From: Alexandre DERUMIER Message-ID: <887093105.8854266.1487258021988.JavaMail.zimbra@oxygem.tv> In-Reply-To: References: <20170216084318.11122-1-jazeltq@gmail.com> <20170216090002.12511-1-jazeltq@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [RFC v5] RBD: Add support readv,writev for rbd List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: jazeltq Cc: Tiger Hu , Josh Durgin , Jeff Cody , dillaman , Kevin Wolf , mreitz@redhat.com, qemu-block@nongnu.org, qemu-devel , ceph-devel , tianqing >>No yet. I just test on one qemu-kvm vm. It works fine.=20 >>The performance may need more time.=20 >>Any one can test on this patch if you do fast....=20 Hi, I would like to bench it with small 4k read/write. On the ceph side,do we need this PR ? 
: https://github.com/ceph/ceph/pull/13447 ----- Mail original ----- De: "jazeltq" =C3=80: "Tiger Hu" Cc: "Josh Durgin" , "Jeff Cody" , "di= llaman" , "Kevin Wolf" , mreitz@redh= at.com, qemu-block@nongnu.org, "qemu-devel" , "ceph-= devel" , "tianqing" Envoy=C3=A9: Jeudi 16 F=C3=A9vrier 2017 15:03:52 Objet: Re: [RFC v5] RBD: Add support readv,writev for rbd No yet. I just test on one qemu-kvm vm. It works fine.=20 The performance may need more time.=20 Any one can test on this patch if you do fast....=20 2017-02-16 20:07 GMT+08:00 Tiger Hu :=20 > Tianqing,=20 >=20 > Do we have any performance data for this patch? Thanks.=20 >=20 > Tiger=20 >> =E5=9C=A8 2017=E5=B9=B42=E6=9C=8816=E6=97=A5=EF=BC=8C=E4=B8=8B=E5=8D=885= :00=EF=BC=8Cjazeltq@gmail.com =E5=86=99=E9=81=93=EF=BC=9A=20 >>=20 >> From: tianqing =20 >>=20 >> Rbd can do readv and writev directly, so wo do not need to transform=20 >> iov to buf or vice versa any more.=20 >>=20 >> Signed-off-by: tianqing =20 >> ---=20 >> block/rbd.c | 49 ++++++++++++++++++++++++++++++++++++++++++-------=20 >> 1 file changed, 42 insertions(+), 7 deletions(-)=20 >>=20 >> diff --git a/block/rbd.c b/block/rbd.c=20 >> index a57b3e3..75ae1d6 100644=20 >> --- a/block/rbd.c=20 >> +++ b/block/rbd.c=20 >> @@ -47,7 +47,7 @@=20 >> */=20 >>=20 >> /* rbd_aio_discard added in 0.1.2 */=20 >> -#if LIBRBD_VERSION_CODE >=3D LIBRBD_VERSION(0, 1, 2)=20 >> +#if LIBRBD_VERSION_CODE >=3D LIBRBD_VERSION(12, 0, 0)=20 >> #define LIBRBD_SUPPORTS_DISCARD=20 >> #else=20 >> #undef LIBRBD_SUPPORTS_DISCARD=20 >> @@ -73,7 +73,12 @@ typedef struct RBDAIOCB {=20 >> BlockAIOCB common;=20 >> int64_t ret;=20 >> QEMUIOVector *qiov;=20 >> +/* Note:=20 >> + * The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h.=20 >> + */=20 >> +#ifndef LIBRBD_SUPPORTS_IOVEC=20 >> char *bounce;=20 >> +#endif=20 >> RBDAIOCmd cmd;=20 >> int error;=20 >> struct BDRVRBDState *s;=20 >> @@ -83,7 +88,9 @@ typedef struct RADOSCB {=20 >> RBDAIOCB *acb;=20 >> struct BDRVRBDState *s;=20 >> int64_t 
size;=20 >> +#ifndef LIBRBD_SUPPORTS_IOVEC=20 >> char *buf;=20 >> +#endif=20 >> int64_t ret;=20 >> } RADOSCB;=20 >>=20 >> @@ -426,11 +433,21 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)=20 >> }=20 >> } else {=20 >> if (r < 0) {=20 >> +#ifndef LIBRBD_SUPPORTS_IOVEC=20 >> memset(rcb->buf, 0, rcb->size);=20 >> +#else=20 >> + iov_memset(acb->qiov->iov, acb->qiov->niov, 0, 0, acb->qiov->size);=20 >> +#endif=20 >> acb->ret =3D r;=20 >> acb->error =3D 1;=20 >> } else if (r < rcb->size) {=20 >> +#ifndef LIBRBD_SUPPORTS_IOVEC=20 >> memset(rcb->buf + r, 0, rcb->size - r);=20 >> +#else=20 >> + iov_memset(acb->qiov->iov, acb->qiov->niov,=20 >> + r, 0, acb->qiov->size - r);=20 >> +#endif=20 >> +=20 >> if (!acb->error) {=20 >> acb->ret =3D rcb->size;=20 >> }=20 >> @@ -441,10 +458,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)=20 >>=20 >> g_free(rcb);=20 >>=20 >> +#ifndef LIBRBD_SUPPORTS_IOVEC=20 >> if (acb->cmd =3D=3D RBD_AIO_READ) {=20 >> qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);=20 >> }=20 >> qemu_vfree(acb->bounce);=20 >> +#endif=20 >> acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 
0 : acb->ret));=20 >>=20 >> qemu_aio_unref(acb);=20 >> @@ -655,8 +674,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *= bs,=20 >> RBDAIOCB *acb;=20 >> RADOSCB *rcb =3D NULL;=20 >> rbd_completion_t c;=20 >> - char *buf;=20 >> int r;=20 >> +#ifndef LIBRBD_SUPPORTS_IOVEC=20 >> + char *buf =3D NULL;=20 >> +#endif=20 >>=20 >> BDRVRBDState *s =3D bs->opaque;=20 >>=20 >> @@ -664,6 +685,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *b= s,=20 >> acb->cmd =3D cmd;=20 >> acb->qiov =3D qiov;=20 >> assert(!qiov || qiov->size =3D=3D size);=20 >> +#ifndef LIBRBD_SUPPORTS_IOVEC=20 >> +=20 >> if (cmd =3D=3D RBD_AIO_DISCARD || cmd =3D=3D RBD_AIO_FLUSH) {=20 >> acb->bounce =3D NULL;=20 >> } else {=20 >> @@ -672,19 +695,21 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState = *bs,=20 >> goto failed;=20 >> }=20 >> }=20 >> - acb->ret =3D 0;=20 >> - acb->error =3D 0;=20 >> - acb->s =3D s;=20 >> -=20 >> if (cmd =3D=3D RBD_AIO_WRITE) {=20 >> qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);=20 >> }=20 >> -=20 >> buf =3D acb->bounce;=20 >> +#endif=20 >> + acb->ret =3D 0;=20 >> + acb->error =3D 0;=20 >> + acb->s =3D s;=20 >>=20 >> rcb =3D g_new(RADOSCB, 1);=20 >> +=20 >> rcb->acb =3D acb;=20 >> +#ifndef LIBRBD_SUPPORTS_IOVEC=20 >> rcb->buf =3D buf;=20 >> +#endif=20 >> rcb->s =3D acb->s;=20 >> rcb->size =3D size;=20 >> r =3D rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, = &c);=20 >> @@ -694,10 +719,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState = *bs,=20 >>=20 >> switch (cmd) {=20 >> case RBD_AIO_WRITE:=20 >> +#ifndef LIBRBD_SUPPORTS_IOVEC=20 >> r =3D rbd_aio_write(s->image, off, size, buf, c);=20 >> +#else=20 >> + r =3D rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);=20 >> +#endif=20 >> break;=20 >> case RBD_AIO_READ:=20 >> +#ifndef LIBRBD_SUPPORTS_IOVEC=20 >> r =3D rbd_aio_read(s->image, off, size, buf, c);=20 >> +#else=20 >> + r =3D rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);=20 >> +#endif=20 >> break;=20 >> case 
RBD_AIO_DISCARD:=20 >> r =3D rbd_aio_discard_wrapper(s->image, off, size, c);=20 >> @@ -719,7 +752,9 @@ failed_completion:=20 >> rbd_aio_release(c);=20 >> failed:=20 >> g_free(rcb);=20 >> +#ifndef LIBRBD_SUPPORTS_IOVEC=20 >> qemu_vfree(acb->bounce);=20 >> +#endif=20 >> qemu_aio_unref(acb);=20 >> return NULL;=20 >> }=20 >> --=20 >> 2.10.2=20 >>=20 >> --=20 >> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in= =20 >> the body of a message to majordomo@vger.kernel.org=20 >> More majordomo info at http://vger.kernel.org/majordomo-info.html=20 >=20 --=20 =E8=B0=A6=E8=B0=A6=E5=90=9B=E5=AD=90=20 --=20 To unsubscribe from this list: send the line "unsubscribe ceph-devel" in=20 the body of a message to majordomo@vger.kernel.org=20 More majordomo info at http://vger.kernel.org/majordomo-info.html=20