All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@redhat.com>
To: "Benoît Canet" <benoit.canet@irqsave.net>
Cc: pl@kamp.de, qemu-devel@nongnu.org, mreitz@redhat.com,
	stefanha@redhat.com, pbonzini@redhat.com,
	xiawenc@linux.vnet.ibm.com
Subject: Re: [Qemu-devel] [PATCH v3 08/29] raw: Probe required direct I/O alignment
Date: Tue, 21 Jan 2014 14:29:50 +0100	[thread overview]
Message-ID: <20140121132950.GI3307@dhcp-200-207.str.redhat.com> (raw)
In-Reply-To: <20140121130335.GC9834@irqsave.net>

Am 21.01.2014 um 14:03 hat Benoît Canet geschrieben:
> Le Friday 17 Jan 2014 à 15:14:58 (+0100), Kevin Wolf a écrit :
> > From: Paolo Bonzini <pbonzini@redhat.com>
> > 
> > Add a bs->request_alignment field that contains the required
> > offset/length alignment for I/O requests and fill it in the raw block
> > drivers. Use ioctls if possible, else see what alignment it takes for
> > O_DIRECT to succeed.
> > 
> > While at it, also expose the memory alignment requirements, which may be
> > (and in practice are) different from the disk alignment requirements.
> > 
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > Reviewed-by: Max Reitz <mreitz@redhat.com>
> > ---
> >  block.c                   |   3 ++
> >  block/raw-posix.c         | 102 ++++++++++++++++++++++++++++++++++++++--------
> >  block/raw-win32.c         |  41 +++++++++++++++++++
> >  include/block/block_int.h |   3 ++
> >  4 files changed, 132 insertions(+), 17 deletions(-)
> > 
> > diff --git a/block.c b/block.c
> > index b738abe..25ae896 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -813,6 +813,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
> >  
> >      bs->open_flags = flags;
> >      bs->guest_block_size = 512;
> > +    bs->request_alignment = 512;
> >      bs->zero_beyond_eof = true;
> >      open_flags = bdrv_open_flags(bs, flags);
> >      bs->read_only = !(open_flags & BDRV_O_RDWR);
> > @@ -881,6 +882,8 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
> >      }
> >  
> >      bdrv_refresh_limits(bs);
> > +    assert(bdrv_opt_mem_align(bs) != 0);
> > +    assert(bs->request_alignment != 0);
> >  
> >  #ifndef _WIN32
> >      if (bs->is_temporary) {
> > diff --git a/block/raw-posix.c b/block/raw-posix.c
> > index 0676037..126a634 100644
> > --- a/block/raw-posix.c
> > +++ b/block/raw-posix.c
> > @@ -127,6 +127,8 @@ typedef struct BDRVRawState {
> >      int fd;
> >      int type;
> >      int open_flags;
> > +    size_t buf_align;
> > +
> >  #if defined(__linux__)
> >      /* linux floppy specific */
> >      int64_t fd_open_time;
> > @@ -213,6 +215,76 @@ static int raw_normalize_devicepath(const char **filename)
> >  }
> >  #endif
> >  
> > +static void raw_probe_alignment(BlockDriverState *bs)
> > +{
> > +    BDRVRawState *s = bs->opaque;
> > +    char *buf;
> > +    unsigned int sector_size;
> > +
> > +    /* For /dev/sg devices the alignment is not really used.
> > +       With buffered I/O, we don't have any restrictions. */
> > +    if (bs->sg || !(s->open_flags & O_DIRECT)) {
> > +        bs->request_alignment = 1;
> > +        s->buf_align = 1;
> > +        return;
> > +    }
> > +
> > +    /* Try a few ioctls to get the right size */
> > +    bs->request_alignment = 0;
> > +    s->buf_align = 0;
> > +
> > +#ifdef BLKSSZGET
> > +    if (ioctl(s->fd, BLKSSZGET, &sector_size) >= 0) {
> > +        bs->request_alignment = sector_size;
> > +    }
> > +#endif
> > +#ifdef DKIOCGETBLOCKSIZE
> > +    if (ioctl(s->fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
> > +        bs->request_alignment = sector_size;
> > +    }
> > +#endif
> > +#ifdef DIOCGSECTORSIZE
> > +    if (ioctl(s->fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
> > +        bs->request_alignment = sector_size;
> > +    }
> > +#endif
> > +#ifdef CONFIG_XFS
> > +    if (s->is_xfs) {
> > +        struct dioattr da;
> > +        if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) {
> > +            bs->request_alignment = da.d_miniosz;
> > +            /* The kernel returns wrong information for d_mem */
> > +            /* s->buf_align = da.d_mem; */
> > +        }
> > +    }
> > +#endif
> > +
> > +    /* If we could not get the sizes so far, we can only guess them */
> > +    if (!s->buf_align) {
> > +        size_t align;
> > +        buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
> > +        for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
> > +            if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
> > +                s->buf_align = align;
> > +                break;
> > +            }
> > +        }
> > +        qemu_vfree(buf);
> > +    }
> > +
> > +    if (!bs->request_alignment) {
> > +        size_t align;
> > +        buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
> > +        for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
> > +            if (pread(s->fd, buf, align, 0) >= 0) {
> > +                bs->request_alignment = align;
> > +                break;
> > +            }
> > +        }
> > +        qemu_vfree(buf);
> > +    }
> > +}
> > +
> >  static void raw_parse_flags(int bdrv_flags, int *open_flags)
> >  {
> >      assert(open_flags != NULL);
> > @@ -463,7 +535,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
> >      return ret;
> >  }
> >  
> > -
> >  static void raw_reopen_commit(BDRVReopenState *state)
> >  {
> >      BDRVRawReopenState *raw_s = state->opaque;
> > @@ -499,23 +570,15 @@ static void raw_reopen_abort(BDRVReopenState *state)
> >      state->opaque = NULL;
> >  }
> >  
> > +static int raw_refresh_limits(BlockDriverState *bs)
> > +{
> > +    BDRVRawState *s = bs->opaque;
> >  
> > -/* XXX: use host sector size if necessary with:
> > -#ifdef DIOCGSECTORSIZE
> > -        {
> > -            unsigned int sectorsize = 512;
> > -            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
> > -                sectorsize > bufsize)
> > -                bufsize = sectorsize;
> > -        }
> > -#endif
> > -#ifdef CONFIG_COCOA
> > -        uint32_t blockSize = 512;
> > -        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
> > -            bufsize = blockSize;
> > -        }
> > -#endif
> > -*/
> > +    raw_probe_alignment(bs);
> > +    bs->bl.opt_mem_alignment = s->buf_align;
> > +
> > +    return 0;
> > +}
> >  
> >  static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
> >  {
> > @@ -1363,6 +1426,7 @@ static BlockDriver bdrv_file = {
> >      .bdrv_aio_writev = raw_aio_writev,
> >      .bdrv_aio_flush = raw_aio_flush,
> >      .bdrv_aio_discard = raw_aio_discard,
> > +    .bdrv_refresh_limits = raw_refresh_limits,
> >  
> >      .bdrv_truncate = raw_truncate,
> >      .bdrv_getlength = raw_getlength,
> > @@ -1740,6 +1804,7 @@ static BlockDriver bdrv_host_device = {
> >      .bdrv_aio_writev	= raw_aio_writev,
> >      .bdrv_aio_flush	= raw_aio_flush,
> >      .bdrv_aio_discard   = hdev_aio_discard,
> > +    .bdrv_refresh_limits = raw_refresh_limits,
> >  
> >      .bdrv_truncate      = raw_truncate,
> >      .bdrv_getlength	= raw_getlength,
> > @@ -1871,6 +1936,7 @@ static BlockDriver bdrv_host_floppy = {
> >      .bdrv_aio_readv     = raw_aio_readv,
> >      .bdrv_aio_writev    = raw_aio_writev,
> >      .bdrv_aio_flush	= raw_aio_flush,
> > +    .bdrv_refresh_limits = raw_refresh_limits,
> >  
> >      .bdrv_truncate      = raw_truncate,
> >      .bdrv_getlength      = raw_getlength,
> > @@ -1981,6 +2047,7 @@ static BlockDriver bdrv_host_cdrom = {
> >      .bdrv_aio_readv     = raw_aio_readv,
> >      .bdrv_aio_writev    = raw_aio_writev,
> >      .bdrv_aio_flush	= raw_aio_flush,
> > +    .bdrv_refresh_limits = raw_refresh_limits,
> >  
> >      .bdrv_truncate      = raw_truncate,
> >      .bdrv_getlength      = raw_getlength,
> > @@ -2110,6 +2177,7 @@ static BlockDriver bdrv_host_cdrom = {
> >      .bdrv_aio_readv     = raw_aio_readv,
> >      .bdrv_aio_writev    = raw_aio_writev,
> >      .bdrv_aio_flush	= raw_aio_flush,
> > +    .bdrv_refresh_limits = raw_refresh_limits,
> >  
> >      .bdrv_truncate      = raw_truncate,
> >      .bdrv_getlength      = raw_getlength,
> > diff --git a/block/raw-win32.c b/block/raw-win32.c
> > index ce314fd..beb7f23 100644
> > --- a/block/raw-win32.c
> > +++ b/block/raw-win32.c
> > @@ -202,6 +202,35 @@ static int set_sparse(int fd)
> >  				 NULL, 0, NULL, 0, &returned, NULL);
> >  }
> >  
> > +static void raw_probe_alignment(BlockDriverState *bs)
> > +{
> > +    BDRVRawState *s = bs->opaque;
> > +    DWORD sectorsPerCluster, freeClusters, totalClusters, count;
> > +    DISK_GEOMETRY_EX dg;
> > +    BOOL status;
> > +
> > +    if (s->type == FTYPE_CD) {
> > +        bs->request_alignment = 2048;
> > +        return;
> > +    }
> > +    if (s->type == FTYPE_HARDDISK) {
> > +        status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
> > +                                 NULL, 0, &dg, sizeof(dg), &count, NULL);
> > +        if (status != 0) {
> > +            bs->request_alignment = dg.Geometry.BytesPerSector;
> > +            return;
> > +        }
> > +        /* try GetDiskFreeSpace too */
> > +    }
> > +
> > +    if (s->drive_path[0]) {
> > +        GetDiskFreeSpace(s->drive_path, &sectorsPerCluster,
> > +                         &dg.Geometry.BytesPerSector,
> > +                         &freeClusters, &totalClusters);
> > +        bs->request_alignment = dg.Geometry.BytesPerSector;
> > +    }
> > +}
> > +
> >  static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
> >  {
> >      assert(access_flags != NULL);
> > @@ -269,6 +298,17 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
> >          }
> >      }
> >  
> > +    if (filename[0] && filename[1] == ':') {
> > +        snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
> > +    } else if (filename[0] == '\\' && filename[1] == '\\') {
> > +        s->drive_path[0] = 0;
> > +    } else {
> > +        /* Relative path.  */
> > +        char buf[MAX_PATH];
> > +        GetCurrentDirectory(MAX_PATH, buf);
> > +        snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]);
> > +    }
> > +
> This is not really alignment-related.
> Does this really belong in this patch?

raw_probe_alignment() needs the path, so s->drive_path must now be
initialised for all images. Without this change it is only set in
hdev_open(), i.e. for win32 host_device backends.

Kevin

  reply	other threads:[~2014-01-21 13:30 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-01-17 14:14 [Qemu-devel] [PATCH v3 00/29] block: Support for 512b-on-4k emulation Kevin Wolf
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 01/29] block: Move initialisation of BlockLimits to bdrv_refresh_limits() Kevin Wolf
2014-01-17 22:39   ` Benoît Canet
2014-01-20  9:31     ` Kevin Wolf
2014-01-20  9:49       ` Peter Lieven
2014-01-21 12:49   ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 02/29] block: Inherit opt_transfer_length Kevin Wolf
2014-01-17 22:42   ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 03/29] block: Update BlockLimits when they might have changed Kevin Wolf
2014-01-17 22:47   ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 04/29] qemu_memalign: Allow small alignments Kevin Wolf
2014-01-17 22:49   ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 05/29] block: Detect unaligned length in bdrv_qiov_is_aligned() Kevin Wolf
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 06/29] block: Don't use guest sector size for qemu_blockalign() Kevin Wolf
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 07/29] block: rename buffer_alignment to guest_block_size Kevin Wolf
2014-01-21 12:54   ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 08/29] raw: Probe required direct I/O alignment Kevin Wolf
2014-01-21 13:03   ` Benoît Canet
2014-01-21 13:29     ` Kevin Wolf [this message]
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 09/29] block: Introduce bdrv_aligned_preadv() Kevin Wolf
2014-01-21 13:13   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 10/29] block: Introduce bdrv_co_do_preadv() Kevin Wolf
2014-01-17 23:59   ` Max Reitz
2014-01-21 13:29   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 11/29] block: Introduce bdrv_aligned_pwritev() Kevin Wolf
2014-01-21 13:31   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 12/29] block: write: Handle COR dependency after I/O throttling Kevin Wolf
2014-01-21 13:33   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 13/29] block: Introduce bdrv_co_do_pwritev() Kevin Wolf
2014-01-18  0:00   ` Max Reitz
2014-01-21 13:36   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 14/29] block: Switch BdrvTrackedRequest to byte granularity Kevin Wolf
2014-01-17 23:19   ` Max Reitz
2014-01-21 13:49   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 15/29] block: Allow waiting for overlapping requests between begin/end Kevin Wolf
2014-01-22 19:46   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 16/29] block: Make zero-after-EOF work with larger alignment Kevin Wolf
2014-01-17 23:21   ` Max Reitz
2014-01-22 19:50   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 17/29] block: Generalise and optimise COR serialisation Kevin Wolf
2014-01-22 20:00   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 18/29] block: Make overlap range for serialisation dynamic Kevin Wolf
2014-01-22 20:15   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 19/29] block: Allow wait_serialising_requests() at any point Kevin Wolf
2014-01-22 20:21   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 20/29] block: Align requests in bdrv_co_do_pwritev() Kevin Wolf
2014-01-22 20:29   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 21/29] block: Assert serialisation assumptions in pwritev Kevin Wolf
2014-01-17 23:42   ` Max Reitz
2014-01-24 16:09   ` Benoît Canet
2014-01-24 16:18     ` Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 22/29] block: Change coroutine wrapper to byte granularity Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 23/29] block: Make bdrv_pread() a bdrv_prwv_co() wrapper Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 24/29] block: Make bdrv_pwrite() " Kevin Wolf
2014-01-17 23:43   ` Max Reitz
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 25/29] iscsi: Set bs->request_alignment Kevin Wolf
2014-01-24 16:29   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 26/29] blkdebug: Make required alignment configurable Kevin Wolf
2014-01-17 23:50   ` Max Reitz
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 27/29] qemu-io: New command 'sleep' Kevin Wolf
2014-01-17 23:55   ` Max Reitz
2014-01-20  9:58     ` Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 28/29] qemu-iotests: Test pwritev RMW logic Kevin Wolf
2014-01-18 16:01   ` Max Reitz
2014-01-20  9:44     ` Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 29/29] block: Switch bdrv_io_limits_intercept() to byte granularity Kevin Wolf
2014-01-17 23:59   ` Max Reitz
2014-01-22 20:30 ` [Qemu-devel] [PATCH v3 00/29] block: Support for 512b-on-4k emulation Christian Borntraeger
2014-01-23 10:29   ` Kevin Wolf
2014-01-23 11:12     ` Christian Borntraeger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140121132950.GI3307@dhcp-200-207.str.redhat.com \
    --to=kwolf@redhat.com \
    --cc=benoit.canet@irqsave.net \
    --cc=mreitz@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=pl@kamp.de \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    --cc=xiawenc@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.