All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] request_alignment vs file size
@ 2020-01-30 15:22 Vladimir Sementsov-Ogievskiy
  2020-01-30 15:22 ` [PATCH 1/3] block/file-posix: add raw_getlength_fd Vladimir Sementsov-Ogievskiy
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2020-01-30 15:22 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, den, vsementsov, qemu-devel, mreitz

Hi.

This is a continuation for thread
 "request_alignment vs file size, how to fix crash?"
 <2ca46523-44a2-1a48-dfa3-11bda9eef8e8@virtuozzo.com>
 https://lists.gnu.org/archive/html/qemu-devel/2020-01/msg07276.html

Generic block-layer assumes that file size is aligned to
request_alignment, and may crash otherwise (see patch 03 for simple
reproducer).

So, let's at least check this thing on generic driver open,
and fix file-posix to consider file-size, when it wants to fallback to
large request_alignment.

Vladimir Sementsov-Ogievskiy (3):
  block/file-posix: add raw_getlength_fd
  block/file-posix: consider file size when fallback to max_align
  block: fail on open when file size is unaligned to request_alignment

 block.c            |  7 ++++++
 block/file-posix.c | 60 +++++++++++++++++++++++++++++-----------------
 2 files changed, 45 insertions(+), 22 deletions(-)

-- 
2.21.0



^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/3] block/file-posix: add raw_getlength_fd
  2020-01-30 15:22 [PATCH 0/3] request_alignment vs file size Vladimir Sementsov-Ogievskiy
@ 2020-01-30 15:22 ` Vladimir Sementsov-Ogievskiy
  2020-03-11 10:21   ` Max Reitz
  2020-01-30 15:22 ` [PATCH 2/3] block/file-posix: consider file size when fallback to max_align Vladimir Sementsov-Ogievskiy
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 15+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2020-01-30 15:22 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, den, vsementsov, qemu-devel, mreitz

Add function which can handle separate fd, to be called from
raw_probe_alignment in the following commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block/file-posix.c | 44 +++++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 1b805bd938..7f366046c2 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -178,7 +178,21 @@ typedef struct BDRVRawReopenState {
 } BDRVRawReopenState;
 
 static int fd_open(BlockDriverState *bs);
-static int64_t raw_getlength(BlockDriverState *bs);
+static int64_t raw_getlength_fd(BlockDriverState *bs, int fd);
+
+static int64_t raw_getlength(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    int ret;
+
+    ret = fd_open(bs);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return raw_getlength_fd(bs, s->fd);
+}
+
 
 typedef struct RawPosixAIOData {
     BlockDriverState *bs;
@@ -2063,10 +2077,8 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
 }
 
 #ifdef __OpenBSD__
-static int64_t raw_getlength(BlockDriverState *bs)
+static int64_t raw_getlength_fd(BlockDriverState *bs, int fd)
 {
-    BDRVRawState *s = bs->opaque;
-    int fd = s->fd;
     struct stat st;
 
     if (fstat(fd, &st))
@@ -2082,10 +2094,8 @@ static int64_t raw_getlength(BlockDriverState *bs)
         return st.st_size;
 }
 #elif defined(__NetBSD__)
-static int64_t raw_getlength(BlockDriverState *bs)
+static int64_t raw_getlength_fd(BlockDriverState *bs, int fd)
 {
-    BDRVRawState *s = bs->opaque;
-    int fd = s->fd;
     struct stat st;
 
     if (fstat(fd, &st))
@@ -2107,22 +2117,16 @@ static int64_t raw_getlength(BlockDriverState *bs)
         return st.st_size;
 }
 #elif defined(__sun__)
-static int64_t raw_getlength(BlockDriverState *bs)
+static int64_t raw_getlength_fd(BlockDriverState *bs, int fd)
 {
-    BDRVRawState *s = bs->opaque;
     struct dk_minfo minfo;
     int ret;
     int64_t size;
 
-    ret = fd_open(bs);
-    if (ret < 0) {
-        return ret;
-    }
-
     /*
      * Use the DKIOCGMEDIAINFO ioctl to read the size.
      */
-    ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo);
+    ret = ioctl(fd, DKIOCGMEDIAINFO, &minfo);
     if (ret != -1) {
         return minfo.dki_lbsize * minfo.dki_capacity;
     }
@@ -2131,17 +2135,16 @@ static int64_t raw_getlength(BlockDriverState *bs)
      * There are reports that lseek on some devices fails, but
      * irc discussion said that contingency on contingency was overkill.
      */
-    size = lseek(s->fd, 0, SEEK_END);
+    size = lseek(fd, 0, SEEK_END);
     if (size < 0) {
         return -errno;
     }
     return size;
 }
 #elif defined(CONFIG_BSD)
-static int64_t raw_getlength(BlockDriverState *bs)
+static int64_t raw_getlength_fd(BlockDriverState *bs, int fd)
 {
     BDRVRawState *s = bs->opaque;
-    int fd = s->fd;
     int64_t size;
     struct stat sb;
 #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -2212,9 +2215,8 @@ again:
     return size;
 }
 #else
-static int64_t raw_getlength(BlockDriverState *bs)
+static int64_t raw_getlength_fd(BlockDriverState *bs, int fd)
 {
-    BDRVRawState *s = bs->opaque;
     int ret;
     int64_t size;
 
@@ -2223,7 +2225,7 @@ static int64_t raw_getlength(BlockDriverState *bs)
         return ret;
     }
 
-    size = lseek(s->fd, 0, SEEK_END);
+    size = lseek(fd, 0, SEEK_END);
     if (size < 0) {
         return -errno;
     }
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/3] block/file-posix: consider file size when fallback to max_align
  2020-01-30 15:22 [PATCH 0/3] request_alignment vs file size Vladimir Sementsov-Ogievskiy
  2020-01-30 15:22 ` [PATCH 1/3] block/file-posix: add raw_getlength_fd Vladimir Sementsov-Ogievskiy
@ 2020-01-30 15:22 ` Vladimir Sementsov-Ogievskiy
  2020-03-11 10:46   ` Max Reitz
  2020-01-30 15:22 ` [PATCH 3/3] block: fail on open when file size is unaligned to request_alignment Vladimir Sementsov-Ogievskiy
  2020-01-30 15:52 ` [PATCH 0/3] request_alignment vs file size no-reply
  3 siblings, 1 reply; 15+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2020-01-30 15:22 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, den, vsementsov, qemu-devel, mreitz

If we failed to probe request_align, we fallback to max_align. But
this is wrong, if file size is not aligned to our max_align. Let's
instead chose alignment so that file size is a multiple of it.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block/file-posix.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 7f366046c2..e9c4e509f6 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -385,11 +385,25 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
             align = alignments[i];
             if (raw_is_io_aligned(fd, buf, align)) {
                 /* Fallback to safe value. */
-                bs->bl.request_alignment = (align != 1) ? align : max_align;
+                bs->bl.request_alignment = align;
                 break;
             }
         }
         qemu_vfree(buf);
+
+        if (bs->bl.request_alignment == 1) {
+            /*
+             * Succeed to read with alignment = 1. But it may be unallocated
+             * area on XFS, and we'll fail later if keep request_alignment = 1.
+             *
+             * Chose safer alignment, keeping in mind file size if possible.
+             */
+
+            int64_t len = raw_getlength_fd(bs, fd);
+
+            bs->bl.request_alignment =
+                    len <= 0 ? max_align : MIN(max_align, len & ~(len - 1));
+        }
     }
 
     if (!s->buf_align) {
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/3] block: fail on open when file size is unaligned to request_alignment
  2020-01-30 15:22 [PATCH 0/3] request_alignment vs file size Vladimir Sementsov-Ogievskiy
  2020-01-30 15:22 ` [PATCH 1/3] block/file-posix: add raw_getlength_fd Vladimir Sementsov-Ogievskiy
  2020-01-30 15:22 ` [PATCH 2/3] block/file-posix: consider file size when fallback to max_align Vladimir Sementsov-Ogievskiy
@ 2020-01-30 15:22 ` Vladimir Sementsov-Ogievskiy
  2020-03-11 11:06   ` Max Reitz
  2020-01-30 15:52 ` [PATCH 0/3] request_alignment vs file size no-reply
  3 siblings, 1 reply; 15+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2020-01-30 15:22 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, den, vsementsov, qemu-devel, mreitz

Prior to the commit the following command lead to crash:

  ./qemu-io --image-opts -c 'write 0 512' \
  driver=blkdebug,align=4096,image.driver=null-co,image.size=512

It failes on assertion in bdrv_aligned_pwritev:
  "end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE"

The problem is obvious: 512 is aligned to 4096 and becomes larger than
file size. And the core bad thing is that file size is unaligned to
request_alignment.

Let's catch such case on bdrv_open_driver and fail.

Note, that file size and request_alignment may become out of sync
later, so this commit is not full fix of the problem, but it's better
than nothing.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/block.c b/block.c
index ecd09dbbfd..4cfc6c33a2 100644
--- a/block.c
+++ b/block.c
@@ -1324,6 +1324,13 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
     assert(bdrv_min_mem_align(bs) != 0);
     assert(is_power_of_2(bs->bl.request_alignment));
 
+    if (bs->bl.request_alignment > 512 &&
+        !QEMU_IS_ALIGNED(bs->total_sectors, bs->bl.request_alignment / 512))
+    {
+        error_setg(errp, "File size is unaligned to request alignment");
+        return -EINVAL;
+    }
+
     for (i = 0; i < bs->quiesce_counter; i++) {
         if (drv->bdrv_co_drain_begin) {
             drv->bdrv_co_drain_begin(bs);
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/3] request_alignment vs file size
  2020-01-30 15:22 [PATCH 0/3] request_alignment vs file size Vladimir Sementsov-Ogievskiy
                   ` (2 preceding siblings ...)
  2020-01-30 15:22 ` [PATCH 3/3] block: fail on open when file size is unaligned to request_alignment Vladimir Sementsov-Ogievskiy
@ 2020-01-30 15:52 ` no-reply
  2020-01-30 16:06   ` Vladimir Sementsov-Ogievskiy
  3 siblings, 1 reply; 15+ messages in thread
From: no-reply @ 2020-01-30 15:52 UTC (permalink / raw)
  To: vsementsov; +Cc: kwolf, vsementsov, qemu-block, qemu-devel, mreitz, den

Patchew URL: https://patchew.org/QEMU/20200130152218.7600-1-vsementsov@virtuozzo.com/



Hi,

This series failed the docker-quick@centos7 build test. Please find the testing commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#!/bin/bash
make docker-image-centos7 V=1 NETWORK=1
time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1
=== TEST SCRIPT END ===

 *** done
Failures: 268
Failed 1 of 108 iotests
make: *** [check-tests/check-block.sh] Error 1
Traceback (most recent call last):
  File "./tests/docker/docker.py", line 662, in <module>
    sys.exit(main())
---
    raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', '--label', 'com.qemu.instance.uuid=393f106d3cef4268a1d74151451d2a08', '-u', '1001', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', '-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 'SHOW_ENV=1', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', '/home/patchew/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', '/var/tmp/patchew-tester-tmp-rrfmlftz/src/docker-src.2020-01-30-10.43.08.18508:/var/tmp/qemu:z,ro', 'qemu:centos7', '/var/tmp/qemu/run', 'test-quick']' returned non-zero exit status 2.
filter=--filter=label=com.qemu.instance.uuid=393f106d3cef4268a1d74151451d2a08
make[1]: *** [docker-run] Error 1
make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-rrfmlftz/src'
make: *** [docker-run-test-quick@centos7] Error 2

real    9m28.931s
user    0m9.285s


The full log is available at
http://patchew.org/logs/20200130152218.7600-1-vsementsov@virtuozzo.com/testing.docker-quick@centos7/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-devel@redhat.com

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/3] request_alignment vs file size
  2020-01-30 15:52 ` [PATCH 0/3] request_alignment vs file size no-reply
@ 2020-01-30 16:06   ` Vladimir Sementsov-Ogievskiy
  0 siblings, 0 replies; 15+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2020-01-30 16:06 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, den, qemu-block, mreitz

30.01.2020 18:52, no-reply@patchew.org wrote:
> Patchew URL: https://patchew.org/QEMU/20200130152218.7600-1-vsementsov@virtuozzo.com/
> 
> 
> 
> Hi,
> 
> This series failed the docker-quick@centos7 build test. Please find the testing commands and
> their output below. If you have Docker installed, you can probably reproduce it
> locally.
> 
> === TEST SCRIPT BEGIN ===
> #!/bin/bash
> make docker-image-centos7 V=1 NETWORK=1
> time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1
> === TEST SCRIPT END ===
> 
>   *** done
> Failures: 268
> Failed 1 of 108 iotests
> make: *** [check-tests/check-block.sh] Error 1
> Traceback (most recent call last):
>    File "./tests/docker/docker.py", line 662, in <module>
>      sys.exit(main())
> ---
>      raise CalledProcessError(retcode, cmd)
> subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', '--label', 'com.qemu.instance.uuid=393f106d3cef4268a1d74151451d2a08', '-u', '1001', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', '-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 'SHOW_ENV=1', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', '/home/patchew/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', '/var/tmp/patchew-tester-tmp-rrfmlftz/src/docker-src.2020-01-30-10.43.08.18508:/var/tmp/qemu:z,ro', 'qemu:centos7', '/var/tmp/qemu/run', 'test-quick']' returned non-zero exit status 2.
> filter=--filter=label=com.qemu.instance.uuid=393f106d3cef4268a1d74151451d2a08
> make[1]: *** [docker-run] Error 1
> make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-rrfmlftz/src'
> make: *** [docker-run-test-quick@centos7] Error 2
> 
> real    9m28.931s
> user    0m9.285s
> 
> 
> The full log is available at
> http://patchew.org/logs/20200130152218.7600-1-vsementsov@virtuozzo.com/testing.docker-quick@centos7/?type=message.
> ---
> Email generated automatically by Patchew [https://patchew.org/].
> Please send your feedback to patchew-devel@redhat.com
> 


It's unrelated I think.

-- 
Best regards,
Vladimir


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] block/file-posix: add raw_getlength_fd
  2020-01-30 15:22 ` [PATCH 1/3] block/file-posix: add raw_getlength_fd Vladimir Sementsov-Ogievskiy
@ 2020-03-11 10:21   ` Max Reitz
  0 siblings, 0 replies; 15+ messages in thread
From: Max Reitz @ 2020-03-11 10:21 UTC (permalink / raw)
  To: Vladimir Sementsov-Ogievskiy, qemu-block; +Cc: kwolf, den, qemu-devel


[-- Attachment #1.1: Type: text/plain, Size: 4958 bytes --]

On 30.01.20 16:22, Vladimir Sementsov-Ogievskiy wrote:
> Add function which can handle separate fd, to be called from
> raw_probe_alignment in the following commit.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>  block/file-posix.c | 44 +++++++++++++++++++++++---------------------
>  1 file changed, 23 insertions(+), 21 deletions(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 1b805bd938..7f366046c2 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -178,7 +178,21 @@ typedef struct BDRVRawReopenState {
>  } BDRVRawReopenState;
>  
>  static int fd_open(BlockDriverState *bs);
> -static int64_t raw_getlength(BlockDriverState *bs);
> +static int64_t raw_getlength_fd(BlockDriverState *bs, int fd);
> +
> +static int64_t raw_getlength(BlockDriverState *bs)
> +{
> +    BDRVRawState *s = bs->opaque;
> +    int ret;
> +
> +    ret = fd_open(bs);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    return raw_getlength_fd(bs, s->fd);
> +}
> +
>  
>  typedef struct RawPosixAIOData {
>      BlockDriverState *bs;
> @@ -2063,10 +2077,8 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
>  }
>  
>  #ifdef __OpenBSD__
> -static int64_t raw_getlength(BlockDriverState *bs)
> +static int64_t raw_getlength_fd(BlockDriverState *bs, int fd)
>  {
> -    BDRVRawState *s = bs->opaque;
> -    int fd = s->fd;
>      struct stat st;
>  
>      if (fstat(fd, &st))
> @@ -2082,10 +2094,8 @@ static int64_t raw_getlength(BlockDriverState *bs)
>          return st.st_size;
>  }
>  #elif defined(__NetBSD__)
> -static int64_t raw_getlength(BlockDriverState *bs)
> +static int64_t raw_getlength_fd(BlockDriverState *bs, int fd)
>  {
> -    BDRVRawState *s = bs->opaque;
> -    int fd = s->fd;
>      struct stat st;
>  
>      if (fstat(fd, &st))
> @@ -2107,22 +2117,16 @@ static int64_t raw_getlength(BlockDriverState *bs)
>          return st.st_size;
>  }
>  #elif defined(__sun__)
> -static int64_t raw_getlength(BlockDriverState *bs)
> +static int64_t raw_getlength_fd(BlockDriverState *bs, int fd)
>  {
> -    BDRVRawState *s = bs->opaque;
>      struct dk_minfo minfo;
>      int ret;
>      int64_t size;
>  
> -    ret = fd_open(bs);
> -    if (ret < 0) {
> -        return ret;
> -    }
> -
>      /*
>       * Use the DKIOCGMEDIAINFO ioctl to read the size.
>       */
> -    ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo);
> +    ret = ioctl(fd, DKIOCGMEDIAINFO, &minfo);
>      if (ret != -1) {
>          return minfo.dki_lbsize * minfo.dki_capacity;
>      }
> @@ -2131,17 +2135,16 @@ static int64_t raw_getlength(BlockDriverState *bs)
>       * There are reports that lseek on some devices fails, but
>       * irc discussion said that contingency on contingency was overkill.
>       */
> -    size = lseek(s->fd, 0, SEEK_END);
> +    size = lseek(fd, 0, SEEK_END);
>      if (size < 0) {
>          return -errno;
>      }
>      return size;
>  }
>  #elif defined(CONFIG_BSD)
> -static int64_t raw_getlength(BlockDriverState *bs)
> +static int64_t raw_getlength_fd(BlockDriverState *bs, int fd)
>  {
>      BDRVRawState *s = bs->opaque;

I think we should also drop this, and the fd_open() call in this function.

> -    int fd = s->fd;
>      int64_t size;
>      struct stat sb;
>  #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
> @@ -2212,9 +2215,8 @@ again:
>      return size;
>  }
>  #else
> -static int64_t raw_getlength(BlockDriverState *bs)
> +static int64_t raw_getlength_fd(BlockDriverState *bs, int fd)
>  {
> -    BDRVRawState *s = bs->opaque;
>      int ret;
>      int64_t size;
>  
> @@ -2223,7 +2225,7 @@ static int64_t raw_getlength(BlockDriverState *bs)
>          return ret;
>      }

And we can drop the fd_open() call here, too.

>  
> -    size = lseek(s->fd, 0, SEEK_END);
> +    size = lseek(fd, 0, SEEK_END);
>      if (size < 0) {
>          return -errno;
>      }
> 

If we drop those fd_open() calls, there is only one reason we’d still
need the @bs parameter anyway, and that’s the CD-ROM handling code for
FreeBSD.  Speaking of which: That code is broken after this patch
because cdrom_reopen() will change s->fd, which, after this patch, is
completely ignored by raw_getlength_fd().

So I don’t know.  Should that CD handling code set “fd = s->fd” after
cdrom_reopen()?  Or should we just drop it, because I actually can’t
imagine it to be that important or used ever?  (I suppose it might do
something when you change the physical disk while qemu is running and
the device file is attached to qemu, but I’m not sure whether such
passthrough things even work anymore.)

If we dropped it, we could drop the @bs parameter from
raw_getlength_fd() altogether.  I don’t know whether that would actually
have any benefit in practice, though.

Max


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] block/file-posix: consider file size when fallback to max_align
  2020-01-30 15:22 ` [PATCH 2/3] block/file-posix: consider file size when fallback to max_align Vladimir Sementsov-Ogievskiy
@ 2020-03-11 10:46   ` Max Reitz
  0 siblings, 0 replies; 15+ messages in thread
From: Max Reitz @ 2020-03-11 10:46 UTC (permalink / raw)
  To: Vladimir Sementsov-Ogievskiy, qemu-block; +Cc: kwolf, den, qemu-devel


[-- Attachment #1.1: Type: text/plain, Size: 1881 bytes --]

On 30.01.20 16:22, Vladimir Sementsov-Ogievskiy wrote:
> If we failed to probe request_align, we fallback to max_align. But
> this is wrong, if file size is not aligned to our max_align. Let's
> instead chose alignment so that file size is a multiple of it.

It’s entirely possible that the file size is not aligned to the request
alignment, though.  If so, this patch will make the whole file inaccessible.

> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>  block/file-posix.c | 16 +++++++++++++++-
>  1 file changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 7f366046c2..e9c4e509f6 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -385,11 +385,25 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
>              align = alignments[i];
>              if (raw_is_io_aligned(fd, buf, align)) {
>                  /* Fallback to safe value. */
> -                bs->bl.request_alignment = (align != 1) ? align : max_align;
> +                bs->bl.request_alignment = align;

The comment above is now outdated.

Max

>                  break;
>              }
>          }
>          qemu_vfree(buf);
> +
> +        if (bs->bl.request_alignment == 1) {
> +            /*
> +             * Succeed to read with alignment = 1. But it may be unallocated
> +             * area on XFS, and we'll fail later if keep request_alignment = 1.
> +             *
> +             * Chose safer alignment, keeping in mind file size if possible.
> +             */
> +
> +            int64_t len = raw_getlength_fd(bs, fd);
> +
> +            bs->bl.request_alignment =
> +                    len <= 0 ? max_align : MIN(max_align, len & ~(len - 1));
> +        }
>      }
>  
>      if (!s->buf_align) {
> 



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] block: fail on open when file size is unaligned to request_alignment
  2020-01-30 15:22 ` [PATCH 3/3] block: fail on open when file size is unaligned to request_alignment Vladimir Sementsov-Ogievskiy
@ 2020-03-11 11:06   ` Max Reitz
  2020-03-11 12:29     ` Eric Blake
  2020-03-12 11:59     ` Vladimir Sementsov-Ogievskiy
  0 siblings, 2 replies; 15+ messages in thread
From: Max Reitz @ 2020-03-11 11:06 UTC (permalink / raw)
  To: Vladimir Sementsov-Ogievskiy, qemu-block; +Cc: kwolf, den, qemu-devel


[-- Attachment #1.1: Type: text/plain, Size: 2608 bytes --]

On 30.01.20 16:22, Vladimir Sementsov-Ogievskiy wrote:
> Prior to the commit the following command lead to crash:
> 
>   ./qemu-io --image-opts -c 'write 0 512' \
>   driver=blkdebug,align=4096,image.driver=null-co,image.size=512
> 
> It failes on assertion in bdrv_aligned_pwritev:
>   "end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE"
> 
> The problem is obvious: 512 is aligned to 4096 and becomes larger than
> file size. And the core bad thing is that file size is unaligned to
> request_alignment.
> 
> Let's catch such case on bdrv_open_driver and fail.

I think we had a discussion on this before, but I can’t find it right
now.  (Although I think that had more to do with something in the
file-posix driver, because it wasn’t limited to alignments above 512.)

In any case, the file itself is totally valid.  Most importantly, qcow2
will regularly create files with unaligned file lengths.

So let me create a qcow2 image on a 4k-aligned device:

$ truncate 512M fs.img
$ sudo losetup -f --show -b 4096 fs.img
/dev/loop0
$ sudo mkfs.ext4 /dev/loop0
[...]
$ sudo mount /dev/loop0 /mnt/tmp

$ sudo ./qemu-img create -f qcow2 /mnt/tmp/foo.qcow2 64M
Formatting '/mnt/tmp/foo.qcow2', fmt=qcow2 size=67108864
cluster_size=65536 lazy_refcounts=off refcount_bits=16
$ sudo ./qemu-io -t none -c quit /mnt/tmp/foo.qcow2
qemu-io: can't open device /mnt/tmp/foo.qcow2: File size is unaligned to
request alignment

Which is too bad.

So the real solution would probably...  Be to align the file size up to
the alignment?

Max

> Note, that file size and request_alignment may become out of sync
> later, so this commit is not full fix of the problem, but it's better
> than nothing.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>  block.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/block.c b/block.c
> index ecd09dbbfd..4cfc6c33a2 100644
> --- a/block.c
> +++ b/block.c
> @@ -1324,6 +1324,13 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
>      assert(bdrv_min_mem_align(bs) != 0);
>      assert(is_power_of_2(bs->bl.request_alignment));
>  
> +    if (bs->bl.request_alignment > 512 &&
> +        !QEMU_IS_ALIGNED(bs->total_sectors, bs->bl.request_alignment / 512))
> +    {
> +        error_setg(errp, "File size is unaligned to request alignment");
> +        return -EINVAL;
> +    }
> +
>      for (i = 0; i < bs->quiesce_counter; i++) {
>          if (drv->bdrv_co_drain_begin) {
>              drv->bdrv_co_drain_begin(bs);
> 



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] block: fail on open when file size is unaligned to request_alignment
  2020-03-11 11:06   ` Max Reitz
@ 2020-03-11 12:29     ` Eric Blake
  2020-03-12  9:06       ` Vladimir Sementsov-Ogievskiy
  2020-03-12 11:59     ` Vladimir Sementsov-Ogievskiy
  1 sibling, 1 reply; 15+ messages in thread
From: Eric Blake @ 2020-03-11 12:29 UTC (permalink / raw)
  To: Max Reitz, Vladimir Sementsov-Ogievskiy, qemu-block
  Cc: kwolf, den, qemu-devel

On 3/11/20 6:06 AM, Max Reitz wrote:
> On 30.01.20 16:22, Vladimir Sementsov-Ogievskiy wrote:
>> Prior to the commit the following command lead to crash:
>>
>>    ./qemu-io --image-opts -c 'write 0 512' \
>>    driver=blkdebug,align=4096,image.driver=null-co,image.size=512
>>
>> It failes on assertion in bdrv_aligned_pwritev:
>>    "end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE"
>>
>> The problem is obvious: 512 is aligned to 4096 and becomes larger than
>> file size. And the core bad thing is that file size is unaligned to
>> request_alignment.
>>
>> Let's catch such case on bdrv_open_driver and fail.
> 
> I think we had a discussion on this before, but I can’t find it right
> now.  (Although I think that had more to do with something in the
> file-posix driver, because it wasn’t limited to alignments above 512.)
> 
> In any case, the file itself is totally valid.  Most importantly, qcow2
> will regularly create files with unaligned file lengths.
> 
> So let me create a qcow2 image on a 4k-aligned device:
> 
> $ truncate 512M fs.img
> $ sudo losetup -f --show -b 4096 fs.img
> /dev/loop0
> $ sudo mkfs.ext4 /dev/loop0
> [...]
> $ sudo mount /dev/loop0 /mnt/tmp
> 
> $ sudo ./qemu-img create -f qcow2 /mnt/tmp/foo.qcow2 64M
> Formatting '/mnt/tmp/foo.qcow2', fmt=qcow2 size=67108864
> cluster_size=65536 lazy_refcounts=off refcount_bits=16
> $ sudo ./qemu-io -t none -c quit /mnt/tmp/foo.qcow2
> qemu-io: can't open device /mnt/tmp/foo.qcow2: File size is unaligned to
> request alignment
> 
> Which is too bad.
> 
> So the real solution would probably...  Be to align the file size up to
> the alignment?

Or to bite the bullet and finally implement byte-accurate size 
everywhere (instead of our current insistence on rounding size up to 
512-byte multiples).  If we have to deal with unaligned tails anyways, 
it's better to make the code universally applicable whether that 
unaligned tail is to 512 or to 4k, than to have it work for 512 but to 
fail for 4k.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] block: fail on open when file size is unaligned to request_alignment
  2020-03-11 12:29     ` Eric Blake
@ 2020-03-12  9:06       ` Vladimir Sementsov-Ogievskiy
  2020-03-12 11:31         ` Eric Blake
  0 siblings, 1 reply; 15+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2020-03-12  9:06 UTC (permalink / raw)
  To: Eric Blake, Max Reitz, qemu-block; +Cc: kwolf, den, qemu-devel

11.03.2020 15:29, Eric Blake wrote:
> On 3/11/20 6:06 AM, Max Reitz wrote:
>> On 30.01.20 16:22, Vladimir Sementsov-Ogievskiy wrote:
>>> Prior to the commit the following command lead to crash:
>>>
>>>    ./qemu-io --image-opts -c 'write 0 512' \
>>>    driver=blkdebug,align=4096,image.driver=null-co,image.size=512
>>>
>>> It failes on assertion in bdrv_aligned_pwritev:
>>>    "end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE"
>>>
>>> The problem is obvious: 512 is aligned to 4096 and becomes larger than
>>> file size. And the core bad thing is that file size is unaligned to
>>> request_alignment.
>>>
>>> Let's catch such case on bdrv_open_driver and fail.
>>
>> I think we had a discussion on this before, but I can’t find it right
>> now.  (Although I think that had more to do with something in the
>> file-posix driver, because it wasn’t limited to alignments above 512.)
>>
>> In any case, the file itself is totally valid.  Most importantly, qcow2
>> will regularly create files with unaligned file lengths.
>>
>> So let me create a qcow2 image on a 4k-aligned device:
>>
>> $ truncate 512M fs.img
>> $ sudo losetup -f --show -b 4096 fs.img
>> /dev/loop0
>> $ sudo mkfs.ext4 /dev/loop0
>> [...]
>> $ sudo mount /dev/loop0 /mnt/tmp
>>
>> $ sudo ./qemu-img create -f qcow2 /mnt/tmp/foo.qcow2 64M
>> Formatting '/mnt/tmp/foo.qcow2', fmt=qcow2 size=67108864
>> cluster_size=65536 lazy_refcounts=off refcount_bits=16
>> $ sudo ./qemu-io -t none -c quit /mnt/tmp/foo.qcow2
>> qemu-io: can't open device /mnt/tmp/foo.qcow2: File size is unaligned to
>> request alignment
>>
>> Which is too bad.
>>
>> So the real solution would probably...  Be to align the file size up to
>> the alignment?
> 
> Or to bite the bullet and finally implement byte-accurate size everywhere (instead of our current insistence on rounding size up to 512-byte multiples).  If we have to deal with unaligned tails anyways, it's better to make the code universally applicable whether that unaligned tail is to 512 or to 4k, than to have it work for 512 but to fail for 4k.
> 

But how it helps with the problem of files unaligned to request_alignment defined by driver?

-- 
Best regards,
Vladimir


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] block: fail on open when file size is unaligned to request_alignment
  2020-03-12  9:06       ` Vladimir Sementsov-Ogievskiy
@ 2020-03-12 11:31         ` Eric Blake
  0 siblings, 0 replies; 15+ messages in thread
From: Eric Blake @ 2020-03-12 11:31 UTC (permalink / raw)
  To: Vladimir Sementsov-Ogievskiy, Max Reitz, qemu-block
  Cc: kwolf, den, qemu-devel

On 3/12/20 4:06 AM, Vladimir Sementsov-Ogievskiy wrote:

>>> So the real solution would probably...  Be to align the file size up to
>>> the alignment?
>>
>> Or to bite the bullet and finally implement byte-accurate size 
>> everywhere (instead of our current insistence on rounding size up to 
>> 512-byte multiples).  If we have to deal with unaligned tails anyways, 
>> it's better to make the code universally applicable whether that 
>> unaligned tail is to 512 or to 4k, than to have it work for 512 but to 
>> fail for 4k.
>>
> 
> But how it helps with the problem of files unaligned to 
> request_alignment defined by driver?

In a byte-accurate world, no driver should ever report an unaligned 
size.  If the driver is capable of sub-sector sizes, it is also capable 
of sub-sector I/O and should state as such in its request_alignment. 
The block layer then takes care of ensuring that any access of the final 
unaligned sector or 4k region either leaves the bytes past EOF alone, or 
at most reads those bytes as zeroes, and maybe permits attempts to write 
no-op zeroes to those bytes, but gracefully forbids attempts to store 
data that would cause the file to be resized.

But that's the ideal world, and it requires a lot of code auditing to 
get there. It probably won't happen in time for the 5.0 release.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] block: fail on open when file size is unaligned to request_alignment
  2020-03-11 11:06   ` Max Reitz
  2020-03-11 12:29     ` Eric Blake
@ 2020-03-12 11:59     ` Vladimir Sementsov-Ogievskiy
  2020-03-12 12:06       ` Vladimir Sementsov-Ogievskiy
  1 sibling, 1 reply; 15+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2020-03-12 11:59 UTC (permalink / raw)
  To: Max Reitz, qemu-block; +Cc: kwolf, den, qemu-devel

11.03.2020 14:06, Max Reitz wrote:
> On 30.01.20 16:22, Vladimir Sementsov-Ogievskiy wrote:
>> Prior to the commit the following command lead to crash:
>>
>>    ./qemu-io --image-opts -c 'write 0 512' \
>>    driver=blkdebug,align=4096,image.driver=null-co,image.size=512
>>
>> It failes on assertion in bdrv_aligned_pwritev:
>>    "end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE"
>>
>> The problem is obvious: 512 is aligned to 4096 and becomes larger than
>> file size. And the core bad thing is that file size is unaligned to
>> request_alignment.
>>
>> Let's catch such case on bdrv_open_driver and fail.
> 
> I think we had a discussion on this before, but I can’t find it right
> now.  (Although I think that had more to do with something in the
> file-posix driver, because it wasn’t limited to alignments above 512.)
> 
> In any case, the file itself is totally valid.  Most importantly, qcow2
> will regularly create files with unaligned file lengths.
> 
> So let me create a qcow2 image on a 4k-aligned device:
> 
> $ truncate 512M fs.img
> $ sudo losetup -f --show -b 4096 fs.img
> /dev/loop0
> $ sudo mkfs.ext4 /dev/loop0
> [...]
> $ sudo mount /dev/loop0 /mnt/tmp
> 
> $ sudo ./qemu-img create -f qcow2 /mnt/tmp/foo.qcow2 64M
> Formatting '/mnt/tmp/foo.qcow2', fmt=qcow2 size=67108864
> cluster_size=65536 lazy_refcounts=off refcount_bits=16
> $ sudo ./qemu-io -t none -c quit /mnt/tmp/foo.qcow2
> qemu-io: can't open device /mnt/tmp/foo.qcow2: File size is unaligned to
> request alignment
> 
> Which is too bad.

What exactly is bad?

Is it correct that create succeeded? Without new error, how would qcow2 driver
read from unaligned tail of file-posix? It will crash, isn't it?

> 
> So the real solution would probably...  Be to align the file size up to
> the alignment?

On creation, you mean?

> 
> Max
> 
>> Note, that file size and request_alignment may become out of sync
>> later, so this commit is not full fix of the problem, but it's better
>> than nothing.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>>   block.c | 7 +++++++
>>   1 file changed, 7 insertions(+)
>>
>> diff --git a/block.c b/block.c
>> index ecd09dbbfd..4cfc6c33a2 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -1324,6 +1324,13 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
>>       assert(bdrv_min_mem_align(bs) != 0);
>>       assert(is_power_of_2(bs->bl.request_alignment));
>>   
>> +    if (bs->bl.request_alignment > 512 &&
>> +        !QEMU_IS_ALIGNED(bs->total_sectors, bs->bl.request_alignment / 512))
>> +    {
>> +        error_setg(errp, "File size is unaligned to request alignment");
>> +        return -EINVAL;
>> +    }
>> +
>>       for (i = 0; i < bs->quiesce_counter; i++) {
>>           if (drv->bdrv_co_drain_begin) {
>>               drv->bdrv_co_drain_begin(bs);
>>
> 
> 


-- 
Best regards,
Vladimir


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] block: fail on open when file size is unaligned to request_alignment
  2020-03-12 11:59     ` Vladimir Sementsov-Ogievskiy
@ 2020-03-12 12:06       ` Vladimir Sementsov-Ogievskiy
  2020-03-23 13:00         ` Max Reitz
  0 siblings, 1 reply; 15+ messages in thread
From: Vladimir Sementsov-Ogievskiy @ 2020-03-12 12:06 UTC (permalink / raw)
  To: Max Reitz, qemu-block; +Cc: kwolf, den, qemu-devel

12.03.2020 14:59, Vladimir Sementsov-Ogievskiy wrote:
> 11.03.2020 14:06, Max Reitz wrote:
>> On 30.01.20 16:22, Vladimir Sementsov-Ogievskiy wrote:
>>> Prior to the commit the following command lead to crash:
>>>
>>>    ./qemu-io --image-opts -c 'write 0 512' \
>>>    driver=blkdebug,align=4096,image.driver=null-co,image.size=512
>>>
>>> It failes on assertion in bdrv_aligned_pwritev:
>>>    "end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE"
>>>
>>> The problem is obvious: 512 is aligned to 4096 and becomes larger than
>>> file size. And the core bad thing is that file size is unaligned to
>>> request_alignment.
>>>
>>> Let's catch such case on bdrv_open_driver and fail.
>>
>> I think we had a discussion on this before, but I can’t find it right
>> now.  (Although I think that had more to do with something in the
>> file-posix driver, because it wasn’t limited to alignments above 512.)
>>
>> In any case, the file itself is totally valid.  Most importantly, qcow2
>> will regularly create files with unaligned file lengths.
>>
>> So let me create a qcow2 image on a 4k-aligned device:
>>
>> $ truncate 512M fs.img
>> $ sudo losetup -f --show -b 4096 fs.img
>> /dev/loop0
>> $ sudo mkfs.ext4 /dev/loop0
>> [...]
>> $ sudo mount /dev/loop0 /mnt/tmp
>>
>> $ sudo ./qemu-img create -f qcow2 /mnt/tmp/foo.qcow2 64M
>> Formatting '/mnt/tmp/foo.qcow2', fmt=qcow2 size=67108864
>> cluster_size=65536 lazy_refcounts=off refcount_bits=16
>> $ sudo ./qemu-io -t none -c quit /mnt/tmp/foo.qcow2
>> qemu-io: can't open device /mnt/tmp/foo.qcow2: File size is unaligned to
>> request alignment
>>
>> Which is too bad.
> 
> What exactly is bad?
> 
> Is it correct that create succeeded? Without new error, how would qcow2 driver
> read from unaligned tail of file-posix? It will crash, isn't it?

Hmm, it crashes only on write-part if don't have RESIZE permission.. So for qcow2
everything is OK.

And generic read don't care about reading past-EOF because of alignment, read is passed
to driver.

> 
>>
>> So the real solution would probably...  Be to align the file size up to
>> the alignment?
> 
> On creation, you mean?
> 
>>
>> Max
>>
>>> Note, that file size and request_alignment may become out of sync
>>> later, so this commit is not full fix of the problem, but it's better
>>> than nothing.
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>>> ---
>>>   block.c | 7 +++++++
>>>   1 file changed, 7 insertions(+)
>>>
>>> diff --git a/block.c b/block.c
>>> index ecd09dbbfd..4cfc6c33a2 100644
>>> --- a/block.c
>>> +++ b/block.c
>>> @@ -1324,6 +1324,13 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
>>>       assert(bdrv_min_mem_align(bs) != 0);
>>>       assert(is_power_of_2(bs->bl.request_alignment));
>>> +    if (bs->bl.request_alignment > 512 &&
>>> +        !QEMU_IS_ALIGNED(bs->total_sectors, bs->bl.request_alignment / 512))
>>> +    {
>>> +        error_setg(errp, "File size is unaligned to request alignment");
>>> +        return -EINVAL;
>>> +    }
>>> +
>>>       for (i = 0; i < bs->quiesce_counter; i++) {
>>>           if (drv->bdrv_co_drain_begin) {
>>>               drv->bdrv_co_drain_begin(bs);
>>>
>>
>>
> 
> 


-- 
Best regards,
Vladimir


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] block: fail on open when file size is unaligned to request_alignment
  2020-03-12 12:06       ` Vladimir Sementsov-Ogievskiy
@ 2020-03-23 13:00         ` Max Reitz
  0 siblings, 0 replies; 15+ messages in thread
From: Max Reitz @ 2020-03-23 13:00 UTC (permalink / raw)
  To: Vladimir Sementsov-Ogievskiy, qemu-block; +Cc: kwolf, den, qemu-devel


[-- Attachment #1.1: Type: text/plain, Size: 6223 bytes --]

On 12.03.20 13:06, Vladimir Sementsov-Ogievskiy wrote:
> 12.03.2020 14:59, Vladimir Sementsov-Ogievskiy wrote:
>> 11.03.2020 14:06, Max Reitz wrote:
>>> On 30.01.20 16:22, Vladimir Sementsov-Ogievskiy wrote:
>>>> Prior to the commit the following command lead to crash:
>>>>
>>>>    ./qemu-io --image-opts -c 'write 0 512' \
>>>>    driver=blkdebug,align=4096,image.driver=null-co,image.size=512
>>>>
>>>> It failes on assertion in bdrv_aligned_pwritev:
>>>>    "end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE"
>>>>
>>>> The problem is obvious: 512 is aligned to 4096 and becomes larger than
>>>> file size. And the core bad thing is that file size is unaligned to
>>>> request_alignment.
>>>>
>>>> Let's catch such case on bdrv_open_driver and fail.
>>>
>>> I think we had a discussion on this before, but I can’t find it right
>>> now.  (Although I think that had more to do with something in the
>>> file-posix driver, because it wasn’t limited to alignments above 512.)
>>>
>>> In any case, the file itself is totally valid.  Most importantly, qcow2
>>> will regularly create files with unaligned file lengths.
>>>
>>> So let me create a qcow2 image on a 4k-aligned device:
>>>
>>> $ truncate 512M fs.img

Oops, “truncate --size=512M fs.img”, of course.

>>> $ sudo losetup -f --show -b 4096 fs.img
>>> /dev/loop0
>>> $ sudo mkfs.ext4 /dev/loop0
>>> [...]
>>> $ sudo mount /dev/loop0 /mnt/tmp
>>>
>>> $ sudo ./qemu-img create -f qcow2 /mnt/tmp/foo.qcow2 64M
>>> Formatting '/mnt/tmp/foo.qcow2', fmt=qcow2 size=67108864
>>> cluster_size=65536 lazy_refcounts=off refcount_bits=16
>>> $ sudo ./qemu-io -t none -c quit /mnt/tmp/foo.qcow2
>>> qemu-io: can't open device /mnt/tmp/foo.qcow2: File size is unaligned to
>>> request alignment
>>>
>>> Which is too bad.
>>
>> What exactly is bad?

That with this patch you can no longer create a qcow2 image on a 4k
block size medium and then open it with O_DIRECT.

>> Is it correct that create succeeded? Without new error, how would
>> qcow2 driver
>> read from unaligned tail of file-posix? It will crash, isn't it?

Well, the above test works, which is better already.

Basically part of the problem seems to be changing from unsafe caching
(during create) to O_DIRECT.  If we used O_DIRECT all the time, the
image would always be aligned to the block alignment.

Now that makes me wonder how such cases are handled right now.  Do we crash?

So I tried to create an image with 512 byte clusters, write to it with
normal WB caching, thus creating an image that isn’t aligned to 4k:

$ sudo ./qemu-img create -f qcow2 -o cluster_size=512 \
      /mnt/tmp/foo.qcow2 64M
[...]
$ sudo ./qemu-io -c 'write -P 42 0 512' /mnt/tmp/foo.qcow2
[...]
$ printf '%x\n' $(stat -c '%s' /mnt/tmp/foo.qcow2)
4a00

(So not aligned to 4k)

$ sudo ./qemu-io -t none -c 'read -P 42 0 512' /mnt/tmp/foo.qcow2
read 512/512 bytes at offset 0
512 bytes, 1 ops; 00.00 sec (4.567 MiB/sec and 9352.6997 ops/sec)

So actually, that works just fine right now.

When I let file-posix print all pread() calls, this is what I can see:

[...]
pread(9, buf_ofs=+0, file_ofs=0x4000, size=0x1000)
-> 2560
pread(9, buf_ofs=+0xa00, file_ofs=0x4a00, size=0x600)
-> 0
[...]

So all reads are increased to the alignment (4k), and to read the data
cluster, we read all 4k from 0x4000 (even though it actually is just 512
bytes at 0x4800).  This succeeds, even though just as a partial read
(2560 == 0xa00), so we try to read the rest of the 4k block, but that
doesn’t work (it’s at the EOF), and so we stop reading.

handle_aiocb_rw() then zeroes out the rest of the buffer (below the
“out” label) and everyone’s happy.

So to me it looks like it works perfectly fine right now.  No crashes.

> Hmm, it crashes only on write-part if don't have RESIZE permission.. So
> for qcow2
> everything is OK.
> 
> And generic read don't care about reading past-EOF because of alignment,
> read is passed
> to driver.

(And only after the test, I realize that you apparently answered the
question yourself...  Oops.)

>>> So the real solution would probably...  Be to align the file size up to
>>> the alignment?
>>
>> On creation, you mean?

Yes.  But I suspect we’d need to do it all the time, because see my
above example: The user might access an image with WB caching at one
point, and then with O_DIRECT the next time.

But I can see two problems:

First, we don’t even know what the block alignment is.  It’s hard enough
to figure it out for O_DIRECT images, I’m not sure we can reliably do so
for WB-cached images.

Second, this wouldn’t help with pre-existing images.  And note that
“pre-existing” might mean an image a user creates on an FS with 512 byte
blocks and then moves it to another with 4k blocks.

So I don’t think aligning the file size up would work, actually.  Except
if we decided to always align it up to 4k, but then we’d still have a
problem with older pre-existing images...

Max

>>>
>>> Max
>>>
>>>> Note, that file size and request_alignment may become out of sync
>>>> later, so this commit is not full fix of the problem, but it's better
>>>> than nothing.
>>>>
>>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>>>> ---
>>>>   block.c | 7 +++++++
>>>>   1 file changed, 7 insertions(+)
>>>>
>>>> diff --git a/block.c b/block.c
>>>> index ecd09dbbfd..4cfc6c33a2 100644
>>>> --- a/block.c
>>>> +++ b/block.c
>>>> @@ -1324,6 +1324,13 @@ static int bdrv_open_driver(BlockDriverState
>>>> *bs, BlockDriver *drv,
>>>>       assert(bdrv_min_mem_align(bs) != 0);
>>>>       assert(is_power_of_2(bs->bl.request_alignment));
>>>> +    if (bs->bl.request_alignment > 512 &&
>>>> +        !QEMU_IS_ALIGNED(bs->total_sectors,
>>>> bs->bl.request_alignment / 512))
>>>> +    {
>>>> +        error_setg(errp, "File size is unaligned to request
>>>> alignment");
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>>       for (i = 0; i < bs->quiesce_counter; i++) {
>>>>           if (drv->bdrv_co_drain_begin) {
>>>>               drv->bdrv_co_drain_begin(bs);
>>>>
>>>
>>>
>>
>>
> 
> 



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2020-03-23 13:01 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-30 15:22 [PATCH 0/3] request_alignment vs file size Vladimir Sementsov-Ogievskiy
2020-01-30 15:22 ` [PATCH 1/3] block/file-posix: add raw_getlength_fd Vladimir Sementsov-Ogievskiy
2020-03-11 10:21   ` Max Reitz
2020-01-30 15:22 ` [PATCH 2/3] block/file-posix: consider file size when fallback to max_align Vladimir Sementsov-Ogievskiy
2020-03-11 10:46   ` Max Reitz
2020-01-30 15:22 ` [PATCH 3/3] block: fail on open when file size is unaligned to request_alignment Vladimir Sementsov-Ogievskiy
2020-03-11 11:06   ` Max Reitz
2020-03-11 12:29     ` Eric Blake
2020-03-12  9:06       ` Vladimir Sementsov-Ogievskiy
2020-03-12 11:31         ` Eric Blake
2020-03-12 11:59     ` Vladimir Sementsov-Ogievskiy
2020-03-12 12:06       ` Vladimir Sementsov-Ogievskiy
2020-03-23 13:00         ` Max Reitz
2020-01-30 15:52 ` [PATCH 0/3] request_alignment vs file size no-reply
2020-01-30 16:06   ` Vladimir Sementsov-Ogievskiy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.