From: Eric Blake <eblake@redhat.com>
To: libguestfs@redhat.com
Cc: qemu-devel@nongnu.org, qemu-block@nongnu.org, nbd@other.debian.org
Subject: [libnbd PATCH v2 08/23] block_status: Track 64-bit extents internally
Date: Mon, 14 Nov 2022 16:51:43 -0600
Message-ID: <20221114225158.2186742-9-eblake@redhat.com>
In-Reply-To: <20221114225158.2186742-1-eblake@redhat.com>

When extended headers are in use, the server can send us 64-bit
extents, even for a 32-bit query (if the server knows the entire image
is data, for example, or if the metacontext has a status definition
that uses more than 32 bits).  Also, while most contexts only have
32-bit flags, a server is allowed to negotiate contexts with 64-bit
flags when extended headers are in use.  Thus, for maximum
flexibility, we are better off storing 64-bit data internally, with
the goal of letting the client's existing 32-bit interface work as
much as possible, and of letting a future 64-bit API addition work
even when the server only gave 32-bit results.
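
For reference, here is a minimal sketch of the 64-bit extent type that
the previous patch (07/23) introduces; the exact layout is assumed
from how this patch consumes it (.length and .flags, both widened to
64 bits):

  #include <stdint.h>

  typedef struct {
    uint64_t length;  /* Length of the extent in bytes. */
    uint64_t flags;   /* Status flags; today's contexts use only 32 bits,
                       * but extended headers allow 64-bit flags. */
  } nbd_extent;

The bs_entries union below lets one allocation be viewed either as the
raw 32-bit wire form (narrow) or as an array of these wider structs
(normal).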

For backwards compatibility, a client that never negotiates a 64-bit
status context can be handled without errors by truncating any 64-bit
lengths down to just under 4G, so the old 32-bit interface will
continue to work in most cases.  But we can't truncate status flags
down; if a user negotiates a context that produces 64-bit status
flags, the 32-bit interface can now report EOVERFLOW for that context
(although that situation can't happen until a later patch actually
turns on the use of extended headers).
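
As a rough sketch of that conversion rule (clamp_length here is purely
illustrative and not part of the API; the real logic lives in
nbd_convert_extent in lib/rw.c below, and MAX_REQUEST_SIZE is libnbd's
internal 64M request cap, which is what gives the 4G-64M mentioned in
that code):

  #include <stdint.h>

  #define MAX_REQUEST_SIZE (64 * 1024 * 1024)

  /* Illustrative only: narrow one 64-bit extent length for a 32-bit
   * caller.  Flags get no such clamping; an oversized flags value is
   * reported as EOVERFLOW instead.
   */
  static uint32_t
  clamp_length (uint64_t len)
  {
    if (len > UINT32_MAX)
      return (uint32_t) -MAX_REQUEST_SIZE;  /* just under 4G */
    return (uint32_t) len;
  }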

Note that the existing 32-bit nbd_block_status() API is now slightly
slower, particularly when talking with a server that lacks extended
headers: we now perform two size conversions (the server's 32-bit
extents are widened to 64 bits internally, then narrowed back to 32
bits for the user's callback).  But this speed penalty is likely in
the noise compared to the network delays, and ideally clients will
switch over to the new 64-bit interfaces as more and more servers
start supporting extended headers.

One of the trickier aspects of this patch is auditing that both the
user's extent callback and our malloc'd shim get cleaned up exactly
once on all possible paths, so that there is neither a leak nor a
double free.
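
In outline, the ownership rules the code below follows (a sketch of
this patch's cleanup discipline, not new API):

  /* nbd_unlocked_aio_block_status:
   *   shim = malloc (...); *shim = *extent;  - shim now owns the user's
   *                                            extent callback
   *   SET_CALLBACK_TO_NULL (*extent);        - caller's copy becomes inert
   *   early failure after this point         - FREE_CALLBACK (cb.fn.extent)
   *     runs nbd_convert_extent_free, releasing both the callback and
   *     the shim allocation
   *   command successfully queued            - the command machinery owns
   *     cb, and its normal retirement frees the shim exactly once
   */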
---
 lib/internal.h                      |  8 +++-
 generator/states-reply-structured.c | 30 ++++++++-----
 lib/handle.c                        |  2 +-
 lib/rw.c                            | 70 ++++++++++++++++++++++++++++-
 4 files changed, 95 insertions(+), 15 deletions(-)

diff --git a/lib/internal.h b/lib/internal.h
index 73fd24c0..0c23f882 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -80,7 +80,7 @@ struct export {

 struct command_cb {
   union {
-    nbd_extent_callback extent;
+    nbd_extent64_callback extent;
     nbd_chunk_callback chunk;
     nbd_list_callback list;
     nbd_context_callback context;
@@ -302,7 +302,11 @@ struct nbd_handle {
   size_t querynum;

   /* When receiving block status, this is used. */
-  uint32_t *bs_entries;
+  union {
+    char *storage;      /* malloc's view */
+    nbd_extent *normal; /* Our 64-bit preferred internal form; n slots */
+    uint32_t *narrow;   /* 32-bit NBD_REPLY_TYPE_BLOCK_STATUS form; n*2 slots */
+  } bs_entries;

   /* Commands which are waiting to be issued [meaning the request
    * packet is sent to the server].  This is used as a simple linked
diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c
index da9894c6..d23e56a9 100644
--- a/generator/states-reply-structured.c
+++ b/generator/states-reply-structured.c
@@ -436,6 +436,7 @@  REPLY.STRUCTURED_REPLY.RECV_OFFSET_HOLE:
  REPLY.STRUCTURED_REPLY.RECV_BS_HEADER:
   struct command *cmd = h->reply_cmd;
   uint32_t length;
+  uint32_t count;

   switch (recv_into_rbuf (h)) {
   case -1: SET_NEXT_STATE (%.DEAD); return 0;
@@ -450,15 +451,19 @@  REPLY.STRUCTURED_REPLY.RECV_BS_HEADER:
     assert (cmd->type == NBD_CMD_BLOCK_STATUS);
     assert (length >= 12);
     length -= sizeof h->sbuf.reply.payload.bs_hdr;
+    count = length / (2 * sizeof (uint32_t));

-    free (h->bs_entries);
-    h->bs_entries = malloc (length);
-    if (h->bs_entries == NULL) {
+    /* Read raw data into a subset of h->bs_entries, then expand it
+     * into place later during byte-swapping.
+     */
+    free (h->bs_entries.storage);
+    h->bs_entries.storage = malloc (count * sizeof *h->bs_entries.normal);
+    if (h->bs_entries.storage == NULL) {
       SET_NEXT_STATE (%.DEAD);
       set_error (errno, "malloc");
       return 0;
     }
-    h->rbuf = h->bs_entries;
+    h->rbuf = h->bs_entries.narrow;
     h->rlen = length;
     SET_NEXT_STATE (%RECV_BS_ENTRIES);
   }
@@ -470,6 +475,7 @@  REPLY.STRUCTURED_REPLY.RECV_BS_ENTRIES:
   uint32_t count;
   size_t i;
   uint32_t context_id;
+  uint32_t *raw;

   switch (recv_into_rbuf (h)) {
   case -1: SET_NEXT_STATE (%.DEAD); return 0;
@@ -483,17 +489,21 @@  REPLY.STRUCTURED_REPLY.RECV_BS_ENTRIES:
     assert (cmd); /* guaranteed by CHECK */
     assert (cmd->type == NBD_CMD_BLOCK_STATUS);
     assert (CALLBACK_IS_NOT_NULL (cmd->cb.fn.extent));
-    assert (h->bs_entries);
+    assert (h->bs_entries.normal);
     assert (length >= 12);
     assert (h->meta_valid);
     count = (length - sizeof h->sbuf.reply.payload.bs_hdr) /
-      sizeof *h->bs_entries;
+      (2 * sizeof (uint32_t));

     /* Need to byte-swap the entries returned, but apart from that we
-     * don't validate them.
+     * don't validate them.  Reverse order is essential, since we are
+     * expanding in-place from narrow to wider type.
      */
-    for (i = 0; i < count; ++i)
-      h->bs_entries[i] = be32toh (h->bs_entries[i]);
+    raw = h->bs_entries.narrow;
+    for (i = count; i-- > 0; ) {
+      h->bs_entries.normal[i].flags = be32toh (raw[i * 2 + 1]);
+      h->bs_entries.normal[i].length = be32toh (raw[i * 2]);
+    }

     /* Look up the context ID. */
     context_id = be32toh (h->sbuf.reply.payload.bs_hdr.context_id);
@@ -507,7 +517,7 @@  REPLY.STRUCTURED_REPLY.RECV_BS_ENTRIES:

       if (CALL_CALLBACK (cmd->cb.fn.extent,
                          h->meta_contexts.ptr[i].name, cmd->offset,
-                         h->bs_entries, count,
+                         h->bs_entries.normal, count,
                          &error) == -1)
         if (cmd->error == 0)
           cmd->error = error ? error : EPROTO;
diff --git a/lib/handle.c b/lib/handle.c
index 4a186f8f..41e442e7 100644
--- a/lib/handle.c
+++ b/lib/handle.c
@@ -130,7 +130,7 @@ nbd_close (struct nbd_handle *h)

   string_vector_iter (&h->querylist, (void *) free);
   free (h->querylist.ptr);
-  free (h->bs_entries);
+  free (h->bs_entries.storage);
   nbd_internal_reset_size_and_flags (h);
   for (i = 0; i < h->meta_contexts.len; ++i)
     free (h->meta_contexts.ptr[i].name);
diff --git a/lib/rw.c b/lib/rw.c
index 81dded3f..6212bf4c 100644
--- a/lib/rw.c
+++ b/lib/rw.c
@@ -42,6 +42,61 @@ wait_for_command (struct nbd_handle *h, int64_t cookie)
   return r == -1 ? -1 : 0;
 }

+/* Convert from 64-bit to 32-bit extent callback. */
+static int
+nbd_convert_extent (void *data, const char *metacontext, uint64_t offset,
+                    nbd_extent *entries, size_t nr_entries, int *error)
+{
+  nbd_extent_callback *cb = data;
+  uint32_t *array = malloc (nr_entries * 2 * sizeof *array);
+  size_t i;
+  int ret;
+  bool fail = false;
+
+  if (array == NULL) {
+    set_error (*error = errno, "malloc");
+    return -1;
+  }
+
+  for (i = 0; i < nr_entries; i++) {
+    array[i * 2] = entries[i].length;
+    array[i * 2 + 1] = entries[i].flags;
+    /* If an extent is larger than 32 bits, silently truncate the rest
+     * of the server's response; the client can then make progress
+     * instead of needing to see failure.  Rather than track the
+     * connection's alignment, just clamp the large extent to 4G-64M.
+     * However, if flags doesn't fit in 32 bits, it's better to inform
+     * the caller of an EOVERFLOW failure.
+     *
+     * Technically, a server with 64-bit answers is non-compliant if
+     * the client did not negotiate extended headers - contexts that
+     * include 64-bit flags should not have been negotiated in that
+     * case.
+     */
+    if (entries[i].length > UINT32_MAX) {
+      array[i++ * 2] = -MAX_REQUEST_SIZE;
+      break;
+    }
+    if (entries[i].flags > UINT32_MAX) {
+      *error = EOVERFLOW;
+      fail = true;
+      break;
+    }
+  }
+
+  ret = CALL_CALLBACK (*cb, metacontext, offset, array, i * 2, error);
+  free (array);
+  return fail ? -1 : ret;
+}
+
+static void
+nbd_convert_extent_free (void *data)
+{
+  nbd_extent_callback *cb = data;
+  FREE_CALLBACK (*cb);
+  free (cb);
+}
+
 /* Issue a read command and wait for the reply. */
 int
 nbd_unlocked_pread (struct nbd_handle *h, void *buf,
@@ -487,12 +542,23 @@ nbd_unlocked_aio_block_status (struct nbd_handle *h,
                                nbd_completion_callback *completion,
                                uint32_t flags)
 {
-  struct command_cb cb = { .fn.extent = *extent,
+  nbd_extent_callback *shim = malloc (sizeof *shim);
+  struct command_cb cb = { .fn.extent.callback = nbd_convert_extent,
+                           .fn.extent.user_data = shim,
+                           .fn.extent.free = nbd_convert_extent_free,
                            .completion = *completion };

+  if (shim == NULL) {
+    set_error (errno, "malloc");
+    return -1;
+  }
+  *shim = *extent;
+  SET_CALLBACK_TO_NULL (*extent);
+
   if (h->strict & LIBNBD_STRICT_COMMANDS) {
     if (!h->structured_replies) {
       set_error (ENOTSUP, "server does not support structured replies");
+      FREE_CALLBACK (cb.fn.extent);
       return -1;
     }

@@ -500,11 +566,11 @@ nbd_unlocked_aio_block_status (struct nbd_handle *h,
       set_error (ENOTSUP, "did not negotiate any metadata contexts, "
                  "either you did not call nbd_add_meta_context before "
                  "connecting or the server does not support it");
+      FREE_CALLBACK (cb.fn.extent);
       return -1;
     }
   }

-  SET_CALLBACK_TO_NULL (*extent);
   SET_CALLBACK_TO_NULL (*completion);
   return nbd_internal_command_common (h, flags, NBD_CMD_BLOCK_STATUS, offset,
                                       count, EINVAL, NULL, &cb);
-- 
2.38.1


