All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Blake <eblake@redhat.com>
To: libguestfs@redhat.com
Cc: nsoffer@redhat.com, vsementsov@virtuozzo.com,
	qemu-devel@nongnu.org, qemu-block@nongnu.org,
	nbd@other.debian.org
Subject: [libnbd PATCH 08/13] block_status: Track 64-bit extents internally
Date: Fri,  3 Dec 2021 17:17:36 -0600	[thread overview]
Message-ID: <20211203231741.3901263-9-eblake@redhat.com> (raw)
In-Reply-To: <20211203231741.3901263-1-eblake@redhat.com>

When extended headers are in use, the server can send us 64-bit
extents, even for a 32-bit query (if the server knows the entire image
is data, for example).  For maximum flexibility, we are thus better
off storing 64-bit lengths internally, even if we have to convert it
back to 32-bit lengths when invoking the user's 32-bit callback.  The
next patch will then add a new API for letting the user access the
full 64-bit extent information.  The goal is to let both APIs work all
the time, regardless of the size extents that the server actually
answered with.

Note that when using the old nbd_block_status() API with a server that
lacks extended headers, we now add a double-conversion speed penalty
(converting the server's 32-bit answer into 64-bit internally and back
to 32-bit for the callback).  But the speed penalty will not be a
problem for applications using the new nbd_block_status_64() API (we
have to give a 64-bit answer no matter what the server answered), and
ideally the situation will become less common as more servers learn
extended headers.  So for now I chose to unconditionally use a 64-bit
internal representation; but if it turns out to have noticeable
degredation, we could tweak things to conditionally retain 32-bit
internal representation for servers lacking extended headers at the
expense of more code maintenance.

One of the trickier aspects of this patch is auditing that both the
user's extent and our malloc'd shim get cleaned up once on all
possible paths, so that there is neither a leak nor a double free.
---
 lib/internal.h                      |  7 +++-
 generator/states-reply-structured.c | 31 ++++++++++-----
 lib/handle.c                        |  4 +-
 lib/rw.c                            | 59 ++++++++++++++++++++++++++++-
 4 files changed, 85 insertions(+), 16 deletions(-)

diff --git a/lib/internal.h b/lib/internal.h
index 06f3a65c..4800df83 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -75,7 +75,7 @@ struct export {

 struct command_cb {
   union {
-    nbd_extent_callback extent;
+    nbd_extent64_callback extent;
     nbd_chunk_callback chunk;
     nbd_list_callback list;
     nbd_context_callback context;
@@ -286,7 +286,10 @@ struct nbd_handle {

   /* When receiving block status, this is used. */
   uint32_t bs_contextid;
-  uint32_t *bs_entries;
+  union {
+    nbd_extent *normal; /* Our 64-bit preferred internal form */
+    uint32_t *narrow;   /* 32-bit form of NBD_REPLY_TYPE_BLOCK_STATUS */
+  } bs_entries;

   /* Commands which are waiting to be issued [meaning the request
    * packet is sent to the server].  This is used as a simple linked
diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c
index a3e0e2ac..71c761e9 100644
--- a/generator/states-reply-structured.c
+++ b/generator/states-reply-structured.c
@@ -494,6 +494,7 @@ STATE_MACHINE {
  REPLY.STRUCTURED_REPLY.RECV_BS_CONTEXTID:
   struct command *cmd = h->reply_cmd;
   uint32_t length;
+  uint32_t count;

   switch (recv_into_rbuf (h)) {
   case -1: SET_NEXT_STATE (%.DEAD); return 0;
@@ -508,15 +509,19 @@ STATE_MACHINE {
     assert (cmd->type == NBD_CMD_BLOCK_STATUS);
     assert (length >= 12);
     length -= sizeof h->bs_contextid;
+    count = length / (2 * sizeof (uint32_t));

-    free (h->bs_entries);
-    h->bs_entries = malloc (length);
-    if (h->bs_entries == NULL) {
+    /* Read raw data into a subset of h->bs_entries, then expand it
+     * into place later later during byte-swapping.
+     */
+    free (h->bs_entries.normal);
+    h->bs_entries.normal = malloc (count * sizeof *h->bs_entries.normal);
+    if (h->bs_entries.normal == NULL) {
       SET_NEXT_STATE (%.DEAD);
       set_error (errno, "malloc");
       return 0;
     }
-    h->rbuf = h->bs_entries;
+    h->rbuf = h->bs_entries.narrow;
     h->rlen = length;
     SET_NEXT_STATE (%RECV_BS_ENTRIES);
   }
@@ -528,6 +533,7 @@ STATE_MACHINE {
   uint32_t count;
   size_t i;
   uint32_t context_id;
+  uint32_t *raw;
   struct meta_context *meta_context;

   switch (recv_into_rbuf (h)) {
@@ -542,15 +548,20 @@ STATE_MACHINE {
     assert (cmd); /* guaranteed by CHECK */
     assert (cmd->type == NBD_CMD_BLOCK_STATUS);
     assert (CALLBACK_IS_NOT_NULL (cmd->cb.fn.extent));
-    assert (h->bs_entries);
+    assert (h->bs_entries.normal);
     assert (length >= 12);
-    count = (length - sizeof h->bs_contextid) / sizeof *h->bs_entries;
+    count = (length - sizeof h->bs_contextid) / (2 * sizeof (uint32_t));

     /* Need to byte-swap the entries returned, but apart from that we
-     * don't validate them.
+     * don't validate them.  Reverse order is essential, since we are
+     * expanding in-place from narrow to wider type.
      */
-    for (i = 0; i < count; ++i)
-      h->bs_entries[i] = be32toh (h->bs_entries[i]);
+    raw = h->bs_entries.narrow;
+    for (i = count; i > 0; ) {
+      --i;
+      h->bs_entries.normal[i].flags = be32toh (raw[i * 2 + 1]);
+      h->bs_entries.normal[i].length = be32toh (raw[i * 2]);
+    }

     /* Look up the context ID. */
     context_id = be32toh (h->bs_contextid);
@@ -566,7 +577,7 @@ STATE_MACHINE {

       if (CALL_CALLBACK (cmd->cb.fn.extent,
                          meta_context->name, cmd->offset,
-                         h->bs_entries, count,
+                         h->bs_entries.normal, count,
                          &error) == -1)
         if (cmd->error == 0)
           cmd->error = error ? error : EPROTO;
diff --git a/lib/handle.c b/lib/handle.c
index cbb37e89..74fe87ec 100644
--- a/lib/handle.c
+++ b/lib/handle.c
@@ -1,5 +1,5 @@
 /* NBD client library in userspace
- * Copyright (C) 2013-2020 Red Hat Inc.
+ * Copyright (C) 2013-2021 Red Hat Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -123,7 +123,7 @@ nbd_close (struct nbd_handle *h)
   /* Free user callbacks first. */
   nbd_unlocked_clear_debug_callback (h);

-  free (h->bs_entries);
+  free (h->bs_entries.normal);
   nbd_internal_reset_size_and_flags (h);
   nbd_internal_free_option (h);
   free_cmd_list (h->cmds_to_issue);
diff --git a/lib/rw.c b/lib/rw.c
index 16c2e848..f36f4e15 100644
--- a/lib/rw.c
+++ b/lib/rw.c
@@ -42,6 +42,50 @@ wait_for_command (struct nbd_handle *h, int64_t cookie)
   return r == -1 ? -1 : 0;
 }

+/* Convert from 64-bit to 32-bit extent callback. */
+static int
+nbd_convert_extent (void *data, const char *metacontext, uint64_t offset,
+                    nbd_extent *entries, size_t nr_entries, int *error)
+{
+  nbd_extent_callback *cb = data;
+  uint32_t *array = malloc (nr_entries * 2 * sizeof *array);
+  size_t i;
+  int ret;
+
+  if (array == NULL) {
+    set_error (*error = errno, "malloc");
+    return -1;
+  }
+
+  for (i = 0; i < nr_entries; i++) {
+    array[i * 2] = entries[i].length;
+    array[i * 2 + 1] = entries[i].flags;
+    /* If an extent is larger than 32 bits, silently truncate the rest
+     * of the server's response.  Technically, such a server was
+     * non-compliant if the client did not negotiate extended headers,
+     * but it is easier to let the caller make progress than to make
+     * the call fail.  Rather than track the connection's alignment,
+     * just blindly truncate the large extent to 4G-64M.
+     */
+    if (entries[i].length > UINT32_MAX) {
+      array[i++ * 2] = -MAX_REQUEST_SIZE;
+      break;
+    }
+  }
+
+  ret = CALL_CALLBACK (*cb, metacontext, offset, array, i * 2, error);
+  free (array);
+  return ret;
+}
+
+static void
+nbd_convert_extent_free (void *data)
+{
+  nbd_extent_callback *cb = data;
+  FREE_CALLBACK (*cb);
+  free (cb);
+}
+
 /* Issue a read command and wait for the reply. */
 int
 nbd_unlocked_pread (struct nbd_handle *h, void *buf,
@@ -469,12 +513,23 @@ nbd_unlocked_aio_block_status (struct nbd_handle *h,
                                nbd_completion_callback *completion,
                                uint32_t flags)
 {
-  struct command_cb cb = { .fn.extent = *extent,
+  nbd_extent_callback *shim = malloc (sizeof *shim);
+  struct command_cb cb = { .fn.extent.callback = nbd_convert_extent,
+                           .fn.extent.user_data = shim,
+                           .fn.extent.free = nbd_convert_extent_free,
                            .completion = *completion };

+  if (shim == NULL) {
+    set_error (errno, "malloc");
+    return -1;
+  }
+  *shim = *extent;
+  SET_CALLBACK_TO_NULL (*extent);
+
   if (h->strict & LIBNBD_STRICT_COMMANDS) {
     if (!h->structured_replies) {
       set_error (ENOTSUP, "server does not support structured replies");
+      FREE_CALLBACK (cb.fn.extent);
       return -1;
     }

@@ -482,11 +537,11 @@ nbd_unlocked_aio_block_status (struct nbd_handle *h,
       set_error (ENOTSUP, "did not negotiate any metadata contexts, "
                  "either you did not call nbd_add_meta_context before "
                  "connecting or the server does not support it");
+      FREE_CALLBACK (cb.fn.extent);
       return -1;
     }
   }

-  SET_CALLBACK_TO_NULL (*extent);
   SET_CALLBACK_TO_NULL (*completion);
   return nbd_internal_command_common (h, flags, NBD_CMD_BLOCK_STATUS, offset,
                                       count, EINVAL, NULL, &cb);
-- 
2.33.1



  parent reply	other threads:[~2021-12-03 23:29 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-03 23:13 RFC for NBD protocol extension: extended headers Eric Blake
2021-12-03 23:14 ` [PATCH] spec: Add NBD_OPT_EXTENDED_HEADERS Eric Blake
2021-12-06 11:40   ` Vladimir Sementsov-Ogievskiy
2021-12-06 23:00     ` Eric Blake
2021-12-07  9:08       ` Vladimir Sementsov-Ogievskiy
2021-12-10 18:05         ` Vladimir Sementsov-Ogievskiy
2021-12-07 16:14       ` Wouter Verhelst
2022-03-22 15:10         ` Eric Blake
2021-12-10 18:16   ` Vladimir Sementsov-Ogievskiy
2022-03-24 17:31   ` Wouter Verhelst
2022-03-25  0:00     ` Eric Blake
2022-10-04 21:21   ` Eric Blake
2021-12-03 23:15 ` [PATCH 00/14] qemu patches for NBD_OPT_EXTENDED_HEADERS Eric Blake
2021-12-03 23:15   ` [PATCH 01/14] nbd/server: Minor cleanups Eric Blake
2021-12-06 12:03     ` Vladimir Sementsov-Ogievskiy
2021-12-03 23:15   ` [PATCH 02/14] qemu-io: Utilize 64-bit status during map Eric Blake
2021-12-06 12:06     ` Vladimir Sementsov-Ogievskiy
2021-12-03 23:15   ` [PATCH 03/14] qemu-io: Allow larger write zeroes under no fallback Eric Blake
2021-12-06 12:26     ` Vladimir Sementsov-Ogievskiy
2021-12-03 23:15   ` [PATCH 04/14] nbd/client: Add safety check on chunk payload length Eric Blake
2021-12-06 12:33     ` Vladimir Sementsov-Ogievskiy
2021-12-03 23:15   ` [PATCH 05/14] nbd/server: Prepare for alternate-size headers Eric Blake
2021-12-03 23:15   ` [PATCH 06/14] nbd: Prepare for 64-bit requests Eric Blake
2021-12-03 23:15   ` [PATCH 07/14] nbd: Add types for extended headers Eric Blake
2021-12-03 23:15   ` [PATCH 08/14] nbd/server: Initial support " Eric Blake
2021-12-03 23:15   ` [PATCH 09/14] nbd/server: Support 64-bit block status Eric Blake
2021-12-03 23:15   ` [PATCH 10/14] nbd/client: Initial support for extended headers Eric Blake
2021-12-03 23:15   ` [PATCH 11/14] nbd/client: Accept 64-bit hole chunks Eric Blake
2021-12-03 23:15   ` [PATCH 12/14] nbd/client: Accept 64-bit block status chunks Eric Blake
2021-12-03 23:15   ` [PATCH 13/14] nbd/client: Request extended headers during negotiation Eric Blake
2021-12-03 23:15   ` [PATCH 14/14] do not apply: nbd/server: Send 64-bit hole chunk Eric Blake
2021-12-03 23:17 ` [libnbd PATCH 00/13] libnbd patches for NBD_OPT_EXTENDED_HEADERS Eric Blake
2021-12-03 23:17   ` [libnbd PATCH 01/13] golang: Simplify nbd_block_status callback array copy Eric Blake
2021-12-03 23:17   ` [libnbd PATCH 02/13] block_status: Refactor array storage Eric Blake
2021-12-03 23:17   ` [libnbd PATCH 03/13] protocol: Add definitions for extended headers Eric Blake
2021-12-03 23:17   ` [libnbd PATCH 04/13] protocol: Prepare to send 64-bit requests Eric Blake
2021-12-03 23:17   ` [libnbd PATCH 05/13] protocol: Prepare to receive 64-bit replies Eric Blake
2021-12-03 23:17   ` [libnbd PATCH 06/13] protocol: Accept 64-bit holes during pread Eric Blake
2021-12-03 23:17   ` [libnbd PATCH 07/13] generator: Add struct nbd_extent in prep for 64-bit extents Eric Blake
2021-12-03 23:17   ` Eric Blake [this message]
2021-12-03 23:17   ` [libnbd PATCH 09/13] block_status: Accept 64-bit extents during block status Eric Blake
2021-12-03 23:17   ` [libnbd PATCH 10/13] api: Add [aio_]nbd_block_status_64 Eric Blake
2021-12-03 23:17   ` [libnbd PATCH 11/13] api: Add three functions for controlling extended headers Eric Blake
2021-12-03 23:17   ` [libnbd PATCH 12/13] generator: Actually request " Eric Blake
2021-12-03 23:17   ` [libnbd PATCH 13/13] interop: Add test of 64-bit block status Eric Blake
2021-12-10  8:16   ` [Libguestfs] [libnbd PATCH 00/13] libnbd patches for NBD_OPT_EXTENDED_HEADERS Laszlo Ersek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211203231741.3901263-9-eblake@redhat.com \
    --to=eblake@redhat.com \
    --cc=libguestfs@redhat.com \
    --cc=nbd@other.debian.org \
    --cc=nsoffer@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=vsementsov@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.