qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
To: qemu-block@nongnu.org
Cc: qemu-devel@nongnu.org, mreitz@redhat.com, kwolf@redhat.com,
	vsementsov@virtuozzo.com, den@openvz.org
Subject: [PATCH v5 4/6] qcow2: introduce inflight-write-counters
Date: Fri, 26 Mar 2021 23:00:43 +0300	[thread overview]
Message-ID: <20210326200045.363290-5-vsementsov@virtuozzo.com> (raw)
In-Reply-To: <20210326200045.363290-1-vsementsov@virtuozzo.com>

We have a bug in qcow2: assume we've started data write into host
cluster A. s->lock is unlocked. During the write the refcount of
cluster A may become zero, cluster may be reallocated for other needs,
and our in-flight write become a use-after-free. More details will be
in the further commit which actually fixes the bug.

For now, let's prepare infrastructure for the following fix. We are
going to track these in-flight data writes. So, we create a hash map

  cluster_index -> Qcow2InFlightRefcount

For now, add only basic structure and simple counting logic. No guest
write is actually counted, we only add infrastructure.
Qcow2InFlightRefcount will be expanded in the following commit, that's
why we need a structure.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block/qcow2.h          | 16 ++++++++
 block/qcow2-refcount.c | 86 ++++++++++++++++++++++++++++++++++++++++++
 block/qcow2.c          |  5 +++
 3 files changed, 107 insertions(+)

diff --git a/block/qcow2.h b/block/qcow2.h
index 0fe5f74ed3..b25ef06111 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -420,6 +420,17 @@ typedef struct BDRVQcow2State {
      * is to convert the image with the desired compression type set.
      */
     Qcow2CompressionType compression_type;
+
+    /*
+     * inflight_writes_counters:
+     *   Map cluster index (int64_t) -> Qcow2InFlightWriteCounter
+     *
+     * The map contains entries only for clusters that have in-flight data
+     * (not-metadata) writes. So Qcow2InFlightWriteCounter::inflight_writes_cnt
+     * is always (except for when being removed in update_inflight_write_cnt())
+     * >= 1 for stored elements.
+     */
+    GHashTable *inflight_writes_counters;
 } BDRVQcow2State;
 
 typedef struct Qcow2COWRegion {
@@ -896,6 +907,11 @@ int qcow2_shrink_reftable(BlockDriverState *bs);
 int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
 int qcow2_detect_metadata_preallocation(BlockDriverState *bs);
 
+void qcow2_inflight_writes_inc(BlockDriverState *bs, int64_t offset,
+                               int64_t length);
+void qcow2_inflight_writes_dec(BlockDriverState *bs, int64_t offset,
+                               int64_t length);
+
 /* qcow2-cluster.c functions */
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                         bool exact_size);
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 1369724b41..eedc83ea4a 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -799,6 +799,92 @@ found:
     }
 }
 
+typedef struct Qcow2InFlightWriteCounter {
+    /*
+     * Number of in-flight writes to the cluster, always > 0, as when it becomes
+     * 0 the entry is removed from s->inflight_writes_counters.
+     */
+    uint64_t inflight_writes_cnt;
+} Qcow2InFlightWriteCounter;
+
+/* Find Qcow2InFlightWriteCounter corresponding to @cluster_index */
+static Qcow2InFlightWriteCounter *find_infl_wr(BDRVQcow2State *s,
+                                               int64_t cluster_index)
+{
+    Qcow2InFlightWriteCounter *infl;
+
+    if (!s->inflight_writes_counters) {
+        return NULL;
+    }
+
+    infl = g_hash_table_lookup(s->inflight_writes_counters, &cluster_index);
+
+    if (infl) {
+        assert(infl->inflight_writes_cnt > 0);
+    }
+
+    return infl;
+}
+
+/*
+ * The function is intended to be called with decrease=false before writing
+ * guest data and with decrease=true after write finish.
+ */
+static void coroutine_fn
+update_inflight_write_cnt(BlockDriverState *bs, int64_t offset, int64_t length,
+                          bool decrease)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int64_t start, last, cluster_index;
+
+    start = start_of_cluster(s, offset) >> s->cluster_bits;
+    last = start_of_cluster(s, offset + length - 1) >> s->cluster_bits;
+    for (cluster_index = start; cluster_index <= last; cluster_index++) {
+        Qcow2InFlightWriteCounter *infl = find_infl_wr(s, cluster_index);
+
+        if (!decrease) {
+            if (!infl) {
+                infl = g_new0(Qcow2InFlightWriteCounter, 1);
+                g_hash_table_insert(s->inflight_writes_counters,
+                                    g_memdup(&cluster_index,
+                                             sizeof(cluster_index)), infl);
+            }
+            infl->inflight_writes_cnt++;
+            continue;
+        }
+
+        /* decrease */
+        assert(infl);
+        assert(infl->inflight_writes_cnt >= 1);
+
+        infl->inflight_writes_cnt--;
+
+        if (infl->inflight_writes_cnt == 0) {
+            g_hash_table_remove(s->inflight_writes_counters, &cluster_index);
+        }
+    }
+}
+
+/*
+ * Works both with locked and unlocked s->lock. It just doesn't touch s->lock in
+ * contrast to qcow2_inflight_writes_dec()
+ */
+void qcow2_inflight_writes_inc(BlockDriverState *bs, int64_t offset,
+                               int64_t length)
+{
+    update_inflight_write_cnt(bs, offset, length, false);
+}
+
+/*
+ * Called with s->lock not locked by caller. Will take s->lock only if need to
+ * actually discard some clusters.
+ */
+void qcow2_inflight_writes_dec(BlockDriverState *bs, int64_t offset,
+                               int64_t length)
+{
+    update_inflight_write_cnt(bs, offset, length, true);
+}
+
 /* XXX: cache several refcount block clusters ? */
 /* @addend is the absolute value of the addend; if @decrease is set, @addend
  * will be subtracted from the current refcount, otherwise it will be added */
diff --git a/block/qcow2.c b/block/qcow2.c
index 0db1227ac9..0a5bd4ea4e 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1833,6 +1833,8 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
 #endif
 
     qemu_co_queue_init(&s->thread_task_queue);
+    s->inflight_writes_counters =
+        g_hash_table_new_full(g_int64_hash, g_int64_equal, g_free, g_free);
 
     return ret;
 
@@ -2709,6 +2711,9 @@ static void qcow2_close(BlockDriverState *bs)
     g_free(s->image_backing_file);
     g_free(s->image_backing_format);
 
+    assert(g_hash_table_size(s->inflight_writes_counters) == 0);
+    g_hash_table_unref(s->inflight_writes_counters);
+
     if (has_data_file(bs)) {
         bdrv_unref_child(bs, s->data_file);
         s->data_file = NULL;
-- 
2.29.2



  parent reply	other threads:[~2021-03-26 20:26 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-26 20:00 [PATCH v5 0/6] qcow2: fix parallel rewrite and discard (lockless) Vladimir Sementsov-Ogievskiy
2021-03-26 20:00 ` [PATCH v5 1/6] iotests: add qcow2-discard-during-rewrite Vladimir Sementsov-Ogievskiy
2021-03-26 20:00 ` [PATCH v5 2/6] qcow2: fix cache discarding in update_refcount() Vladimir Sementsov-Ogievskiy
2021-04-07 16:34   ` Alberto Garcia
2021-03-26 20:00 ` [PATCH v5 3/6] qcow2: introduce is_cluster_free() helper Vladimir Sementsov-Ogievskiy
2021-03-26 20:00 ` Vladimir Sementsov-Ogievskiy [this message]
2021-03-30 12:05   ` [PATCH v5 4/6] qcow2: introduce inflight-write-counters Vladimir Sementsov-Ogievskiy
2021-03-26 20:00 ` [PATCH v5 5/6] qcow2: consider in-flight-writes when freeing clusters Vladimir Sementsov-Ogievskiy
2021-03-26 20:00 ` [PATCH v5 6/6] qcow2: do not discard host clusters during in-flight writes Vladimir Sementsov-Ogievskiy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210326200045.363290-5-vsementsov@virtuozzo.com \
    --to=vsementsov@virtuozzo.com \
    --cc=den@openvz.org \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).