All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PULL 15/36] memory: fix race between TCG and accesses to dirty bitmap
Date: Tue, 20 Aug 2019 08:59:34 +0200	[thread overview]
Message-ID: <1566284395-30287-16-git-send-email-pbonzini@redhat.com> (raw)
In-Reply-To: <1566284395-30287-1-git-send-email-pbonzini@redhat.com>

There is a race between TCG and accesses to the dirty log:

      vCPU thread                  reader thread
      -----------------------      -----------------------
      TLB check -> slow path
        notdirty_mem_write
          write to RAM
          set dirty flag
                                   clear dirty flag
      TLB check -> fast path
                                   read memory
        write to RAM

Fortunately, in order to fix it, no change is required to the
vCPU thread.  However, the reader thread must delay the read after
the vCPU thread has finished the write.  This can be approximated
conservatively by run_on_cpu, which waits for the end of the current
translation block.

A similar technique is used by KVM, which has to do a synchronous TLB
flush after doing a test-and-clear of the dirty-page flags.

Reported-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c                | 31 +++++++++++++++++++++++++++++++
 include/exec/memory.h | 12 ++++++++++++
 memory.c              | 10 +++++++++-
 migration/ram.c       |  1 +
 4 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/exec.c b/exec.c
index 3e78de3..ae68f72 100644
--- a/exec.c
+++ b/exec.c
@@ -198,6 +198,7 @@ typedef struct subpage_t {
 
 static void io_mem_init(void);
 static void memory_map_init(void);
+static void tcg_log_global_after_sync(MemoryListener *listener);
 static void tcg_commit(MemoryListener *listener);
 
 static MemoryRegion io_mem_watch;
@@ -906,6 +907,7 @@ void cpu_address_space_init(CPUState *cpu, int asidx,
     newas->cpu = cpu;
     newas->as = as;
     if (tcg_enabled()) {
+        newas->tcg_as_listener.log_global_after_sync = tcg_log_global_after_sync;
         newas->tcg_as_listener.commit = tcg_commit;
         memory_listener_register(&newas->tcg_as_listener, as);
     }
@@ -3143,6 +3145,35 @@ void address_space_dispatch_free(AddressSpaceDispatch *d)
     g_free(d);
 }
 
+static void do_nothing(CPUState *cpu, run_on_cpu_data d)
+{
+}
+
+static void tcg_log_global_after_sync(MemoryListener *listener)
+{
+    CPUAddressSpace *cpuas;
+
+    /* Wait for the CPU to end the current TB.  This avoids the following
+     * incorrect race:
+     *
+     *      vCPU                         migration
+     *      ----------------------       -------------------------
+     *      TLB check -> slow path
+     *        notdirty_mem_write
+     *          write to RAM
+     *          mark dirty
+     *                                   clear dirty flag
+     *      TLB check -> fast path
+     *                                   read memory
+     *        write to RAM
+     *
+     * by pushing the migration thread's memory read after the vCPU thread has
+     * written the memory.
+     */
+    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
+    run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL);
+}
+
 static void tcg_commit(MemoryListener *listener)
 {
     CPUAddressSpace *cpuas;
diff --git a/include/exec/memory.h b/include/exec/memory.h
index bb0961d..b6bcf31 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -419,6 +419,7 @@ struct MemoryListener {
     void (*log_clear)(MemoryListener *listener, MemoryRegionSection *section);
     void (*log_global_start)(MemoryListener *listener);
     void (*log_global_stop)(MemoryListener *listener);
+    void (*log_global_after_sync)(MemoryListener *listener);
     void (*eventfd_add)(MemoryListener *listener, MemoryRegionSection *section,
                         bool match_data, uint64_t data, EventNotifier *e);
     void (*eventfd_del)(MemoryListener *listener, MemoryRegionSection *section,
@@ -1682,6 +1683,17 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
 void memory_global_dirty_log_sync(void);
 
 /**
+ * memory_global_dirty_log_sync: synchronize the dirty log for all memory
+ *
+ * Synchronizes the vCPUs with a thread that is reading the dirty bitmap.
+ * This function must be called after the dirty log bitmap is cleared, and
+ * before dirty guest memory pages are read.  If you are using
+ * #DirtyBitmapSnapshot, memory_region_snapshot_and_clear_dirty() takes
+ * care of doing this.
+ */
+void memory_global_after_dirty_log_sync(void);
+
+/**
  * memory_region_transaction_begin: Start a transaction.
  *
  * During a transaction, changes will be accumulated and made visible
diff --git a/memory.c b/memory.c
index e42d63a..edd0c13 100644
--- a/memory.c
+++ b/memory.c
@@ -2127,9 +2127,12 @@ DirtyBitmapSnapshot *memory_region_snapshot_and_clear_dirty(MemoryRegion *mr,
                                                             hwaddr size,
                                                             unsigned client)
 {
+    DirtyBitmapSnapshot *snapshot;
     assert(mr->ram_block);
     memory_region_sync_dirty_bitmap(mr);
-    return cpu_physical_memory_snapshot_and_clear_dirty(mr, addr, size, client);
+    snapshot = cpu_physical_memory_snapshot_and_clear_dirty(mr, addr, size, client);
+    memory_global_after_dirty_log_sync();
+    return snapshot;
 }
 
 bool memory_region_snapshot_get_dirty(MemoryRegion *mr, DirtyBitmapSnapshot *snap,
@@ -2620,6 +2623,11 @@ void memory_global_dirty_log_sync(void)
     memory_region_sync_dirty_bitmap(NULL);
 }
 
+void memory_global_after_dirty_log_sync(void)
+{
+    MEMORY_LISTENER_CALL_GLOBAL(log_global_after_sync, Forward);
+}
+
 static VMChangeStateEntry *vmstate_change;
 
 void memory_global_dirty_log_start(void)
diff --git a/migration/ram.c b/migration/ram.c
index 889148d..4e3a6ae 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1847,6 +1847,7 @@ static void migration_bitmap_sync(RAMState *rs)
     rcu_read_unlock();
     qemu_mutex_unlock(&rs->bitmap_mutex);
 
+    memory_global_after_dirty_log_sync();
     trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
 
     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-- 
1.8.3.1




  parent reply	other threads:[~2019-08-20  7:12 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-20  6:59 [Qemu-devel] [PULL 00/36] QEMU patches for 2018-08-20 Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 01/36] kvm: i386: halt poll control MSR support Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 02/36] target-i386: adds PV_SCHED_YIELD CPUID feature bit Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 03/36] loader: Handle memory-mapped ELFs Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 04/36] elf-ops.h: Map into memory the ELF to load Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 05/36] hw/i386/pc: Map into memory the initrd Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 06/36] memory: assert on out of scope notification Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 07/36] configure: Define target access alignment in configure Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 08/36] block: fix NetBSD qemu-iotests failure Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 09/36] 9p: simplify source file selection Paolo Bonzini
2020-11-03 20:31   ` Philippe Mathieu-Daudé
2020-11-03 20:31     ` Philippe Mathieu-Daudé
2019-08-20  6:59 ` [Qemu-devel] [PULL 10/36] target-i386: kvm: 'kvm_get_supported_msrs' cleanup Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 11/36] test-throttle: Fix uninitialized use of burst_length Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 12/36] tests: Fix uninitialized byte in test_visitor_in_fuzz Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 13/36] i386/kvm: initialize struct at full before ioctl call Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 14/36] target/i386: Return 'indefinite integer value' for invalid SSE fp->int conversions Paolo Bonzini
2019-08-20  6:59 ` Paolo Bonzini [this message]
2019-08-26 12:19   ` [Qemu-devel] [PULL 15/36] memory: fix race between TCG and accesses to dirty bitmap dovgaluk
2019-09-12  6:54     ` Pavel Dovgalyuk
2019-09-12 17:43       ` Richard Henderson
2019-09-12 22:16         ` Paolo Bonzini
2019-09-12 12:45     ` Paolo Bonzini
2022-08-02 16:17   ` Peter Maydell
2019-08-20  6:59 ` [Qemu-devel] [PULL 16/36] mc146818rtc: Remove reset notifiers Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 17/36] timer: " Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 18/36] replay: Remove host_clock_last Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 19/36] timer: last, remove last bits of last Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 20/36] kconfig: do not select VMMOUSE Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 21/36] replay: add missing fix for internal function Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 22/36] replay: document development rules Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 23/36] util/qemu-timer: refactor deadline calculation for external timers Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 24/36] replay: fix replay shutdown Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 25/36] replay: refine replay-time module Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 26/36] replay: rename step-related variables and functions Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 27/36] icount: clean up cpu_can_io at the entry to the block Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 28/36] icount: remove unnecessary gen_io_end calls Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 29/36] cpus-common: nuke finish_safe_work Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 30/36] cpus-common: assert BQL nesting within cpu-exclusive sections Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 31/36] kvm: vmxcap: Enhance with latest features Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 32/36] HACKING: Document 'struct' keyword usage Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 33/36] migration: do not rom_reset() during incoming migration Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 34/36] test-bitmap: test set 1 bit case for bitmap_set Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 35/36] scsi: lsi: exit infinite loop while executing script (CVE-2019-12068) Paolo Bonzini
2019-08-20  6:59 ` [Qemu-devel] [PULL 36/36] x86: Intel AVX512_BF16 feature enabling Paolo Bonzini
2019-08-20  7:42 ` [Qemu-devel] [PULL 00/36] QEMU patches for 2018-08-20 no-reply
2019-08-20  9:26 ` Peter Maydell
2019-08-20 23:42 ` no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1566284395-30287-16-git-send-email-pbonzini@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.