From: "Alex Bennée" <alex.bennee@linaro.org>
To: peter.maydell@linaro.org
Cc: qemu-devel@nongnu.org, "Alex Bennée" <alex.bennee@linaro.org>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Peter Crosthwaite" <crosthwaite.peter@gmail.com>,
	"Richard Henderson" <rth@twiddle.net>
Subject: [Qemu-devel] [PULL 17/24] cputlb: add tlb_flush_by_mmuidx async routines
Date: Fri, 24 Feb 2017 11:21:02 +0000	[thread overview]
Message-ID: <20170224112109.3147-18-alex.bennee@linaro.org> (raw)
In-Reply-To: <20170224112109.3147-1-alex.bennee@linaro.org>

This converts the remaining TLB flush routines to use async work when
a cross-vCPU flush is detected. The only minor complication is
serialising the variable list of MMU indexes into a form that can be
punted to an asynchronous job.
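
In outline, the cross-vCPU path just hands the uint16_t bitmap over
via the host_int member of run_on_cpu_data. A minimal sketch of the
idea (the wrapper name here is illustrative; the worker and macros are
the ones in the diff below):

    /* Illustrative wrapper around the pattern used in the diff below:
     * queue an MMU-index flush on a remote vCPU. */
    static void queue_mmuidx_flush(CPUState *cpu, uint16_t idxmap)
    {
        /* e.g. idxmap = (1 << 0) | (1 << 2) flushes mmu_idx 0 and 2 */
        async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
                         RUN_ON_CPU_HOST_INT(idxmap));
    }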

The pending_tlb_flush field on QOM's CPU structure also becomes a
bitmask rather than a boolean, recording which MMU indexes have a
flush outstanding.
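
The bitmask also lets remote callers coalesce requests: work is only
queued for indexes that do not already have a flush pending. Roughly,
simplified from tlb_flush_by_mmuidx() in the diff below:

    uint16_t pending = idxmap & ~atomic_mb_read(&cpu->pending_tlb_flush);
    if (pending) {
        /* mark these indexes outstanding, then punt to the vCPU */
        atomic_or(&cpu->pending_tlb_flush, pending);
        async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
                         RUN_ON_CPU_HOST_INT(pending));
    }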

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
 cputlb.c          | 110 +++++++++++++++++++++++++++++++++++++++++++-----------
 include/qom/cpu.h |   2 +-
 2 files changed, 89 insertions(+), 23 deletions(-)
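
The per-page variant only gets one data word for the async job, so the
page-aligned address and the MMU index bitmap share it; the new
QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN) guarantees the
low bits of the address are free. A sketch of the encoding, using the
names from the diff:

    /* encode: addr is page aligned, so its low bits can carry the
     * mmuidx bitmap */
    target_ulong addr_and_mmu_idx = (addr & TARGET_PAGE_MASK) | idxmap;

    /* decode, in the async worker */
    target_ulong page_addr   = addr_and_mmu_idx & TARGET_PAGE_MASK;
    unsigned long idx_bitmap = addr_and_mmu_idx & ALL_MMUIDX_BITS;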

diff --git a/cputlb.c b/cputlb.c
index 97e5c12de8..c50254be26 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -68,6 +68,11 @@
  * target_ulong even on 32 bit builds */
 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
 
+/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
+ */
+QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
+#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
+
 /* statistics */
 int tlb_flush_count;
 
@@ -102,7 +107,7 @@ static void tlb_flush_nocheck(CPUState *cpu)
 
     tb_unlock();
 
-    atomic_mb_set(&cpu->pending_tlb_flush, false);
+    atomic_mb_set(&cpu->pending_tlb_flush, 0);
 }
 
 static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
@@ -113,7 +118,8 @@ static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
 void tlb_flush(CPUState *cpu)
 {
     if (cpu->created && !qemu_cpu_is_self(cpu)) {
-        if (atomic_cmpxchg(&cpu->pending_tlb_flush, false, true) == true) {
+        if (atomic_mb_read(&cpu->pending_tlb_flush) != ALL_MMUIDX_BITS) {
+            atomic_mb_set(&cpu->pending_tlb_flush, ALL_MMUIDX_BITS);
             async_run_on_cpu(cpu, tlb_flush_global_async_work,
                              RUN_ON_CPU_NULL);
         }
@@ -122,17 +128,18 @@ void tlb_flush(CPUState *cpu)
     }
 }
 
-static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
+static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
 {
     CPUArchState *env = cpu->env_ptr;
-    unsigned long mmu_idx_bitmask = idxmap;
+    unsigned long mmu_idx_bitmask = data.host_int;
     int mmu_idx;
 
     assert_cpu_is_self(cpu);
-    tlb_debug("start\n");
 
     tb_lock();
 
+    tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
+
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 
         if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
@@ -145,12 +152,30 @@ static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
 
     memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
 
+    tlb_debug("done\n");
+
     tb_unlock();
 }
 
 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
 {
-    v_tlb_flush_by_mmuidx(cpu, idxmap);
+    tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
+
+    if (!qemu_cpu_is_self(cpu)) {
+        uint16_t pending_flushes = idxmap;
+        pending_flushes &= ~atomic_mb_read(&cpu->pending_tlb_flush);
+
+        if (pending_flushes) {
+            tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", pending_flushes);
+
+            atomic_or(&cpu->pending_tlb_flush, pending_flushes);
+            async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
+                             RUN_ON_CPU_HOST_INT(pending_flushes));
+        }
+    } else {
+        tlb_flush_by_mmuidx_async_work(cpu,
+                                       RUN_ON_CPU_HOST_INT(idxmap));
+    }
 }
 
 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
@@ -215,27 +240,26 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
     }
 }
 
-void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
+/* As we are going to hijack the bottom bits of the page address for an
+ * mmuidx bit mask, we need to fail the build if we can't do that.
+ */
+QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
+
+static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
+                                                run_on_cpu_data data)
 {
     CPUArchState *env = cpu->env_ptr;
-    unsigned long mmu_idx_bitmap = idxmap;
-    int i, page, mmu_idx;
+    target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
+    target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
+    unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
+    int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    int mmu_idx;
+    int i;
 
     assert_cpu_is_self(cpu);
-    tlb_debug("addr "TARGET_FMT_lx"\n", addr);
-
-    /* Check if we need to flush due to large pages.  */
-    if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
-        tlb_debug("forced full flush ("
-                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
-                  env->tlb_flush_addr, env->tlb_flush_mask);
-
-        v_tlb_flush_by_mmuidx(cpu, idxmap);
-        return;
-    }
 
-    addr &= TARGET_PAGE_MASK;
-    page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
+              page, addr, mmu_idx_bitmap);
 
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
         if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
@@ -251,6 +275,48 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
     tb_flush_jmp_cache(cpu, addr);
 }
 
+static void tlb_check_page_and_flush_by_mmuidx_async_work(CPUState *cpu,
+                                                          run_on_cpu_data data)
+{
+    CPUArchState *env = cpu->env_ptr;
+    target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
+    target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
+    unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
+
+    tlb_debug("addr:"TARGET_FMT_lx" mmu_idx: %04lx\n", addr, mmu_idx_bitmap);
+
+    /* Check if we need to flush due to large pages.  */
+    if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
+        tlb_debug("forced full flush ("
+                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
+                  env->tlb_flush_addr, env->tlb_flush_mask);
+
+        tlb_flush_by_mmuidx_async_work(cpu,
+                                       RUN_ON_CPU_HOST_INT(mmu_idx_bitmap));
+    } else {
+        tlb_flush_page_by_mmuidx_async_work(cpu, data);
+    }
+}
+
+void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
+{
+    target_ulong addr_and_mmu_idx;
+
+    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
+
+    /* This should already be page aligned */
+    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
+    addr_and_mmu_idx |= idxmap;
+
+    if (!qemu_cpu_is_self(cpu)) {
+        async_run_on_cpu(cpu, tlb_check_page_and_flush_by_mmuidx_async_work,
+                         RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
+    } else {
+        tlb_check_page_and_flush_by_mmuidx_async_work(
+            cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
+    }
+}
+
 void tlb_flush_page_all(target_ulong addr)
 {
     CPUState *cpu;
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index e80bf7a64a..3e61c880da 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -407,7 +407,7 @@ struct CPUState {
      * avoid potential races. The aim of the flag is to avoid
      * unnecessary flushes.
      */
-    bool pending_tlb_flush;
+    uint16_t pending_tlb_flush;
 };
 
 QTAILQ_HEAD(CPUTailQ, CPUState);
-- 
2.11.0
