All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: alex.bennee@linaro.org
Subject: [PATCH 1/1] accel/tcg: Replace parallel_cpus with cpu->cflags_base
Date: Wed, 24 Feb 2021 17:07:08 -0800	[thread overview]
Message-ID: <20210225010708.288696-1-richard.henderson@linaro.org> (raw)

Precompute the initial tb->cflags value for a cpu from its cluster
and the number of cpus live in the system.  This avoids having to
compute this constant data every time we look up a TB.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
Based-on: <20210224165811.11567-1-alex.bennee@linaro.org>
("Experimenting with tb-lookup tweaks")
---
 accel/tcg/tcg-accel-ops.h       |  1 +
 include/exec/exec-all.h         |  5 +----
 include/hw/core/cpu.h           |  2 ++
 accel/tcg/cpu-exec.c            |  3 ---
 accel/tcg/tcg-accel-ops-mttcg.c |  3 +--
 accel/tcg/tcg-accel-ops-rr.c    |  2 +-
 accel/tcg/tcg-accel-ops.c       | 10 ++++++++++
 accel/tcg/translate-all.c       |  1 -
 linux-user/main.c               |  1 +
 linux-user/sh4/signal.c         |  2 +-
 linux-user/syscall.c            | 18 ++++++++++--------
 11 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/accel/tcg/tcg-accel-ops.h b/accel/tcg/tcg-accel-ops.h
index 48130006de..656c922a67 100644
--- a/accel/tcg/tcg-accel-ops.h
+++ b/accel/tcg/tcg-accel-ops.h
@@ -17,5 +17,6 @@
 void tcg_cpus_destroy(CPUState *cpu);
 int tcg_cpus_exec(CPUState *cpu);
 void tcg_handle_interrupt(CPUState *cpu, int mask);
+void tcg_set_cpus_cflags(uint32_t cflags_base);
 
 #endif /* TCG_CPUS_H */
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 75f8c3981a..074d88f99a 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -510,8 +510,6 @@ struct TranslationBlock {
     uintptr_t jmp_dest[2];
 };
 
-extern bool parallel_cpus;
-
 /* Hide the qatomic_read to make code a little easier on the eyes */
 static inline uint32_t tb_cflags(const TranslationBlock *tb)
 {
@@ -521,8 +519,7 @@ static inline uint32_t tb_cflags(const TranslationBlock *tb)
 /* current cflags for hashing/comparison */
 static inline uint32_t curr_cflags(CPUState *cpu)
 {
-    uint32_t cflags = deposit32(0, CF_CLUSTER_SHIFT, 8, cpu->cluster_index);
-    cflags |= parallel_cpus ? CF_PARALLEL : 0;
+    uint32_t cflags = cpu->cflags_base;
     cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
     return cflags;
 }
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index c005d3dc2d..5fd397f37d 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -282,6 +282,7 @@ struct qemu_work_item;
  *   to a cluster this will be UNASSIGNED_CLUSTER_INDEX; otherwise it will
  *   be the same as the cluster-id property of the CPU object's TYPE_CPU_CLUSTER
  *   QOM parent.
+ * @cflags_base: Precomputed @cluster_index and #CF_PARALLEL bits for this cpu.
  * @nr_cores: Number of cores within this CPU package.
  * @nr_threads: Number of threads within this CPU.
  * @running: #true if CPU is currently running (lockless).
@@ -412,6 +413,7 @@ struct CPUState {
     /* TODO Move common fields from CPUArchState here. */
     int cpu_index;
     int cluster_index;
+    uint32_t cflags_base;
     uint32_t halted;
     uint32_t can_do_io;
     int32_t exception_index;
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 931da96c2b..bdfa036ac8 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -267,8 +267,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
             mmap_unlock();
         }
 
-        /* Since we got here, we know that parallel_cpus must be true.  */
-        parallel_cpus = false;
         cpu_exec_enter(cpu);
         /* execute the generated code */
         trace_exec_tb(tb, pc);
@@ -296,7 +294,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
      * the execution.
      */
     g_assert(cpu_in_exclusive_context(cpu));
-    parallel_cpus = true;
     cpu->running = false;
     end_exclusive();
 }
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index 42973fb062..6676e05182 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -114,8 +114,7 @@ void mttcg_start_vcpu_thread(CPUState *cpu)
     char thread_name[VCPU_THREAD_NAME_SIZE];
 
     g_assert(tcg_enabled());
-
-    parallel_cpus = (current_machine->smp.max_cpus > 1);
+    tcg_set_cpus_cflags(current_machine->smp.max_cpus > 1 ? CF_PARALLEL : 0);
 
     cpu->thread = g_malloc0(sizeof(QemuThread));
     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 4a66055e0d..c10868f736 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -269,7 +269,7 @@ void rr_start_vcpu_thread(CPUState *cpu)
     static QemuThread *single_tcg_cpu_thread;
 
     g_assert(tcg_enabled());
-    parallel_cpus = false;
+    tcg_set_cpus_cflags(0);
 
     if (!single_tcg_cpu_thread) {
         cpu->thread = g_malloc0(sizeof(QemuThread));
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 6144d9df87..6bf260acf0 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -41,6 +41,16 @@
 
 /* common functionality among all TCG variants */
 
+void tcg_set_cpus_cflags(uint32_t cflags_base)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        cpu->cflags_base = deposit32(cflags_base, CF_CLUSTER_SHIFT, 8,
+                                     cpu->cluster_index);
+    }
+}
+
 void tcg_cpus_destroy(CPUState *cpu)
 {
     cpu_thread_signal_destroyed(cpu);
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 0b0bfd35ab..ec46b88bfe 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -224,7 +224,6 @@ static void *l1_map[V_L1_MAX_SIZE];
 TCGContext tcg_init_ctx;
 __thread TCGContext *tcg_ctx;
 TBContext tb_ctx;
-bool parallel_cpus;
 
 static void page_table_config_init(void)
 {
diff --git a/linux-user/main.c b/linux-user/main.c
index 81f48ff54e..c37040ceec 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -205,6 +205,7 @@ CPUArchState *cpu_copy(CPUArchState *env)
     /* Reset non arch specific state */
     cpu_reset(new_cpu);
 
+    new_cpu->cflags_base = cpu->cflags_base;
     memcpy(new_env, env, sizeof(CPUArchState));
 
     /* Clone all break/watchpoints.
diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
index cc89a48ff8..deb569babe 100644
--- a/linux-user/sh4/signal.c
+++ b/linux-user/sh4/signal.c
@@ -83,7 +83,7 @@ static abi_ulong get_sigframe(struct target_sigaction *ka,
 }
 
 /* Notice when we're in the middle of a gUSA region and reset.
-   Note that this will only occur for !parallel_cpus, as we will
+   Note that this will only occur for !CF_PARALLEL, as we will
    translate such sequences differently in a parallel context.  */
 static void unwind_gusa(CPUSH4State *regs)
 {
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 389ec09764..83a6e51de3 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -6481,6 +6481,16 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         /* Grab a mutex so that thread setup appears atomic.  */
         pthread_mutex_lock(&clone_lock);
 
+        /*
+         * If this is our first additional thread, we need to ensure we
+         * generate code for parallel execution and flush old translations.
+         * Do this now so that the copy gets CF_PARALLEL too.
+         */
+        if (!(cpu->cflags_base & CF_PARALLEL)) {
+            cpu->cflags_base |= CF_PARALLEL;
+            tb_flush(cpu);
+        }
+
         /* we create a new CPU instance. */
         new_env = cpu_copy(env);
         /* Init regs that differ from the parent.  */
@@ -6521,14 +6531,6 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         sigprocmask(SIG_BLOCK, &sigmask, &info.sigmask);
         cpu->random_seed = qemu_guest_random_seed_thread_part1();
 
-        /* If this is our first additional thread, we need to ensure we
-         * generate code for parallel execution and flush old translations.
-         */
-        if (!parallel_cpus) {
-            parallel_cpus = true;
-            tb_flush(cpu);
-        }
-
         ret = pthread_create(&info.thread, &attr, clone_func, &info);
         /* TODO: Free new CPU state if thread creation failed.  */
 
-- 
2.25.1



             reply	other threads:[~2021-02-25  1:07 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-25  1:07 Richard Henderson [this message]
2021-02-25  9:51 ` [PATCH 1/1] accel/tcg: Replace parallel_cpus with cpu->cflags_base Alex Bennée

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210225010708.288696-1-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=alex.bennee@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.