All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: alex.bennee@linaro.org
Subject: [PATCH 27/27] accel/tcg: Precompute curr_cflags into cpu->tcg_cflags
Date: Tue,  2 Mar 2021 09:57:41 -0800	[thread overview]
Message-ID: <20210302175741.1079851-28-richard.henderson@linaro.org> (raw)
In-Reply-To: <20210302175741.1079851-1-richard.henderson@linaro.org>

The primary motivation is to remove a dozen insns along
the fast-path in tb_lookup.  As a byproduct, this allows
us to completely remove parallel_cpus.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-accel-ops.h       |  1 +
 include/exec/exec-all.h         |  7 +------
 include/hw/core/cpu.h           |  2 ++
 accel/tcg/cpu-exec.c            |  3 ---
 accel/tcg/tcg-accel-ops-mttcg.c |  3 +--
 accel/tcg/tcg-accel-ops-rr.c    |  2 +-
 accel/tcg/tcg-accel-ops.c       |  8 ++++++++
 accel/tcg/translate-all.c       |  4 ----
 linux-user/main.c               |  1 +
 linux-user/sh4/signal.c         |  8 +++++---
 linux-user/syscall.c            | 18 ++++++++++--------
 11 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/accel/tcg/tcg-accel-ops.h b/accel/tcg/tcg-accel-ops.h
index 48130006de..6a5fcef889 100644
--- a/accel/tcg/tcg-accel-ops.h
+++ b/accel/tcg/tcg-accel-ops.h
@@ -17,5 +17,6 @@
 void tcg_cpus_destroy(CPUState *cpu);
 int tcg_cpus_exec(CPUState *cpu);
 void tcg_handle_interrupt(CPUState *cpu, int mask);
+void tcg_cpu_init_cflags(CPUState *cpu, bool parallel);
 
 #endif /* TCG_CPUS_H */
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 75f8c3981a..310f474540 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -510,8 +510,6 @@ struct TranslationBlock {
     uintptr_t jmp_dest[2];
 };
 
-extern bool parallel_cpus;
-
 /* Hide the qatomic_read to make code a little easier on the eyes */
 static inline uint32_t tb_cflags(const TranslationBlock *tb)
 {
@@ -521,10 +519,7 @@ static inline uint32_t tb_cflags(const TranslationBlock *tb)
 /* current cflags for hashing/comparison */
 static inline uint32_t curr_cflags(CPUState *cpu)
 {
-    uint32_t cflags = deposit32(0, CF_CLUSTER_SHIFT, 8, cpu->cluster_index);
-    cflags |= parallel_cpus ? CF_PARALLEL : 0;
-    cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
-    return cflags;
+    return cpu->tcg_cflags;
 }
 
 /* TranslationBlock invalidate API */
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index c005d3dc2d..c68bc3ba8a 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -282,6 +282,7 @@ struct qemu_work_item;
  *   to a cluster this will be UNASSIGNED_CLUSTER_INDEX; otherwise it will
  *   be the same as the cluster-id property of the CPU object's TYPE_CPU_CLUSTER
  *   QOM parent.
+ * @tcg_cflags: Pre-computed cflags for this cpu.
  * @nr_cores: Number of cores within this CPU package.
  * @nr_threads: Number of threads within this CPU.
  * @running: #true if CPU is currently running (lockless).
@@ -412,6 +413,7 @@ struct CPUState {
     /* TODO Move common fields from CPUArchState here. */
     int cpu_index;
     int cluster_index;
+    uint32_t tcg_cflags;
     uint32_t halted;
     uint32_t can_do_io;
     int32_t exception_index;
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 931da96c2b..bdfa036ac8 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -267,8 +267,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
             mmap_unlock();
         }
 
-        /* Since we got here, we know that parallel_cpus must be true.  */
-        parallel_cpus = false;
         cpu_exec_enter(cpu);
         /* execute the generated code */
         trace_exec_tb(tb, pc);
@@ -296,7 +294,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
      * the execution.
      */
     g_assert(cpu_in_exclusive_context(cpu));
-    parallel_cpus = true;
     cpu->running = false;
     end_exclusive();
 }
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index 42973fb062..847d2079d2 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -114,8 +114,7 @@ void mttcg_start_vcpu_thread(CPUState *cpu)
     char thread_name[VCPU_THREAD_NAME_SIZE];
 
     g_assert(tcg_enabled());
-
-    parallel_cpus = (current_machine->smp.max_cpus > 1);
+    tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1);
 
     cpu->thread = g_malloc0(sizeof(QemuThread));
     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 4a66055e0d..018b54c508 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -269,7 +269,7 @@ void rr_start_vcpu_thread(CPUState *cpu)
     static QemuThread *single_tcg_cpu_thread;
 
     g_assert(tcg_enabled());
-    parallel_cpus = false;
+    tcg_cpu_init_cflags(cpu, false);
 
     if (!single_tcg_cpu_thread) {
         cpu->thread = g_malloc0(sizeof(QemuThread));
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 6144d9df87..6cdcaa2855 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -41,6 +41,14 @@
 
 /* common functionality among all TCG variants */
 
+void tcg_cpu_init_cflags(CPUState *cpu, bool parallel)
+{
+    uint32_t cflags = cpu->cluster_index << CF_CLUSTER_SHIFT;
+    cflags |= parallel ? CF_PARALLEL : 0;
+    cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
+    cpu->tcg_cflags = cflags;
+}
+
 void tcg_cpus_destroy(CPUState *cpu)
 {
     cpu_thread_signal_destroyed(cpu);
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 0b0bfd35ab..f32df8b240 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -224,7 +224,6 @@ static void *l1_map[V_L1_MAX_SIZE];
 TCGContext tcg_init_ctx;
 __thread TCGContext *tcg_ctx;
 TBContext tb_ctx;
-bool parallel_cpus;
 
 static void page_table_config_init(void)
 {
@@ -1867,9 +1866,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
         cflags = (cflags & ~CF_COUNT_MASK) | 1;
     }
 
-    cflags &= ~CF_CLUSTER_MASK;
-    cflags |= cpu->cluster_index << CF_CLUSTER_SHIFT;
-
     max_insns = cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
diff --git a/linux-user/main.c b/linux-user/main.c
index 81f48ff54e..4f4746dce8 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -205,6 +205,7 @@ CPUArchState *cpu_copy(CPUArchState *env)
     /* Reset non arch specific state */
     cpu_reset(new_cpu);
 
+    new_cpu->tcg_cflags = cpu->tcg_cflags;
     memcpy(new_env, env, sizeof(CPUArchState));
 
     /* Clone all break/watchpoints.
diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
index cc89a48ff8..29c1ee30e6 100644
--- a/linux-user/sh4/signal.c
+++ b/linux-user/sh4/signal.c
@@ -82,9 +82,11 @@ static abi_ulong get_sigframe(struct target_sigaction *ka,
     return (sp - frame_size) & -8ul;
 }
 
-/* Notice when we're in the middle of a gUSA region and reset.
-   Note that this will only occur for !parallel_cpus, as we will
-   translate such sequences differently in a parallel context.  */
+/*
+ * Notice when we're in the middle of a gUSA region and reset.
+ * Note that this will only occur when #CF_PARALLEL is unset, as we
+ * will translate such sequences differently in a parallel context.
+ */
 static void unwind_gusa(CPUSH4State *regs)
 {
     /* If the stack pointer is sufficiently negative, and we haven't
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 389ec09764..9522f603aa 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -6481,6 +6481,16 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         /* Grab a mutex so that thread setup appears atomic.  */
         pthread_mutex_lock(&clone_lock);
 
+        /*
+         * If this is our first additional thread, we need to ensure we
+         * generate code for parallel execution and flush old translations.
+         * Do this now so that the copy gets CF_PARALLEL too.
+         */
+        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
+            cpu->tcg_cflags |= CF_PARALLEL;
+            tb_flush(cpu);
+        }
+
         /* we create a new CPU instance. */
         new_env = cpu_copy(env);
         /* Init regs that differ from the parent.  */
@@ -6521,14 +6531,6 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         sigprocmask(SIG_BLOCK, &sigmask, &info.sigmask);
         cpu->random_seed = qemu_guest_random_seed_thread_part1();
 
-        /* If this is our first additional thread, we need to ensure we
-         * generate code for parallel execution and flush old translations.
-         */
-        if (!parallel_cpus) {
-            parallel_cpus = true;
-            tb_flush(cpu);
-        }
-
         ret = pthread_create(&info.thread, &attr, clone_func, &info);
         /* TODO: Free new CPU state if thread creation failed.  */
 
-- 
2.25.1



  parent reply	other threads:[~2021-03-02 18:15 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-02 17:57 [PATCH 00/27] tcg patch queue Richard Henderson
2021-03-02 17:57 ` [PATCH 01/27] tcg/aarch64: Fix constant subtraction in tcg_out_addsub2 Richard Henderson
2021-03-02 17:57 ` [PATCH 02/27] tcg/aarch64: Fix I3617_CMLE0 Richard Henderson
2021-03-05 14:17   ` Peter Maydell
2021-03-02 17:57 ` [PATCH 03/27] tcg/aarch64: Fix generation of "scalar" vector operations Richard Henderson
2021-03-05 14:35   ` Peter Maydell
2021-03-05 15:21     ` Richard Henderson
2021-03-02 17:57 ` [PATCH 04/27] tcg/tci: Use exec/cpu_ldst.h interfaces Richard Henderson
2021-03-05 17:32   ` Philippe Mathieu-Daudé
2021-03-05 17:45   ` Alex Bennée
2021-03-02 17:57 ` [PATCH 05/27] tcg: Split out tcg_raise_tb_overflow Richard Henderson
2021-03-05 17:49   ` Alex Bennée
2021-03-02 17:57 ` [PATCH 06/27] tcg: Manage splitwx in tc_ptr_to_region_tree by hand Richard Henderson
2021-03-05 17:50   ` Alex Bennée
2021-03-02 17:57 ` [PATCH 07/27] tcg/tci: Merge identical cases in generation (arithmetic opcodes) Richard Henderson
2021-03-02 17:57 ` [PATCH 08/27] tcg/tci: Merge identical cases in generation (exchange opcodes) Richard Henderson
2021-03-02 17:57 ` [PATCH 09/27] tcg/tci: Merge identical cases in generation (deposit opcode) Richard Henderson
2021-03-02 17:57 ` [PATCH 10/27] tcg/tci: Merge identical cases in generation (conditional opcodes) Richard Henderson
2021-03-02 17:57 ` [PATCH 11/27] tcg/tci: Merge identical cases in generation (load/store opcodes) Richard Henderson
2021-03-02 17:57 ` [PATCH 12/27] tcg/tci: Remove tci_read_r8 Richard Henderson
2021-03-05 17:50   ` Alex Bennée
2021-03-02 17:57 ` [PATCH 13/27] tcg/tci: Remove tci_read_r8s Richard Henderson
2021-03-05 17:50   ` Alex Bennée
2021-03-02 17:57 ` [PATCH 14/27] tcg/tci: Remove tci_read_r16 Richard Henderson
2021-03-05 17:51   ` Alex Bennée
2021-03-02 17:57 ` [PATCH 15/27] tcg/tci: Remove tci_read_r16s Richard Henderson
2021-03-05 17:51   ` Alex Bennée
2021-03-02 17:57 ` [PATCH 16/27] tcg/tci: Remove tci_read_r32 Richard Henderson
2021-03-05 17:51   ` Alex Bennée
2021-03-02 17:57 ` [PATCH 17/27] tcg/tci: Remove tci_read_r32s Richard Henderson
2021-03-05 17:51   ` Alex Bennée
2021-03-02 17:57 ` [PATCH 18/27] tcg/tci: Reduce use of tci_read_r64 Richard Henderson
2021-03-05 17:53   ` Alex Bennée
2021-03-02 17:57 ` [PATCH 19/27] tcg/tci: Merge basic arithmetic operations Richard Henderson
2021-03-05 17:54   ` Alex Bennée
2021-03-02 17:57 ` [PATCH 20/27] tcg/tci: Merge extension operations Richard Henderson
2021-03-05 17:56   ` Alex Bennée
2021-03-02 17:57 ` [PATCH 21/27] tcg/tci: Merge bswap operations Richard Henderson
2021-03-05 17:57   ` Alex Bennée
2021-03-02 17:57 ` [PATCH 22/27] tcg/tci: Merge mov, not and neg operations Richard Henderson
2021-03-05 17:58   ` Alex Bennée
2021-03-02 17:57 ` [PATCH 23/27] accel/tcg: rename tb_lookup__cpu_state and hoist state extraction Richard Henderson
2021-03-02 17:57 ` [PATCH 24/27] accel/tcg: move CF_CLUSTER calculation to curr_cflags Richard Henderson
2021-03-02 17:57 ` [PATCH 25/27] accel/tcg: drop the use of CF_HASH_MASK and rename params Richard Henderson
2021-03-02 17:57 ` [PATCH 26/27] include/exec: lightly re-arrange TranslationBlock Richard Henderson
2021-03-02 17:57 ` Richard Henderson [this message]
2021-03-05 17:12   ` [PATCH 27/27] accel/tcg: Precompute curr_cflags into cpu->tcg_cflags Alex Bennée
2021-03-02 18:33 ` [PATCH 00/27] tcg patch queue no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210302175741.1079851-28-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=alex.bennee@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.