* [PULL 0/9] tcg patch queue for 2022-06-21
From: Richard Henderson @ 2022-06-21 20:46 UTC
  To: qemu-devel

The following changes since commit c8b2d413761af732a0798d8df45ce968732083fe:

  Merge tag 'bsd-user-syscall-2022q2-pull-request' of ssh://github.com/qemu-bsd-user/qemu-bsd-user into staging (2022-06-19 13:56:13 -0700)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20220621

for you to fetch changes up to c79a8e840c435bc26a251e34b043318e8b2081db:

  util/cacheflush: Optimize flushing when ppc host has coherent icache (2022-06-21 09:28:41 -0700)

----------------------------------------------------------------
Speed up qemu_clock_deadline_ns_all when timer lists are empty.
Implement remainder for Power ISA v3.0 hosts.
Optimize ppc host icache flushing.
Cleanups to tcg_accel_ops_init.
Fix an mmio crash when accessing unmapped physical memory.

----------------------------------------------------------------
Bin Meng (1):
      target/avr: Drop avr_cpu_memory_rw_debug()

Idan Horowitz (1):
      qemu-timer: Skip empty timer lists before locking in qemu_clock_deadline_ns_all

Matheus Kowalczuk Ferst (1):
      tcg/ppc: implement rem[u]_i{32,64} with mod[su][wd]

Nicholas Piggin (1):
      util/cacheflush: Optimize flushing when ppc host has coherent icache

Philippe Mathieu-Daudé (2):
      accel/tcg: Init TCG cflags in vCPU thread handler
      accel/tcg: Reorganize tcg_accel_ops_init()

Richard Henderson (3):
      softmmu: Always initialize xlat in address_space_translate_for_iotlb
      util: Merge cacheflush.c and cacheinfo.c
      util/cacheflush: Merge aarch64 ctr_el0 usage

 target/avr/cpu.h                |   2 -
 tcg/ppc/tcg-target.h            |   4 +-
 accel/tcg/tcg-accel-ops-mttcg.c |   5 +-
 accel/tcg/tcg-accel-ops-rr.c    |   7 +-
 accel/tcg/tcg-accel-ops.c       |  15 +--
 softmmu/physmem.c               |  13 ++-
 target/avr/cpu.c                |   1 -
 target/avr/helper.c             |   6 -
 util/cacheflush.c               | 247 +++++++++++++++++++++++++++++++++++++---
 util/cacheinfo.c                | 200 --------------------------------
 util/qemu-timer.c               |   3 +
 tcg/ppc/tcg-target.c.inc        |  22 ++++
 util/meson.build                |   2 +-
 13 files changed, 284 insertions(+), 243 deletions(-)
 delete mode 100644 util/cacheinfo.c



* [PULL 1/9] tcg/ppc: implement rem[u]_i{32,64} with mod[su][wd]
From: Richard Henderson @ 2022-06-21 20:46 UTC
  To: qemu-devel; +Cc: Matheus Kowalczuk Ferst

From: Matheus Kowalczuk Ferst <matheus.ferst@eldorado.org.br>

Power ISA v3.0 introduced mod[su][wd] insns that can be used to
implement rem[u]_i{32,64}.
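
For reference (not part of the patch), TCG's rem/remu opcodes use
truncated division, i.e. the same semantics as C's % operator, and
that is exactly what mod[su][wd] compute.  A minimal C sketch of the
expected behaviour:

    #include <stdint.h>
    #include <assert.h>

    /* Reference semantics for the new lowerings: the remainder takes
       the sign of the dividend, as with C's % operator. */
    static int32_t ref_rem_i32(int32_t n, int32_t d)      /* -> MODSW */
    {
        return n % d;
    }

    static uint32_t ref_remu_i32(uint32_t n, uint32_t d)  /* -> MODUW */
    {
        return n % d;
    }

    int main(void)
    {
        assert(ref_rem_i32(-7, 2) == -1);   /* sign follows the dividend */
        assert(ref_remu_i32(7u, 2u) == 1u);
        return 0;
    }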

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.h     |  4 ++--
 tcg/ppc/tcg-target.c.inc | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index e6cf72503f..b5cd225cfa 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -83,7 +83,7 @@ extern bool have_vsx;
 
 /* optional instructions */
 #define TCG_TARGET_HAS_div_i32          1
-#define TCG_TARGET_HAS_rem_i32          0
+#define TCG_TARGET_HAS_rem_i32          have_isa_3_00
 #define TCG_TARGET_HAS_rot_i32          1
 #define TCG_TARGET_HAS_ext8s_i32        1
 #define TCG_TARGET_HAS_ext16s_i32       1
@@ -117,7 +117,7 @@ extern bool have_vsx;
 #define TCG_TARGET_HAS_extrl_i64_i32    0
 #define TCG_TARGET_HAS_extrh_i64_i32    0
 #define TCG_TARGET_HAS_div_i64          1
-#define TCG_TARGET_HAS_rem_i64          0
+#define TCG_TARGET_HAS_rem_i64          have_isa_3_00
 #define TCG_TARGET_HAS_rot_i64          1
 #define TCG_TARGET_HAS_ext8s_i64        1
 #define TCG_TARGET_HAS_ext16s_i64       1
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index de4483e43b..1cbd047ab3 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -371,6 +371,8 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
 #define MULHWU XO31( 11)
 #define DIVW   XO31(491)
 #define DIVWU  XO31(459)
+#define MODSW  XO31(779)
+#define MODUW  XO31(267)
 #define CMP    XO31(  0)
 #define CMPL   XO31( 32)
 #define LHBRX  XO31(790)
@@ -403,6 +405,8 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
 #define MULHDU XO31(  9)
 #define DIVD   XO31(489)
 #define DIVDU  XO31(457)
+#define MODSD  XO31(777)
+#define MODUD  XO31(265)
 
 #define LBZX   XO31( 87)
 #define LHZX   XO31(279)
@@ -2806,6 +2810,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
         break;
 
+    case INDEX_op_rem_i32:
+        tcg_out32(s, MODSW | TAB(args[0], args[1], args[2]));
+        break;
+
+    case INDEX_op_remu_i32:
+        tcg_out32(s, MODUW | TAB(args[0], args[1], args[2]));
+        break;
+
     case INDEX_op_shl_i32:
         if (const_args[2]) {
             /* Limit immediate shift count lest we create an illegal insn.  */
@@ -2947,6 +2959,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_divu_i64:
         tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
         break;
+    case INDEX_op_rem_i64:
+        tcg_out32(s, MODSD | TAB(args[0], args[1], args[2]));
+        break;
+    case INDEX_op_remu_i64:
+        tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
+        break;
 
     case INDEX_op_qemu_ld_i32:
         tcg_out_qemu_ld(s, args, false);
@@ -3722,6 +3740,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 
     case INDEX_op_div_i32:
     case INDEX_op_divu_i32:
+    case INDEX_op_rem_i32:
+    case INDEX_op_remu_i32:
     case INDEX_op_nand_i32:
     case INDEX_op_nor_i32:
     case INDEX_op_muluh_i32:
@@ -3732,6 +3752,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_nor_i64:
     case INDEX_op_div_i64:
     case INDEX_op_divu_i64:
+    case INDEX_op_rem_i64:
+    case INDEX_op_remu_i64:
     case INDEX_op_mulsh_i64:
     case INDEX_op_muluh_i64:
         return C_O1_I2(r, r, r);
-- 
2.34.1




* [PULL 2/9] target/avr: Drop avr_cpu_memory_rw_debug()
From: Richard Henderson @ 2022-06-21 20:46 UTC
  To: qemu-devel; +Cc: Bin Meng, Philippe Mathieu-Daudé

From: Bin Meng <bmeng.cn@gmail.com>

CPUClass::memory_rw_debug() holds a callback for GDB memory access.
If not provided, cpu_memory_rw_debug() is used by the GDB stub.
Drop avr_cpu_memory_rw_debug() which does nothing special.
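
For context, a sketch of the dispatch in the GDB stub that makes the
wrapper redundant (paraphrased from gdbstub.c; the exact shape there
may differ):

    /* If a target provides no memory_rw_debug hook, the stub falls
       back to cpu_memory_rw_debug() itself, so a wrapper that only
       forwards the call adds nothing. */
    static int target_memory_rw_debug(CPUState *cpu, vaddr addr,
                                      uint8_t *buf, int len, bool is_write)
    {
        CPUClass *cc = CPU_GET_CLASS(cpu);

        if (cc->memory_rw_debug) {
            return cc->memory_rw_debug(cpu, addr, buf, len, is_write);
        }
        return cpu_memory_rw_debug(cpu, addr, buf, len, is_write);
    }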

Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20220322095004.70682-1-bmeng.cn@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/avr/cpu.h    | 2 --
 target/avr/cpu.c    | 1 -
 target/avr/helper.c | 6 ------
 3 files changed, 9 deletions(-)

diff --git a/target/avr/cpu.h b/target/avr/cpu.h
index d304f33301..96419c0c2b 100644
--- a/target/avr/cpu.h
+++ b/target/avr/cpu.h
@@ -184,8 +184,6 @@ void avr_cpu_tcg_init(void);
 
 void avr_cpu_list(void);
 int cpu_avr_exec(CPUState *cpu);
-int avr_cpu_memory_rw_debug(CPUState *cs, vaddr address, uint8_t *buf,
-                            int len, bool is_write);
 
 enum {
     TB_FLAGS_FULL_ACCESS = 1,
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
index 5d70e34dd5..05b992ff73 100644
--- a/target/avr/cpu.c
+++ b/target/avr/cpu.c
@@ -214,7 +214,6 @@ static void avr_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = avr_cpu_has_work;
     cc->dump_state = avr_cpu_dump_state;
     cc->set_pc = avr_cpu_set_pc;
-    cc->memory_rw_debug = avr_cpu_memory_rw_debug;
     dc->vmsd = &vms_avr_cpu;
     cc->sysemu_ops = &avr_sysemu_ops;
     cc->disas_set_info = avr_cpu_disas_set_info;
diff --git a/target/avr/helper.c b/target/avr/helper.c
index c27f702901..db76452f9a 100644
--- a/target/avr/helper.c
+++ b/target/avr/helper.c
@@ -93,12 +93,6 @@ void avr_cpu_do_interrupt(CPUState *cs)
     cs->exception_index = -1;
 }
 
-int avr_cpu_memory_rw_debug(CPUState *cs, vaddr addr, uint8_t *buf,
-                            int len, bool is_write)
-{
-    return cpu_memory_rw_debug(cs, addr, buf, len, is_write);
-}
-
 hwaddr avr_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
 {
     return addr; /* I assume 1:1 address correspondence */
-- 
2.34.1




* [PULL 3/9] accel/tcg: Init TCG cflags in vCPU thread handler
From: Richard Henderson @ 2022-06-21 20:46 UTC
  To: qemu-devel; +Cc: Philippe Mathieu-Daudé

From: Philippe Mathieu-Daudé <f4bug@amsat.org>

Move TCG cflags initialization to thread handler.
Remove the duplicated assert checks.
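
For reference, a sketch of what tcg_cpu_init_cflags() computes
(paraphrased from accel/tcg/tcg-accel-ops.c; treat the exact shape as
an assumption) -- the patch only changes where it is called from, not
what it sets:

    void tcg_cpu_init_cflags(CPUState *cpu, bool parallel)
    {
        uint32_t cflags = cpu->cluster_index << CF_CLUSTER_SHIFT;

        /* Per-vCPU translation flags: parallel MTTCG execution and
           icount both change how TBs must be generated. */
        cflags |= parallel ? CF_PARALLEL : 0;
        cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
        cpu->tcg_cflags = cflags;
    }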

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20220323171751.78612-6-philippe.mathieu.daude@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-accel-ops-mttcg.c | 5 ++---
 accel/tcg/tcg-accel-ops-rr.c    | 7 +++----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index d50239e0e2..ba997f6cfe 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -70,6 +70,8 @@ static void *mttcg_cpu_thread_fn(void *arg)
     assert(tcg_enabled());
     g_assert(!icount_enabled());
 
+    tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1);
+
     rcu_register_thread();
     force_rcu.notifier.notify = mttcg_force_rcu;
     force_rcu.cpu = cpu;
@@ -139,9 +141,6 @@ void mttcg_start_vcpu_thread(CPUState *cpu)
 {
     char thread_name[VCPU_THREAD_NAME_SIZE];
 
-    g_assert(tcg_enabled());
-    tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1);
-
     cpu->thread = g_new0(QemuThread, 1);
     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
     qemu_cond_init(cpu->halt_cond);
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 1a72149f0e..cc8adc2380 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -152,7 +152,9 @@ static void *rr_cpu_thread_fn(void *arg)
     Notifier force_rcu;
     CPUState *cpu = arg;
 
-    assert(tcg_enabled());
+    g_assert(tcg_enabled());
+    tcg_cpu_init_cflags(cpu, false);
+
     rcu_register_thread();
     force_rcu.notify = rr_force_rcu;
     rcu_add_force_rcu_notifier(&force_rcu);
@@ -275,9 +277,6 @@ void rr_start_vcpu_thread(CPUState *cpu)
     static QemuCond *single_tcg_halt_cond;
     static QemuThread *single_tcg_cpu_thread;
 
-    g_assert(tcg_enabled());
-    tcg_cpu_init_cflags(cpu, false);
-
     if (!single_tcg_cpu_thread) {
         cpu->thread = g_new0(QemuThread, 1);
         cpu->halt_cond = g_new0(QemuCond, 1);
-- 
2.34.1




* [PULL 4/9] accel/tcg: Reorganize tcg_accel_ops_init()
From: Richard Henderson @ 2022-06-21 20:46 UTC
  To: qemu-devel; +Cc: Philippe Mathieu-Daudé

From: Philippe Mathieu-Daudé <f4bug@amsat.org>

Reorg TCG AccelOpsClass initialization to emphasize that icount
mode shares more code with single-threaded TCG.

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220323171751.78612-7-philippe.mathieu.daude@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-accel-ops.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 684dc5a137..786d90c08f 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -97,16 +97,17 @@ static void tcg_accel_ops_init(AccelOpsClass *ops)
         ops->create_vcpu_thread = mttcg_start_vcpu_thread;
         ops->kick_vcpu_thread = mttcg_kick_vcpu_thread;
         ops->handle_interrupt = tcg_handle_interrupt;
-    } else if (icount_enabled()) {
-        ops->create_vcpu_thread = rr_start_vcpu_thread;
-        ops->kick_vcpu_thread = rr_kick_vcpu_thread;
-        ops->handle_interrupt = icount_handle_interrupt;
-        ops->get_virtual_clock = icount_get;
-        ops->get_elapsed_ticks = icount_get;
     } else {
         ops->create_vcpu_thread = rr_start_vcpu_thread;
         ops->kick_vcpu_thread = rr_kick_vcpu_thread;
-        ops->handle_interrupt = tcg_handle_interrupt;
+
+        if (icount_enabled()) {
+            ops->handle_interrupt = icount_handle_interrupt;
+            ops->get_virtual_clock = icount_get;
+            ops->get_elapsed_ticks = icount_get;
+        } else {
+            ops->handle_interrupt = tcg_handle_interrupt;
+        }
     }
 }
 
-- 
2.34.1




* [PULL 5/9] qemu-timer: Skip empty timer lists before locking in qemu_clock_deadline_ns_all
From: Richard Henderson @ 2022-06-21 20:46 UTC
  To: qemu-devel; +Cc: Idan Horowitz

From: Idan Horowitz <idan.horowitz@gmail.com>

This decreases qemu_clock_deadline_ns_all's share from 23.2% to 13% in a
profile of icount-enabled aarch64-softmmu.
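
The change is a check-then-lock pattern; a commented sketch of the
patched loop (the rationale in the comment is an editorial addition,
not from the patch):

    QLIST_FOREACH(timer_list, &clock->timerlists, list) {
        /* Cheap unlocked check: don't take the per-list mutex just to
           find an empty list.  The read may race with a concurrent
           timer_mod(), but adding a timer notifies the main loop,
           which recomputes the deadline anyway. */
        if (!qatomic_read(&timer_list->active_timers)) {
            continue;
        }
        qemu_mutex_lock(&timer_list->active_timers_lock);
        /* ... examine the active timers under the lock ... */
    }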

Signed-off-by: Idan Horowitz <idan.horowitz@gmail.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220114004358.299534-2-idan.horowitz@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 util/qemu-timer.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index a670a57881..6a0de33dd2 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -261,6 +261,9 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
     }
 
     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
+        if (!qatomic_read(&timer_list->active_timers)) {
+            continue;
+        }
         qemu_mutex_lock(&timer_list->active_timers_lock);
         ts = timer_list->active_timers;
         /* Skip all external timers */
-- 
2.34.1




* [PULL 6/9] softmmu: Always initialize xlat in address_space_translate_for_iotlb
From: Richard Henderson @ 2022-06-21 20:46 UTC
  To: qemu-devel; +Cc: Peter Maydell

The bug is an uninitialized memory read, along the translate_fail
path, which results in garbage being read from iotlb_to_section,
which can lead to a crash in io_readx/io_writex.

The bug may be fixed by writing any value whose page-offset bits
(those within ~TARGET_PAGE_MASK) are zero, so that the call to
iotlb_to_section using the xlat'ed address returns io_mem_unassigned,
as desired by the translate_fail path.

It is most useful to record the original physical page address,
which will eventually be logged by memory_region_access_valid
when the access is rejected by unassigned_mem_accepts.
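
A sketch of why the page-offset bits matter (assuming this mirrors the
indexing that iotlb_to_section() performs):

    /* The low bits of the xlat'ed address select an entry in
       sections[]; index 0 is PHYS_SECTION_UNASSIGNED, so any
       page-aligned value resolves to io_mem_unassigned on the
       translate_fail path. */
    unsigned idx = xlat & ~TARGET_PAGE_MASK;
    MemoryRegionSection *section = &d->map.sections[idx];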

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1065
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <20220621153829.366423-1-richard.henderson@linaro.org>
---
 softmmu/physmem.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index fb16be57a6..dc3c3e5f2e 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -669,7 +669,7 @@ void tcg_iommu_init_notifier_list(CPUState *cpu)
 
 /* Called from RCU critical section */
 MemoryRegionSection *
-address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
+address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
                                   hwaddr *xlat, hwaddr *plen,
                                   MemTxAttrs attrs, int *prot)
 {
@@ -678,6 +678,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
     IOMMUMemoryRegionClass *imrc;
     IOMMUTLBEntry iotlb;
     int iommu_idx;
+    hwaddr addr = orig_addr;
     AddressSpaceDispatch *d =
         qatomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
 
@@ -722,6 +723,16 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
     return section;
 
 translate_fail:
+    /*
+     * We should be given a page-aligned address -- certainly
+     * tlb_set_page_with_attrs() does so.  The page offset of xlat
+     * is used to index sections[], and PHYS_SECTION_UNASSIGNED = 0.
+     * The page portion of xlat will be logged by memory_region_access_valid()
+     * when this memory access is rejected, so use the original untranslated
+     * physical address.
+     */
+    assert((orig_addr & ~TARGET_PAGE_MASK) == 0);
+    *xlat = orig_addr;
     return &d->map.sections[PHYS_SECTION_UNASSIGNED];
 }
 
-- 
2.34.1




* [PULL 7/9] util: Merge cacheflush.c and cacheinfo.c
From: Richard Henderson @ 2022-06-21 20:46 UTC
  To: qemu-devel; +Cc: Peter Maydell

Combine the two files into cacheflush.c.  There are a couple of bits
that would be helpful to share between the two, and combining them
seems better than exporting the bits.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <20220621014837.189139-2-richard.henderson@linaro.org>
---
 util/cacheflush.c | 202 +++++++++++++++++++++++++++++++++++++++++++++-
 util/cacheinfo.c  | 200 ---------------------------------------------
 util/meson.build  |   2 +-
 3 files changed, 202 insertions(+), 202 deletions(-)
 delete mode 100644 util/cacheinfo.c

diff --git a/util/cacheflush.c b/util/cacheflush.c
index 4b57186d89..8096afd33c 100644
--- a/util/cacheflush.c
+++ b/util/cacheflush.c
@@ -1,5 +1,5 @@
 /*
- * Flush the host cpu caches.
+ * Info about, and flushing the host cpu caches.
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -9,8 +9,208 @@
 #include "qemu/cacheflush.h"
 #include "qemu/cacheinfo.h"
 #include "qemu/bitops.h"
+#include "qemu/host-utils.h"
+#include "qemu/atomic.h"
 
 
+int qemu_icache_linesize = 0;
+int qemu_icache_linesize_log;
+int qemu_dcache_linesize = 0;
+int qemu_dcache_linesize_log;
+
+/*
+ * Operating system specific cache detection mechanisms.
+ */
+
+#if defined(_WIN32)
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
+    DWORD size = 0;
+    BOOL success;
+    size_t i, n;
+
+    /*
+     * Check for the required buffer size first.  Note that if the zero
+     * size we use for the probe results in success, then there is no
+     * data available; fail in that case.
+     */
+    success = GetLogicalProcessorInformation(0, &size);
+    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+        return;
+    }
+
+    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
+    if (!GetLogicalProcessorInformation(buf, &size)) {
+        goto fail;
+    }
+
+    for (i = 0; i < n; i++) {
+        if (buf[i].Relationship == RelationCache
+            && buf[i].Cache.Level == 1) {
+            switch (buf[i].Cache.Type) {
+            case CacheUnified:
+                *isize = *dsize = buf[i].Cache.LineSize;
+                break;
+            case CacheInstruction:
+                *isize = buf[i].Cache.LineSize;
+                break;
+            case CacheData:
+                *dsize = buf[i].Cache.LineSize;
+                break;
+            default:
+                break;
+            }
+        }
+    }
+ fail:
+    g_free(buf);
+}
+
+#elif defined(__APPLE__)
+# include <sys/sysctl.h>
+static void sys_cache_info(int *isize, int *dsize)
+{
+    /* There's only a single sysctl for both I/D cache line sizes.  */
+    long size;
+    size_t len = sizeof(size);
+    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
+        *isize = *dsize = size;
+    }
+}
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+# include <sys/sysctl.h>
+static void sys_cache_info(int *isize, int *dsize)
+{
+    /* There's only a single sysctl for both I/D cache line sizes.  */
+    int size;
+    size_t len = sizeof(size);
+    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
+        *isize = *dsize = size;
+    }
+}
+#else
+/* POSIX */
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+# ifdef _SC_LEVEL1_ICACHE_LINESIZE
+    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    if (tmp_isize > 0) {
+        *isize = tmp_isize;
+    }
+# endif
+# ifdef _SC_LEVEL1_DCACHE_LINESIZE
+    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    if (tmp_dsize > 0) {
+        *dsize = tmp_dsize;
+    }
+# endif
+}
+#endif /* sys_cache_info */
+
+
+/*
+ * Architecture (+ OS) specific cache detection mechanisms.
+ */
+
+#if defined(__aarch64__)
+
+static void arch_cache_info(int *isize, int *dsize)
+{
+    if (*isize == 0 || *dsize == 0) {
+        uint64_t ctr;
+
+        /*
+         * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
+         * but (at least under Linux) these are marked protected by the
+         * kernel.  However, CTR_EL0 contains the minimum linesize in the
+         * entire hierarchy, and is used by userspace cache flushing.
+         */
+        asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
+        if (*isize == 0) {
+            *isize = 4 << (ctr & 0xf);
+        }
+        if (*dsize == 0) {
+            *dsize = 4 << ((ctr >> 16) & 0xf);
+        }
+    }
+}
+
+#elif defined(_ARCH_PPC) && defined(__linux__)
+# include "elf.h"
+
+static void arch_cache_info(int *isize, int *dsize)
+{
+    if (*isize == 0) {
+        *isize = qemu_getauxval(AT_ICACHEBSIZE);
+    }
+    if (*dsize == 0) {
+        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
+    }
+}
+
+#else
+static void arch_cache_info(int *isize, int *dsize) { }
+#endif /* arch_cache_info */
+
+/*
+ * ... and if all else fails ...
+ */
+
+static void fallback_cache_info(int *isize, int *dsize)
+{
+    /* If we can only find one of the two, assume they're the same.  */
+    if (*isize) {
+        if (*dsize) {
+            /* Success! */
+        } else {
+            *dsize = *isize;
+        }
+    } else if (*dsize) {
+        *isize = *dsize;
+    } else {
+#if defined(_ARCH_PPC)
+        /*
+         * For PPC, we're going to use the cache sizes computed for
+         * flush_idcache_range.  Which means that we must use the
+         * architecture minimum.
+         */
+        *isize = *dsize = 16;
+#else
+        /* Otherwise, 64 bytes is not uncommon.  */
+        *isize = *dsize = 64;
+#endif
+    }
+}
+
+static void __attribute__((constructor)) init_cache_info(void)
+{
+    int isize = 0, dsize = 0;
+
+    sys_cache_info(&isize, &dsize);
+    arch_cache_info(&isize, &dsize);
+    fallback_cache_info(&isize, &dsize);
+
+    assert((isize & (isize - 1)) == 0);
+    assert((dsize & (dsize - 1)) == 0);
+
+    qemu_icache_linesize = isize;
+    qemu_icache_linesize_log = ctz32(isize);
+    qemu_dcache_linesize = dsize;
+    qemu_dcache_linesize_log = ctz32(dsize);
+
+    qatomic64_init();
+}
+
+
+/*
+ * Architecture (+ OS) specific cache flushing mechanisms.
+ */
+
 #if defined(__i386__) || defined(__x86_64__) || defined(__s390__)
 
 /* Caches are coherent and do not require flushing; symbol inline. */
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
deleted file mode 100644
index ab1644d490..0000000000
--- a/util/cacheinfo.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * cacheinfo.c - helpers to query the host about its caches
- *
- * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
- * License: GNU GPL, version 2 or later.
- *   See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "qemu/host-utils.h"
-#include "qemu/atomic.h"
-#include "qemu/cacheinfo.h"
-
-int qemu_icache_linesize = 0;
-int qemu_icache_linesize_log;
-int qemu_dcache_linesize = 0;
-int qemu_dcache_linesize_log;
-
-/*
- * Operating system specific detection mechanisms.
- */
-
-#if defined(_WIN32)
-
-static void sys_cache_info(int *isize, int *dsize)
-{
-    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
-    DWORD size = 0;
-    BOOL success;
-    size_t i, n;
-
-    /* Check for the required buffer size first.  Note that if the zero
-       size we use for the probe results in success, then there is no
-       data available; fail in that case.  */
-    success = GetLogicalProcessorInformation(0, &size);
-    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
-        return;
-    }
-
-    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
-    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
-    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
-    if (!GetLogicalProcessorInformation(buf, &size)) {
-        goto fail;
-    }
-
-    for (i = 0; i < n; i++) {
-        if (buf[i].Relationship == RelationCache
-            && buf[i].Cache.Level == 1) {
-            switch (buf[i].Cache.Type) {
-            case CacheUnified:
-                *isize = *dsize = buf[i].Cache.LineSize;
-                break;
-            case CacheInstruction:
-                *isize = buf[i].Cache.LineSize;
-                break;
-            case CacheData:
-                *dsize = buf[i].Cache.LineSize;
-                break;
-            default:
-                break;
-            }
-        }
-    }
- fail:
-    g_free(buf);
-}
-
-#elif defined(__APPLE__)
-# include <sys/sysctl.h>
-static void sys_cache_info(int *isize, int *dsize)
-{
-    /* There's only a single sysctl for both I/D cache line sizes.  */
-    long size;
-    size_t len = sizeof(size);
-    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
-        *isize = *dsize = size;
-    }
-}
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-# include <sys/sysctl.h>
-static void sys_cache_info(int *isize, int *dsize)
-{
-    /* There's only a single sysctl for both I/D cache line sizes.  */
-    int size;
-    size_t len = sizeof(size);
-    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
-        *isize = *dsize = size;
-    }
-}
-#else
-/* POSIX */
-
-static void sys_cache_info(int *isize, int *dsize)
-{
-# ifdef _SC_LEVEL1_ICACHE_LINESIZE
-    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
-    if (tmp_isize > 0) {
-        *isize = tmp_isize;
-    }
-# endif
-# ifdef _SC_LEVEL1_DCACHE_LINESIZE
-    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
-    if (tmp_dsize > 0) {
-        *dsize = tmp_dsize;
-    }
-# endif
-}
-#endif /* sys_cache_info */
-
-/*
- * Architecture (+ OS) specific detection mechanisms.
- */
-
-#if defined(__aarch64__)
-
-static void arch_cache_info(int *isize, int *dsize)
-{
-    if (*isize == 0 || *dsize == 0) {
-        uint64_t ctr;
-
-        /* The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
-           but (at least under Linux) these are marked protected by the
-           kernel.  However, CTR_EL0 contains the minimum linesize in the
-           entire hierarchy, and is used by userspace cache flushing.  */
-        asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
-        if (*isize == 0) {
-            *isize = 4 << (ctr & 0xf);
-        }
-        if (*dsize == 0) {
-            *dsize = 4 << ((ctr >> 16) & 0xf);
-        }
-    }
-}
-
-#elif defined(_ARCH_PPC) && defined(__linux__)
-# include "elf.h"
-
-static void arch_cache_info(int *isize, int *dsize)
-{
-    if (*isize == 0) {
-        *isize = qemu_getauxval(AT_ICACHEBSIZE);
-    }
-    if (*dsize == 0) {
-        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
-    }
-}
-
-#else
-static void arch_cache_info(int *isize, int *dsize) { }
-#endif /* arch_cache_info */
-
-/*
- * ... and if all else fails ...
- */
-
-static void fallback_cache_info(int *isize, int *dsize)
-{
-    /* If we can only find one of the two, assume they're the same.  */
-    if (*isize) {
-        if (*dsize) {
-            /* Success! */
-        } else {
-            *dsize = *isize;
-        }
-    } else if (*dsize) {
-        *isize = *dsize;
-    } else {
-#if defined(_ARCH_PPC)
-        /*
-         * For PPC, we're going to use the cache sizes computed for
-         * flush_idcache_range.  Which means that we must use the
-         * architecture minimum.
-         */
-        *isize = *dsize = 16;
-#else
-        /* Otherwise, 64 bytes is not uncommon.  */
-        *isize = *dsize = 64;
-#endif
-    }
-}
-
-static void __attribute__((constructor)) init_cache_info(void)
-{
-    int isize = 0, dsize = 0;
-
-    sys_cache_info(&isize, &dsize);
-    arch_cache_info(&isize, &dsize);
-    fallback_cache_info(&isize, &dsize);
-
-    assert((isize & (isize - 1)) == 0);
-    assert((dsize & (dsize - 1)) == 0);
-
-    qemu_icache_linesize = isize;
-    qemu_icache_linesize_log = ctz32(isize);
-    qemu_dcache_linesize = dsize;
-    qemu_dcache_linesize_log = ctz32(dsize);
-
-    qatomic64_init();
-}
diff --git a/util/meson.build b/util/meson.build
index 8f16018cd4..4939b0b91c 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -27,7 +27,7 @@ util_ss.add(files('envlist.c', 'path.c', 'module.c'))
 util_ss.add(files('host-utils.c'))
 util_ss.add(files('bitmap.c', 'bitops.c'))
 util_ss.add(files('fifo8.c'))
-util_ss.add(files('cacheinfo.c', 'cacheflush.c'))
+util_ss.add(files('cacheflush.c'))
 util_ss.add(files('error.c', 'error-report.c'))
 util_ss.add(files('qemu-print.c'))
 util_ss.add(files('id.c'))
-- 
2.34.1




* [PULL 8/9] util/cacheflush: Merge aarch64 ctr_el0 usage
From: Richard Henderson @ 2022-06-21 20:46 UTC
  To: qemu-devel; +Cc: Peter Maydell

Merge init_ctr_el0 into arch_cache_info.  In flush_idcache_range,
use the pre-computed line sizes from the global variables.
Use CONFIG_DARWIN in preference to __APPLE__.
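
For context, the CTR_EL0 fields this code consumes (summarized from
the Arm ARM; the decode below matches the expressions in the patch):

    /* CTR_EL0:
         [3:0]   IminLine - log2 of the smallest I-cache line, in
                            4-byte words
         [19:16] DminLine - likewise for the D-/unified caches
         [28]    IDC - no D-cache clean needed for I/D coherence
         [29]    DIC - no I-cache invalidate needed for I/D coherence */
    int isize = 4 << extract64(ctr, 0, 4);   /* bytes */
    int dsize = 4 << extract64(ctr, 16, 4);  /* bytes */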

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <20220621014837.189139-3-richard.henderson@linaro.org>
---
 util/cacheflush.c | 44 +++++++++++++++++++-------------------------
 1 file changed, 19 insertions(+), 25 deletions(-)

diff --git a/util/cacheflush.c b/util/cacheflush.c
index 8096afd33c..01b6cb7583 100644
--- a/util/cacheflush.c
+++ b/util/cacheflush.c
@@ -70,7 +70,7 @@ static void sys_cache_info(int *isize, int *dsize)
     g_free(buf);
 }
 
-#elif defined(__APPLE__)
+#elif defined(CONFIG_DARWIN)
 # include <sys/sysctl.h>
 static void sys_cache_info(int *isize, int *dsize)
 {
@@ -117,20 +117,25 @@ static void sys_cache_info(int *isize, int *dsize)
  * Architecture (+ OS) specific cache detection mechanisms.
  */
 
-#if defined(__aarch64__)
-
+#if defined(__aarch64__) && !defined(CONFIG_DARWIN)
+/* Apple does not expose CTR_EL0, so we must use system interfaces. */
+static uint64_t save_ctr_el0;
 static void arch_cache_info(int *isize, int *dsize)
 {
-    if (*isize == 0 || *dsize == 0) {
-        uint64_t ctr;
+    uint64_t ctr;
 
-        /*
-         * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
-         * but (at least under Linux) these are marked protected by the
-         * kernel.  However, CTR_EL0 contains the minimum linesize in the
-         * entire hierarchy, and is used by userspace cache flushing.
-         */
-        asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
+    /*
+     * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
+     * but (at least under Linux) these are marked protected by the
+     * kernel.  However, CTR_EL0 contains the minimum linesize in the
+     * entire hierarchy, and is used by userspace cache flushing.
+     *
+     * We will also use this value in flush_idcache_range.
+     */
+    asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
+    save_ctr_el0 = ctr;
+
+    if (*isize == 0 || *dsize == 0) {
         if (*isize == 0) {
             *isize = 4 << (ctr & 0xf);
         }
@@ -228,17 +233,6 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 }
 #else
 
-/*
- * TODO: unify this with cacheinfo.c.
- * We want to save the whole contents of CTR_EL0, so that we
- * have more than the linesize, but also IDC and DIC.
- */
-static uint64_t save_ctr_el0;
-static void __attribute__((constructor)) init_ctr_el0(void)
-{
-    asm volatile("mrs\t%0, ctr_el0" : "=r"(save_ctr_el0));
-}
-
 /*
  * This is a copy of gcc's __aarch64_sync_cache_range, modified
  * to fit this three-operand interface.
@@ -248,8 +242,8 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
     const unsigned CTR_IDC = 1u << 28;
     const unsigned CTR_DIC = 1u << 29;
     const uint64_t ctr_el0 = save_ctr_el0;
-    const uintptr_t icache_lsize = 4 << extract64(ctr_el0, 0, 4);
-    const uintptr_t dcache_lsize = 4 << extract64(ctr_el0, 16, 4);
+    const uintptr_t icache_lsize = qemu_icache_linesize;
+    const uintptr_t dcache_lsize = qemu_dcache_linesize;
     uintptr_t p;
 
     /*
-- 
2.34.1




* [PULL 9/9] util/cacheflush: Optimize flushing when ppc host has coherent icache
From: Richard Henderson @ 2022-06-21 20:46 UTC
  To: qemu-devel; +Cc: Nicholas Piggin, Peter Maydell

From: Nicholas Piggin <npiggin@gmail.com>

On Linux, the AT_HWCAP bit PPC_FEATURE_ICACHE_SNOOP indicates
that we can use a simplified 3 instruction flush sequence.
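
For contrast, a sketch (not the exact QEMU loop) of the full sequence
required when the icache does not snoop stores -- every affected line
must be flushed from the dcache and invalidated from the icache:

    static void flush_noncoherent(uintptr_t rx, uintptr_t rw, size_t len,
                                  size_t isize, size_t dsize)
    {
        uintptr_t p;

        /* Push the modified data out of the dcache... */
        for (p = rw & ~(dsize - 1); p < rw + len; p += dsize) {
            asm volatile("dcbst 0,%0" : : "r"(p) : "memory");
        }
        asm volatile("sync" ::: "memory");
        /* ...then discard stale icache lines for the executable view. */
        for (p = rx & ~(isize - 1); p < rx + len; p += isize) {
            asm volatile("icbi 0,%0" : : "r"(p) : "memory");
        }
        asm volatile("sync\n\tisync" ::: "memory");
    }

With PPC_FEATURE_ICACHE_SNOOP the hardware keeps the icache coherent
with stores, so a single sync/icbi/isync suffices for the whole range.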

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Message-Id: <20220519141131.29839-1-npiggin@gmail.com>
[rth: update after merging cacheflush.c and cacheinfo.c]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <20220621014837.189139-4-richard.henderson@linaro.org>
---
 util/cacheflush.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/util/cacheflush.c b/util/cacheflush.c
index 01b6cb7583..2c2c73e085 100644
--- a/util/cacheflush.c
+++ b/util/cacheflush.c
@@ -117,6 +117,10 @@ static void sys_cache_info(int *isize, int *dsize)
  * Architecture (+ OS) specific cache detection mechanisms.
  */
 
+#if defined(__powerpc__)
+static bool have_coherent_icache;
+#endif
+
 #if defined(__aarch64__) && !defined(CONFIG_DARWIN)
 /* Apple does not expose CTR_EL0, so we must use system interfaces. */
 static uint64_t save_ctr_el0;
@@ -156,6 +160,7 @@ static void arch_cache_info(int *isize, int *dsize)
     if (*dsize == 0) {
         *dsize = qemu_getauxval(AT_DCACHEBSIZE);
     }
+    have_coherent_icache = qemu_getauxval(AT_HWCAP) & PPC_FEATURE_ICACHE_SNOOP;
 }
 
 #else
@@ -298,8 +303,24 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
     uintptr_t p, b, e;
-    size_t dsize = qemu_dcache_linesize;
-    size_t isize = qemu_icache_linesize;
+    size_t dsize, isize;
+
+    /*
+     * Some processors have coherent caches and support a simplified
+     * flushing procedure.  See
+     *   POWER9 UM, 4.6.2.2 Instruction Cache Block Invalidate (icbi) 
+     *   https://ibm.ent.box.com/s/tmklq90ze7aj8f4n32er1mu3sy9u8k3k
+     */
+    if (have_coherent_icache) {
+        asm volatile ("sync\n\t"
+                      "icbi 0,%0\n\t"
+                      "isync"
+                      : : "r"(rx) : "memory");
+        return;
+    }
+
+    dsize = qemu_dcache_linesize;
+    isize = qemu_icache_linesize;
 
     b = rw & ~(dsize - 1);
     e = (rw + len + dsize - 1) & ~(dsize - 1);
-- 
2.34.1




* Re: [PULL 0/9] tcg patch queue for 2022-06-21
From: Richard Henderson @ 2022-06-21 23:33 UTC
  To: qemu-devel

Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/7.1 as appropriate.


r~






* Re: [PULL 3/9] accel/tcg: Init TCG cflags in vCPU thread handler
From: Peter Maydell @ 2022-10-20 12:33 UTC
  To: Richard Henderson; +Cc: qemu-devel, Philippe Mathieu-Daudé

On Tue, 21 Jun 2022 at 21:53, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> From: Philippe Mathieu-Daudé <f4bug@amsat.org>
>
> Move TCG cflags initialization to thread handler.
> Remove the duplicated assert checks.
>
> Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
> Message-Id: <20220323171751.78612-6-philippe.mathieu.daude@gmail.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Hi; I've just noticed that this commit seems to break icount
mode when there's more than one vCPU. Specifically:

> diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
> index 1a72149f0e..cc8adc2380 100644
> --- a/accel/tcg/tcg-accel-ops-rr.c
> +++ b/accel/tcg/tcg-accel-ops-rr.c
> @@ -152,7 +152,9 @@ static void *rr_cpu_thread_fn(void *arg)
>      Notifier force_rcu;
>      CPUState *cpu = arg;
>
> -    assert(tcg_enabled());
> +    g_assert(tcg_enabled());
> +    tcg_cpu_init_cflags(cpu, false);
> +

In icount mode we round-robin on the same CPU thread, so
the rr_cpu_thread_fn() gets called only once, and we set
the TCG cflags on the first CPU here, but not on any others.

>      rcu_register_thread();
>      force_rcu.notify = rr_force_rcu;
>      rcu_add_force_rcu_notifier(&force_rcu);
> @@ -275,9 +277,6 @@ void rr_start_vcpu_thread(CPUState *cpu)
>      static QemuCond *single_tcg_halt_cond;
>      static QemuThread *single_tcg_cpu_thread;
>
> -    g_assert(tcg_enabled());
> -    tcg_cpu_init_cflags(cpu, false);
> -

This code gets called for each vCPU, whether we are going to
give it its own thread or not, so when we did this check in
the old location we would call tcg_cpu_init_cflags() on every vCPU.

>      if (!single_tcg_cpu_thread) {
>          cpu->thread = g_new0(QemuThread, 1);
>          cpu->halt_cond = g_new0(QemuCond, 1);

So now only one vCPU gets the CF_USE_ICOUNT cflag set,
and the guest kernel hangs shortly after it brings up the
secondary CPU.

Reverting commit a82fd5a4ec24d923ff1e fixes the problem.

thanks
-- PMM


