All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: j@getutm.app
Subject: [PATCH v3 01/41] tcg: Enhance flush_icache_range with separate data pointer
Date: Thu,  5 Nov 2020 19:28:41 -0800	[thread overview]
Message-ID: <20201106032921.600200-2-richard.henderson@linaro.org> (raw)
In-Reply-To: <20201106032921.600200-1-richard.henderson@linaro.org>

We are shortly going to have a split rw/rx jit buffer.  Depending
on the host, we need to flush the dcache at the rw data pointer and
flush the icache at the rx code pointer.

For now, the two passed pointers are identical, so there is no
effective change in behaviour.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.h     |  9 +++++++--
 tcg/arm/tcg-target.h         |  8 ++++++--
 tcg/i386/tcg-target.h        |  3 ++-
 tcg/mips/tcg-target.h        |  8 ++++++--
 tcg/ppc/tcg-target.h         |  2 +-
 tcg/riscv/tcg-target.h       |  8 ++++++--
 tcg/s390/tcg-target.h        |  3 ++-
 tcg/sparc/tcg-target.h       |  8 +++++---
 tcg/tci/tcg-target.h         |  3 ++-
 softmmu/physmem.c            |  9 ++++++++-
 tcg/tcg.c                    |  6 ++++--
 tcg/aarch64/tcg-target.c.inc |  2 +-
 tcg/mips/tcg-target.c.inc    |  2 +-
 tcg/ppc/tcg-target.c.inc     | 21 +++++++++++----------
 tcg/sparc/tcg-target.c.inc   |  4 ++--
 15 files changed, 64 insertions(+), 32 deletions(-)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 663dd0b95e..d0a6a059b7 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -148,9 +148,14 @@ typedef enum {
 #define TCG_TARGET_DEFAULT_MO (0)
 #define TCG_TARGET_HAS_MEMORY_BSWAP     1
 
-static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+/* Flush the dcache at RW, and the icache at RX, as necessary. */
+static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    __builtin___clear_cache((char *)start, (char *)stop);
+    /* TODO: Copy this from gcc so that we use 2 loops instead of 4. */
+    if (rw != rx) {
+        __builtin___clear_cache((char *)rw, (char *)(rw + len));
+    }
+    __builtin___clear_cache((char *)rx, (char *)(rx + len));
 }
 
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 17e771374d..fa88b24e43 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -134,9 +134,13 @@ enum {
 #define TCG_TARGET_DEFAULT_MO (0)
 #define TCG_TARGET_HAS_MEMORY_BSWAP     1
 
-static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+/* Flush the dcache at RW, and the icache at RX, as necessary. */
+static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    __builtin___clear_cache((char *) start, (char *) stop);
+    if (rw != rx) {
+        __builtin___clear_cache((char *)rw, (char *)(rw + len));
+    }
+    __builtin___clear_cache((char *)rx, (char *)(rx + len));
 }
 
 /* not defined -- call should be eliminated at compile time */
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index abd4ac7fc0..8323e72639 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -206,7 +206,8 @@ extern bool have_avx2;
 #define TCG_TARGET_extract_i64_valid(ofs, len) \
     (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
 
-static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+/* Flush the dcache at RW, and the icache at RX, as necessary. */
+static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
 }
 
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index c6b091d849..47b1226ee9 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -207,9 +207,13 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_DEFAULT_MO (0)
 #define TCG_TARGET_HAS_MEMORY_BSWAP     1
 
-static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+/* Flush the dcache at RW, and the icache at RX, as necessary. */
+static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    cacheflush ((void *)start, stop-start, ICACHE);
+    if (rx != rw) {
+        cacheflush((void *)rw, len, DCACHE);
+    }
+    cacheflush((void *)rx, len, ICACHE);
 }
 
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index be10363956..fbb6dc1b47 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -175,7 +175,7 @@ extern bool have_vsx;
 #define TCG_TARGET_HAS_bitsel_vec       have_vsx
 #define TCG_TARGET_HAS_cmpsel_vec       0
 
-void flush_icache_range(uintptr_t start, uintptr_t stop);
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len);
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
 
 #define TCG_TARGET_DEFAULT_MO (0)
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index 032439d806..0fa6ae358e 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -159,9 +159,13 @@ typedef enum {
 #define TCG_TARGET_HAS_mulsh_i64        1
 #endif
 
-static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+/* Flush the dcache at RW, and the icache at RX, as necessary. */
+static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    __builtin___clear_cache((char *)start, (char *)stop);
+    if (rx != rw) {
+        __builtin___clear_cache((char *)rw, (char *)(rw + len));
+    }
+    __builtin___clear_cache((char *)rx, (char *)(rx + len));
 }
 
 /* not defined -- call should be eliminated at compile time */
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 63c8797bd3..c3dc2e8938 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -145,7 +145,8 @@ enum {
     TCG_AREG0 = TCG_REG_R10,
 };
 
-static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+/* Flush the dcache at RW, and the icache at RX, as necessary. */
+static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
 }
 
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 633841ebf2..c27c40231e 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -168,10 +168,12 @@ extern bool use_vis3_instructions;
 #define TCG_TARGET_DEFAULT_MO (0)
 #define TCG_TARGET_HAS_MEMORY_BSWAP     1
 
-static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+/* Flush the dcache at RW, and the icache at RX, as necessary. */
+static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    uintptr_t p;
-    for (p = start & -8; p < ((stop + 7) & -8); p += 8) {
+    /* No additional data flush to the RW virtual address required. */
+    uintptr_t p, end = (rx + len + 7) & -8;
+    for (p = rx & -8; p < end; p += 8) {
         __asm__ __volatile__("flush\t%0" : : "r" (p));
     }
 }
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 8c1c1d265d..6460449719 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -191,7 +191,8 @@ void tci_disas(uint8_t opc);
 
 #define HAVE_TCG_QEMU_TB_EXEC
 
-static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+/* Flush the dcache at RW, and the icache at RX, as necessary. */
+static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
 }
 
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 0b31be2928..d1e2da5502 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -2946,7 +2946,14 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
                 invalidate_and_set_dirty(mr, addr1, l);
                 break;
             case FLUSH_CACHE:
-                flush_icache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr + l);
+                /*
+                 * FIXME: This function is currently located in tcg/host/,
+                 * but we never come here when tcg is enabled; only for
+                 * real hardware acceleration.  This can actively fail
+                 * when TCI is configured, since that function is a nop.
+                 * We should move this to util/ or something.
+                 */
+                flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
                 break;
             }
         }
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 43c6cf8f52..d5a72c226f 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1077,7 +1077,8 @@ void tcg_prologue_init(TCGContext *s)
 #endif
 
     buf1 = s->code_ptr;
-    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
+    flush_idcache_range((uintptr_t)buf0, (uintptr_t)buf0,
+                        tcg_ptr_byte_diff(buf1, buf0));
 
     /* Deduct the prologue from the buffer.  */
     prologue_size = tcg_current_code_size(s);
@@ -4320,7 +4321,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
     }
 
     /* flush instruction cache */
-    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
+    flush_idcache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_buf,
+                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
 
     return tcg_current_code_size(s);
 }
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 26f71cb599..83af3108a4 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1363,7 +1363,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
     }
     pair = (uint64_t)i2 << 32 | i1;
     qatomic_set((uint64_t *)jmp_addr, pair);
-    flush_icache_range(jmp_addr, jmp_addr + 8);
+    flush_idcache_range(jmp_addr, jmp_addr, 8);
 }
 
 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 41be574e89..c255ecb444 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -2660,7 +2660,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                               uintptr_t addr)
 {
     qatomic_set((uint32_t *)jmp_addr, deposit32(OPC_J, 0, 26, addr >> 2));
-    flush_icache_range(jmp_addr, jmp_addr + 4);
+    flush_idcache_range(jmp_addr, jmp_addr, 4);
 }
 
 typedef struct {
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 18ee989f95..a848e98383 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -1753,12 +1753,12 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
         /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
            within qatomic_set that would fail to build a ppc32 host.  */
         qatomic_set__nocheck((uint64_t *)jmp_addr, pair);
-        flush_icache_range(jmp_addr, jmp_addr + 8);
+        flush_idcache_range(jmp_addr, jmp_addr, 8);
     } else {
         intptr_t diff = addr - jmp_addr;
         tcg_debug_assert(in_range_b(diff));
         qatomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
-        flush_icache_range(jmp_addr, jmp_addr + 4);
+        flush_idcache_range(jmp_addr, jmp_addr, 4);
     }
 }
 
@@ -3864,22 +3864,23 @@ void tcg_register_jit(void *buf, size_t buf_size)
 }
 #endif /* __ELF__ */
 
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+/* Flush the dcache at RW, and the icache at RX, as necessary. */
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, uintptr_t len)
 {
-    uintptr_t p, start1, stop1;
+    uintptr_t p, start, stop;
     size_t dsize = qemu_dcache_linesize;
     size_t isize = qemu_icache_linesize;
 
-    start1 = start & ~(dsize - 1);
-    stop1 = (stop + dsize - 1) & ~(dsize - 1);
-    for (p = start1; p < stop1; p += dsize) {
+    start = rw & ~(dsize - 1);
+    stop = (rw + len + dsize - 1) & ~(dsize - 1);
+    for (p = start; p < stop; p += dsize) {
         asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
     }
     asm volatile ("sync" : : : "memory");
 
-    start &= start & ~(isize - 1);
-    stop1 = (stop + isize - 1) & ~(isize - 1);
-    for (p = start1; p < stop1; p += isize) {
+    start = rx & ~(isize - 1);
+    stop = (rx + len + isize - 1) & ~(isize - 1);
+    for (p = start; p < stop; p += isize) {
         asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
     }
     asm volatile ("sync" : : : "memory");
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
index 6775bd30fc..6e2d755f6a 100644
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc/tcg-target.c.inc
@@ -1836,7 +1836,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
     if (!USE_REG_TB) {
         qatomic_set((uint32_t *)jmp_addr,
 		    deposit32(CALL, 0, 30, br_disp >> 2));
-        flush_icache_range(jmp_addr, jmp_addr + 4);
+        flush_idcache_range(jmp_addr, jmp_addr, 4);
         return;
     }
 
@@ -1860,5 +1860,5 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
     }
 
     qatomic_set((uint64_t *)jmp_addr, deposit64(i2, 32, 32, i1));
-    flush_icache_range(jmp_addr, jmp_addr + 8);
+    flush_idcache_range(jmp_addr, jmp_addr, 8);
 }
-- 
2.25.1



  reply	other threads:[~2020-11-06  3:31 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-06  3:28 [PATCH v3 00/41] Mirror map JIT memory for TCG Richard Henderson
2020-11-06  3:28 ` Richard Henderson [this message]
2020-11-06 20:31   ` [PATCH v3 01/41] tcg: Enhance flush_icache_range with separate data pointer Alex Bennée
2020-11-06 22:51     ` Richard Henderson
2020-11-07 21:08       ` Alex Bennée
2020-11-06  3:28 ` [PATCH v3 02/41] tcg: Move tcg prologue pointer out of TCGContext Richard Henderson
2020-11-07 21:10   ` Alex Bennée
2020-11-06  3:28 ` [PATCH v3 03/41] tcg: Move tcg epilogue " Richard Henderson
2020-11-06  3:28 ` [PATCH v3 04/41] tcg: Add in_code_gen_buffer Richard Henderson
2020-11-06  3:28 ` [PATCH v3 05/41] tcg: Introduce tcg_splitwx_to_{rx,rw} Richard Henderson
2020-11-06  3:28 ` [PATCH v3 06/41] tcg: Adjust TCGLabel for const Richard Henderson
2020-11-06  3:28 ` [PATCH v3 07/41] tcg: Adjust tcg_out_call " Richard Henderson
2020-11-06  3:28 ` [PATCH v3 08/41] tcg: Adjust tcg_out_label " Richard Henderson
2020-11-06  3:28 ` [PATCH v3 09/41] tcg: Adjust tcg_register_jit " Richard Henderson
2020-11-06  3:28 ` [PATCH v3 10/41] tcg: Adjust tb_target_set_jmp_target for split-wx Richard Henderson
2020-11-06  3:28 ` [PATCH v3 11/41] tcg: Make DisasContextBase.tb const Richard Henderson
2020-11-06  3:28 ` [PATCH v3 12/41] tcg: Make tb arg to synchronize_from_tb const Richard Henderson
2020-11-06  3:28 ` [PATCH v3 13/41] tcg: Use Error with alloc_code_gen_buffer Richard Henderson
2020-11-06  3:28 ` [PATCH v3 14/41] tcg: Add --accel tcg,split-wx property Richard Henderson
2020-11-06  3:28 ` [PATCH v3 15/41] accel/tcg: Support split-wx for linux with memfd Richard Henderson
2020-11-10 17:03   ` Alex Bennée
2020-11-10 17:26     ` Richard Henderson
2020-11-06  3:28 ` [PATCH v3 16/41] accel/tcg: Support split-wx for darwin/iOS with vm_remap Richard Henderson
2020-11-08  3:37   ` Joelle van Dyne
2020-11-10 17:37   ` Alex Bennée
2020-11-10 17:57     ` Joelle van Dyne
2020-11-06  3:28 ` [PATCH v3 17/41] tcg: Return the TB pointer from the rx region from exit_tb Richard Henderson
2020-11-06  3:28 ` [PATCH v3 18/41] tcg/i386: Support split-wx code generation Richard Henderson
2020-11-06  3:28 ` [PATCH v3 19/41] tcg/aarch64: Use B not BL for tcg_out_goto_long Richard Henderson
2020-11-06  3:29 ` [PATCH v3 20/41] tcg/aarch64: Implement flush_idcache_range manually Richard Henderson
2020-11-06  3:29 ` [PATCH v3 21/41] tcg/aarch64: Support split-wx code generation Richard Henderson
2020-11-06  3:29 ` [PATCH v3 22/41] disas: Push const down through host disassembly Richard Henderson
2020-11-06  3:29 ` [PATCH v3 23/41] tcg/tci: Push const down through bytecode reading Richard Henderson
2020-11-06  3:29 ` [PATCH v3 24/41] tcg: Introduce tcg_tbrel_diff Richard Henderson
2020-11-06  3:29 ` [PATCH v3 25/41] tcg/ppc: Use tcg_tbrel_diff Richard Henderson
2020-11-06  3:29 ` [PATCH v3 26/41] tcg/ppc: Use tcg_out_mem_long to reset TCG_REG_TB Richard Henderson
2020-11-06  3:29 ` [PATCH v3 27/41] tcg/ppc: Support split-wx code generation Richard Henderson
2020-11-06  3:29 ` [PATCH v3 28/41] tcg/sparc: Use tcg_tbrel_diff Richard Henderson
2020-11-06  3:29 ` [PATCH v3 29/41] tcg/sparc: Support split-wx code generation Richard Henderson
2020-11-06  3:29 ` [PATCH v3 30/41] tcg/s390: Use tcg_tbrel_diff Richard Henderson
2020-11-06  3:29 ` [PATCH v3 31/41] tcg/s390: Support split-wx code generation Richard Henderson
2020-11-06  3:29 ` [PATCH v3 32/41] tcg/riscv: Fix branch range checks Richard Henderson
2020-11-06  3:29 ` [PATCH v3 33/41] tcg/riscv: Remove branch-over-branch fallback Richard Henderson
2020-11-06  3:29 ` [PATCH v3 34/41] tcg/riscv: Support split-wx code generation Richard Henderson
2020-11-06  3:29 ` [PATCH v3 35/41] accel/tcg: Add mips support to alloc_code_gen_buffer_splitwx_memfd Richard Henderson
2020-11-06  3:29 ` [PATCH v3 36/41] tcg/mips: Do not assert on relocation overflow Richard Henderson
2020-11-06  3:29 ` [PATCH v3 37/41] tcg/mips: Support split-wx code generation Richard Henderson
2020-11-06  3:29 ` [PATCH v3 38/41] tcg/arm: " Richard Henderson
2020-11-06  3:29 ` [PATCH v3 39/41] tcg: Remove TCG_TARGET_SUPPORT_MIRROR Richard Henderson
2020-11-06  3:29 ` [PATCH v3 40/41] tcg: Constify tcg_code_gen_epilogue Richard Henderson
2020-11-06  3:29 ` [PATCH v3 41/41] tcg: Constify TCGLabelQemuLdst.raddr Richard Henderson
2020-11-06  4:00 ` [PATCH v3 00/41] Mirror map JIT memory for TCG no-reply
2020-11-08  3:38 ` Joelle van Dyne
2020-11-17  3:47 ` Joelle van Dyne
2020-11-17 15:20   ` Richard Henderson
2020-11-17 15:31     ` Joelle van Dyne
2020-11-17 17:26       ` Alex Bennée

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201106032921.600200-2-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=j@getutm.app \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.