From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PATCH for-8.0 26/29] tcg/i386: Examine MemOp for atomicity and alignment
Date: Fri, 18 Nov 2022 01:47:51 -0800
Message-ID: <20221118094754.242910-27-richard.henderson@linaro.org>
In-Reply-To: <20221118094754.242910-1-richard.henderson@linaro.org>

No change to the ultimate load/store routines yet, so some
atomicity conditions are not yet honored, but this plumbs the
change to alignment through the adjacent functions.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.c.inc | 128 ++++++++++++++++++++++++++++++--------
 1 file changed, 101 insertions(+), 27 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index e04818eef6..7dc56040d2 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1746,6 +1746,83 @@ tcg_out_testi(TCGContext *s, TCGReg r, uint32_t i)
     }
 }
 
+/*
+ * Return the alignment and atomicity to use for the inline fast path
+ * for the given memory operation.  The alignment may be larger than
+ * that specified in @opc, and the correct alignment will be diagnosed
+ * by the slow path helper.
+ */
+static MemOp atom_and_align_for_opc(TCGContext *s, MemOp opc, MemOp *out_al)
+{
+    MemOp align = get_alignment_bits(opc);
+    MemOp atom, atmax, atsub, size = opc & MO_SIZE;
+
+    /* When serialized, no further atomicity required.  */
+    if (s->tb_cflags & CF_PARALLEL) {
+        atom = opc & MO_ATOM_MASK;
+    } else {
+        atom = MO_ATOM_NONE;
+    }
+
+    atmax = opc & MO_ATMAX_MASK;
+    if (atmax == MO_ATMAX_SIZE) {
+        atmax = size;
+    } else {
+        atmax = atmax >> MO_ATMAX_SHIFT;
+    }
+
+    switch (atom) {
+    case MO_ATOM_NONE:
+        /* The operation requires no specific atomicity. */
+        atmax = MO_8;
+        atsub = MO_8;
+        break;
+    case MO_ATOM_IFALIGN:
+        /* If unaligned, the subobjects are bytes. */
+        atsub = MO_8;
+        break;
+    case MO_ATOM_WITHIN16:
+        /* If unaligned, there are subobjects if atmax < size. */
+        atsub = (atmax < size ? atmax : MO_8);
+        atmax = size;
+        break;
+    case MO_ATOM_SUBALIGN:
+        /* If unaligned but not odd, there are subobjects up to atmax - 1. */
+        atsub = (atmax == MO_8 ? MO_8 : atmax - 1);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    /*
+     * Per Intel Architecture SDM, Volume 3 Section 8.1.1,
+     * - Pentium family guarantees atomicity of aligned <= 64-bit.
+     * - P6 family guarantees atomicity of unaligned <= 64-bit
+     *   which fit within a cache line.
+     * - AVX guarantees atomicity of aligned 128-bit VMOVDQA (et al).
+     *
+     * There is no language in the Intel manual specifying what happens
+     * with the partial memory operations when crossing a cache line.
+     * When there is required atomicity of subobjects, we must perform
+     * an additional runtime test for alignment and then perform either
+     * the full operation, or two half-sized operations.
+     *
+     * For x86_64, and MO_64, we do not have a scratch register with
+     * which to do this.  Only allow splitting for MO_64 on i386,
+     * where the data is already separated, or MO_128.
+     * Otherwise, require full alignment and fall back to the helper
+     * for the misaligned case.
+     */
+    if (align < atmax
+        && atsub != MO_8
+        && size != (TCG_TARGET_REG_BITS == 64 ? MO_128 : MO_64)) {
+        align = size;
+    }
+
+    *out_al = align;
+    return atmax;
+}
+
 /*
  * helper signature: helper_ld*_mmu(CPUState *env, target_ulong addr,
  *                                  int mmu_idx, uintptr_t ra)
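
To make the decomposition above concrete, here is a standalone C model
of atom_and_align_for_opc. It is illustrative only: the ATOM_* and MO_*
encodings below are simplified stand-ins, not the MO_ATOM_* values that
patch 02/29 adds to include/exec/memop.h.

    #include <stdio.h>

    enum { MO_8, MO_16, MO_32, MO_64, MO_128 };    /* log2 access sizes */
    enum { ATOM_NONE, ATOM_IFALIGN, ATOM_WITHIN16, ATOM_SUBALIGN };

    /* Compute the largest unit that must be atomic (atmax) and the
     * subobject size required when unaligned (atsub), mirroring the
     * switch in the patch. */
    static void decompose(int size, int atom, int atmax,
                          int *omax, int *osub)
    {
        int atsub = MO_8;

        switch (atom) {
        case ATOM_NONE:            /* no atomicity required at all */
            atmax = MO_8;
            break;
        case ATOM_IFALIGN:         /* unaligned decays to bytes */
            break;
        case ATOM_WITHIN16:        /* whole op atomic within 16 bytes */
            atsub = (atmax < size ? atmax : MO_8);
            atmax = size;
            break;
        case ATOM_SUBALIGN:        /* atomic on subobject boundaries */
            atsub = (atmax == MO_8 ? MO_8 : atmax - 1);
            break;
        }
        *omax = atmax;
        *osub = atsub;
    }

    int main(void)
    {
        int atmax, atsub;

        decompose(MO_64, ATOM_IFALIGN, MO_64, &atmax, &atsub);
        printf("ifalign/q:  atmax=%d atsub=%d\n", atmax, atsub); /* 3 0 */

        decompose(MO_128, ATOM_WITHIN16, MO_64, &atmax, &atsub);
        printf("within16/o: atmax=%d atsub=%d\n", atmax, atsub); /* 4 3 */
        return 0;
    }

The second case yields atsub != MO_8, but size is MO_128, so on a
64-bit host the final test in the patch leaves align alone: the data
can be split into two 8-byte halves. By contrast, MO_64 with
MO_ATOM_SUBALIGN on x86_64 gives atsub = MO_32 with no scratch register
available for splitting, so align is widened to the full size and any
misaligned access falls back to the helper.
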
@@ -1987,7 +2064,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
  * First argument register is clobbered.
  */
 static void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
-                             int mem_index, MemOp opc,
+                             int mem_index, MemOp a_bits, MemOp s_bits,
                              tcg_insn_unit **label_ptr, int which)
 {
     const TCGReg r0 = TCG_REG_L0;
@@ -1995,8 +2072,6 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     TCGType ttype = TCG_TYPE_I32;
     TCGType tlbtype = TCG_TYPE_I32;
     int trexw = 0, hrexw = 0, tlbrexw = 0;
-    unsigned a_bits = get_alignment_bits(opc);
-    unsigned s_bits = opc & MO_SIZE;
     unsigned a_mask = (1 << a_bits) - 1;
     unsigned s_mask = (1 << s_bits) - 1;
     target_ulong tlb_mask;
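
The a_mask and s_mask values feed the TLB comparator. Below is a
minimal C model of the check the emitted code performs, assuming
a_bits <= s_bits and a stand-in 4 KiB page size; the real code compares
against the CPUTLBEntry comparator word rather than recomputing the
page address as done here:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_BITS 12
    #define PAGE_MASK ((uint64_t)-1 << PAGE_BITS)

    /* Nonzero if the access must take the slow path: misaligned for
     * a_bits, or an s_bits-sized access crossing a page boundary. */
    static int needs_slow_path(uint64_t addr,
                               unsigned a_bits, unsigned s_bits)
    {
        uint64_t a_mask = (1ULL << a_bits) - 1;
        uint64_t s_mask = (1ULL << s_bits) - 1;

        /* Adding s_mask - a_mask folds the page-crossing test into
         * the alignment test: an access spilling past the page flips
         * the page number; a misaligned address leaves nonzero bits
         * under a_mask. */
        uint64_t probe = (addr + s_mask - a_mask) & (PAGE_MASK | a_mask);
        return probe != (addr & PAGE_MASK);
    }

    int main(void)
    {
        printf("%d\n", needs_slow_path(0x1000, 3, 3)); /* 0: aligned */
        printf("%d\n", needs_slow_path(0x1001, 3, 3)); /* 1: misaligned */
        printf("%d\n", needs_slow_path(0x1ffa, 0, 3)); /* 1: page-cross */
        return 0;
    }
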
@@ -2124,7 +2199,8 @@ static inline int setup_guest_base_seg(void)
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                    TCGReg base, int index, intptr_t ofs,
-                                   int seg, TCGType type, MemOp memop)
+                                   int seg, TCGType type, MemOp memop,
+                                   MemOp atom, MemOp align)
 {
     bool use_movbe = false;
     int rexw = (type == TCG_TYPE_I32 ? 0 : P_REXW);
@@ -2225,11 +2301,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type)
     TCGReg datalo, datahi, addrlo;
     TCGReg addrhi __attribute__((unused));
     MemOpIdx oi;
-    MemOp opc;
+    MemOp opc, atom, align;
     tcg_insn_unit *label_ptr[2] = { };
-#ifndef CONFIG_SOFTMMU
-    unsigned a_bits;
-#endif
 
     datalo = *args++;
     switch (type) {
@@ -2246,26 +2319,27 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type)
     addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
     oi = *args++;
     opc = get_memop(oi);
+    atom = atom_and_align_for_opc(s, opc, &align);
 
 #if defined(CONFIG_SOFTMMU)
-    tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
+    tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), align, opc & MO_SIZE,
                      label_ptr, offsetof(CPUTLBEntry, addr_read));
 
     /* TLB Hit.  */
-    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, type, opc);
+    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, type,
+                           opc, atom, align);
 
     /* Record the current context of a load into ldst label */
     add_qemu_ldst_label(s, true, type, oi, datalo, datahi,
                         TCG_REG_L1, addrhi, s->code_ptr, label_ptr);
 #else
-    a_bits = get_alignment_bits(opc);
-    if (a_bits) {
-        tcg_out_test_alignment(s, addrlo, a_bits, label_ptr);
+    if (align) {
+        tcg_out_test_alignment(s, addrlo, align, label_ptr);
     }
     tcg_out_qemu_ld_direct(s, datalo, datahi, addrlo, x86_guest_base_index,
                            x86_guest_base_offset, x86_guest_base_seg,
-                           type, opc);
-    if (a_bits) {
+                           type, opc, atom, align);
+    if (align) {
         add_qemu_ldst_label(s, true, type, oi, datalo, datahi,
                             addrlo, addrhi, s->code_ptr, label_ptr);
     }
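
In the user-only path above, the generated fast path has a simple
shape that is easier to see in C. A sketch under the simplifying
assumptions that guest memory is a local array and the slow path is a
byte-wise copy (in reality it is a call to the out-of-line helper
recorded by add_qemu_ldst_label, patched via label_ptr):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint64_t guest_mem[8];   /* 8-byte-aligned backing store */
    static uint8_t *guest_base = (uint8_t *)guest_mem;

    /* Byte-wise fallback: correct value, no atomicity guarantee. */
    static uint64_t slow_ld8(uintptr_t addr)
    {
        uint64_t v;
        memcpy(&v, guest_base + addr, sizeof(v));
        return v;
    }

    static uint64_t qemu_ld8_model(uintptr_t addr, unsigned align_bits)
    {
        if (addr & ((1u << align_bits) - 1)) {
            return slow_ld8(addr);   /* tcg_out_test_alignment taken */
        }
        /* Aligned: one full-width load, atomic on x86 hardware. */
        return *(uint64_t *)(guest_base + addr);
    }

    int main(void)
    {
        memset(guest_mem, 0x5a, sizeof(guest_mem));
        printf("%llx\n", (unsigned long long)qemu_ld8_model(8, 3));
        printf("%llx\n", (unsigned long long)qemu_ld8_model(9, 3));
        return 0;
    }
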
@@ -2274,7 +2348,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type)
 
 static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                    TCGReg base, int index, intptr_t ofs,
-                                   int seg, MemOp memop)
+                                   int seg, MemOp memop,
+                                   MemOp atom, MemOp align)
 {
     bool use_movbe = false;
     int movop = OPC_MOVL_EvGv;
@@ -2329,11 +2404,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGType type)
     TCGReg datalo, datahi, addrlo;
     TCGReg addrhi __attribute__((unused));
     MemOpIdx oi;
-    MemOp opc;
+    MemOp opc, atom, align;
     tcg_insn_unit *label_ptr[2] = { };
-#ifndef CONFIG_SOFTMMU
-    unsigned a_bits;
-#endif
 
     datalo = *args++;
     switch (type) {
@@ -2350,25 +2422,27 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGType type)
     addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
     oi = *args++;
     opc = get_memop(oi);
+    atom = atom_and_align_for_opc(s, opc, &align);
 
 #if defined(CONFIG_SOFTMMU)
-    tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
+    tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), align, opc & MO_SIZE,
                      label_ptr, offsetof(CPUTLBEntry, addr_write));
 
     /* TLB Hit.  */
-    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
+    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0,
+                           opc, atom, align);
 
     /* Record the current context of a store into ldst label */
     add_qemu_ldst_label(s, false, type, oi, datalo, datahi,
                         TCG_REG_L1, addrhi, s->code_ptr, label_ptr);
 #else
-    a_bits = get_alignment_bits(opc);
-    if (a_bits) {
-        tcg_out_test_alignment(s, addrlo, a_bits, label_ptr);
+    if (align) {
+        tcg_out_test_alignment(s, addrlo, align, label_ptr);
     }
     tcg_out_qemu_st_direct(s, datalo, datahi, addrlo, x86_guest_base_index,
-                           x86_guest_base_offset, x86_guest_base_seg, opc);
-    if (a_bits) {
+                           x86_guest_base_offset, x86_guest_base_seg,
+                           opc, atom, align);
+    if (align) {
         add_qemu_ldst_label(s, false, type, oi, datalo, datahi,
                             addrlo, addrhi, s->code_ptr, label_ptr);
     }
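
The direct load/store routines do not yet act on atom and align (that
is the subject of later patches in this series, notably 29/29 for
MO_64 in 32-bit mode), but the intended strategy for that hard case
can be sketched as a planner that prints which sequence the runtime
alignment test would select. The function and its output format are
hypothetical, not QEMU code:

    #include <inttypes.h>
    #include <stdio.h>

    /* Which store sequence the fast path would use for an 8-byte
     * store at addr on an i386 host. */
    static void plan_st8(uint64_t addr)
    {
        if ((addr & 7) == 0) {
            printf("full-width: one atomic 8-byte store at %#" PRIx64
                   "\n", addr);
        } else if ((addr & 3) == 0) {
            printf("split: atomic 4-byte halves at %#" PRIx64
                   " and %#" PRIx64 "\n", addr, addr + 4);
        } else {
            printf("slow path: helper call, byte semantics\n");
        }
    }

    int main(void)
    {
        plan_st8(0x1000);
        plan_st8(0x1004);
        plan_st8(0x1001);
        return 0;
    }
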
-- 
2.34.1




Thread overview: 48+ messages
2022-11-18  9:47 [PATCH for-8.0 00/29] tcg: Improve atomicity support Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 01/29] include/qemu/cpuid: Introduce xgetbv_low Richard Henderson
2022-11-21 12:15   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 02/29] include/exec/memop: Add bits describing atomicity Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 03/29] accel/tcg: Add cpu_in_serial_context Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 04/29] accel/tcg: Introduce tlb_read_idx Richard Henderson
2022-11-21 12:25   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 05/29] accel/tcg: Reorg system mode load helpers Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 06/29] accel/tcg: Reorg system mode store helpers Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 07/29] accel/tcg: Honor atomicity of loads Richard Henderson
2022-11-22 14:35   ` Peter Maydell
2022-11-22 18:04     ` Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 08/29] accel/tcg: Honor atomicity of stores Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 09/29] tcg/tci: Use cpu_{ld,st}_mmu Richard Henderson
2022-11-21 12:40   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 10/29] tcg: Unify helper_{be,le}_{ld,st}* Richard Henderson
2022-11-21 12:48   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 11/29] accel/tcg: Implement helper_{ld, st}*_mmu for user-only Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 12/29] tcg: Add 128-bit guest memory primitives Richard Henderson
2022-11-22  3:30   ` Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 13/29] meson: Detect atomic128 support with optimization Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 14/29] tcg/i386: Add have_atomic16 Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 15/29] include/qemu/int128: Add vector type to Int128Alias Richard Henderson
2022-11-21 23:45   ` Philippe Mathieu-Daudé
2022-11-22 18:21   ` Philippe Mathieu-Daudé
2022-11-22 18:31     ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 16/29] accel/tcg: Use have_atomic16 in ldst_atomicity.c.inc Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 17/29] tcg/aarch64: Add have_lse, have_lse2 Richard Henderson
2022-11-21 23:10   ` Philippe Mathieu-Daudé
2022-11-21 23:14     ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 18/29] accel/tcg: Add aarch64 specific support in ldst_atomicity Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 19/29] tcg: Introduce TCG_OPF_TYPE_MASK Richard Henderson
2022-11-21 16:12   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 20/29] tcg: Add INDEX_op_qemu_{ld,st}_i128 Richard Henderson
2022-11-21 22:59   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 21/29] tcg/i386: Introduce tcg_out_mov2 Richard Henderson
2022-11-21 16:21   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 22/29] tcg/i386: Introduce tcg_out_testi Richard Henderson
2022-11-21 16:22   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 23/29] tcg/i386: Use full load/store helpers in user-only mode Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 24/29] tcg/i386: Replace is64 with type in qemu_ld/st routines Richard Henderson
2022-11-21 16:27   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 25/29] tcg/i386: Mark Win64 call-saved vector regs as reserved Richard Henderson
2022-11-21 16:28   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` Richard Henderson [this message]
2022-11-18  9:47 ` [PATCH for-8.0 27/29] tcg/i386: Support 128-bit load/store with have_atomic16 Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 28/29] tcg/i386: Add vex_v argument to tcg_out_vex_modrm_pool Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 29/29] tcg/i386: Honor 64-bit atomicity in 32-bit mode Richard Henderson
