* [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues
@ 2021-06-19 17:26 Richard Henderson
  2021-06-19 17:26 ` [PATCH 01/15] NOTFORMERGE q800: test case for do_unaligned_access issue Richard Henderson
                   ` (16 more replies)
  0 siblings, 17 replies; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.bennee, pbonzini, mark.cave-ayland, f4bug

Short story is that the first two patches resolve the observed
problem by completely bypassing quite a lot of code in memory.c.

Longer story is that we should either use that code in memory.c,
or bypass it at an even lower level, so that we don't have
multiple locations doing the partial-read assembly thing.

Patch 13 exposes a number of obvious device bugs via make check.
I'm sure there are more in devices that are less well tested.

Patch 15 has an obvious drawback: it breaks the original #360.
But it starts the conversation as to whether the check in memory.c
is in fact broken.


r~


Mark Cave-Ayland (2):
  NOTFORMERGE q800: test case for do_unaligned_access issue
  accel/tcg: Use byte ops for unaligned loads

Philippe Mathieu-Daudé (1):
  accel/tcg: Extract load_helper_unaligned from load_helper

Richard Henderson (12):
  accel/tcg: Don't test for watchpoints for code read
  accel/tcg: Handle page span access before i/o access
  softmmu/memory: Inline memory_region_dispatch_read1
  softmmu/memory: Simplify access_with_adjusted_size interface
  hw/net/e1000e: Fix size of io operations
  hw/net/e1000e: Fix impl.min_access_size
  hw/pci-host/q35: Improve blackhole_ops
  hw/scsi/megasas: Fix megasas_mmio_ops sizes
  hw/scsi/megasas: Improve megasas_queue_ops min_access_size
  softmmu/memory: Disallow short writes
  softmmu/memory: Support some unaligned access
  RFC accel/tcg: Defer some unaligned accesses to memory subsystem

 accel/tcg/cputlb.c | 147 +++++++++++++----------------
 hw/m68k/q800.c     | 131 ++------------------------
 hw/net/e1000e.c    |   8 +-
 hw/pci-host/q35.c  |   9 +-
 hw/scsi/megasas.c  |   6 +-
 softmmu/memory.c   | 226 +++++++++++++++++++++++++++++++++------------
 6 files changed, 251 insertions(+), 276 deletions(-)

-- 
2.25.1




* [PATCH 01/15] NOTFORMERGE q800: test case for do_unaligned_access issue
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-19 17:26 ` [PATCH 02/15] accel/tcg: Extract load_helper_unaligned from load_helper Richard Henderson
                   ` (15 subsequent siblings)
  16 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.bennee, pbonzini, mark.cave-ayland, f4bug

From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>

A hack so that the built-in rom for -M q800 triggers the problem
within the first two instructions: the second instruction performs
a 16-bit load from 0x500011ff (0x50000000 + 4607), an odd address
inside the machine's I/O region.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
---
 hw/m68k/q800.c | 131 ++++---------------------------------------------
 1 file changed, 9 insertions(+), 122 deletions(-)

diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c
index 11376daa85..9795ceb06a 100644
--- a/hw/m68k/q800.c
+++ b/hw/m68k/q800.c
@@ -174,26 +174,13 @@ static void main_cpu_reset(void *opaque)
     CPUState *cs = CPU(cpu);
 
     cpu_reset(cs);
-    cpu->env.aregs[7] = ldl_phys(cs->as, 0);
-    cpu->env.pc = ldl_phys(cs->as, 4);
+    cpu->env.aregs[7] = 0x1000;
+    cpu->env.pc = MACROM_ADDR;
 }
 
 static uint8_t fake_mac_rom[] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
-    /* offset: 0xa - mac_reset */
-
-    /* via2[vDirB] |= VIA2B_vPower */
-    0x20, 0x7C, 0x50, 0xF0, 0x24, 0x00, /* moveal VIA2_BASE+vDirB,%a0 */
-    0x10, 0x10,                         /* moveb %a0@,%d0 */
-    0x00, 0x00, 0x00, 0x04,             /* orib #4,%d0 */
-    0x10, 0x80,                         /* moveb %d0,%a0@ */
-
-    /* via2[vBufB] &= ~VIA2B_vPower */
-    0x20, 0x7C, 0x50, 0xF0, 0x20, 0x00, /* moveal VIA2_BASE+vBufB,%a0 */
-    0x10, 0x10,                         /* moveb %a0@,%d0 */
-    0x02, 0x00, 0xFF, 0xFB,             /* andib #-5,%d0 */
-    0x10, 0x80,                         /* moveb %d0,%a0@ */
+    0x41, 0xf9, 0x50, 0x00, 0x00, 0x00,  /* lea 0x50000000,%a0 */
+    0x30, 0x28, 0x11, 0xff,              /* movew %a0@(4607),%d0 */
 
     /* while (true) ; */
     0x60, 0xFE                          /* bras [self] */
@@ -202,24 +189,11 @@ static uint8_t fake_mac_rom[] = {
 static void q800_init(MachineState *machine)
 {
     M68kCPU *cpu = NULL;
-    int linux_boot;
-    int32_t kernel_size;
-    uint64_t elf_entry;
-    char *filename;
-    int bios_size;
-    ram_addr_t initrd_base;
-    int32_t initrd_size;
     MemoryRegion *rom;
     MemoryRegion *io;
     const int io_slice_nb = (IO_SIZE / IO_SLICE) - 1;
     int i;
     ram_addr_t ram_size = machine->ram_size;
-    const char *kernel_filename = machine->kernel_filename;
-    const char *initrd_filename = machine->initrd_filename;
-    const char *kernel_cmdline = machine->kernel_cmdline;
-    const char *bios_name = machine->firmware ?: MACROM_FILENAME;
-    hwaddr parameters_base;
-    CPUState *cs;
     DeviceState *dev;
     DeviceState *via_dev;
     DeviceState *escc_orgate;
@@ -231,8 +205,6 @@ static void q800_init(MachineState *machine)
     DeviceState *glue;
     DriveInfo *dinfo;
 
-    linux_boot = (kernel_filename != NULL);
-
     if (ram_size > 1 * GiB) {
         error_report("Too much memory for this machine: %" PRId64 " MiB, "
                      "maximum 1024 MiB", ram_size / MiB);
@@ -392,96 +364,11 @@ static void q800_init(MachineState *machine)
     qdev_prop_set_uint8(dev, "depth", graphic_depth);
     qdev_realize_and_unref(dev, BUS(nubus), &error_fatal);
 
-    cs = CPU(cpu);
-    if (linux_boot) {
-        uint64_t high;
-        kernel_size = load_elf(kernel_filename, NULL, NULL, NULL,
-                               &elf_entry, NULL, &high, NULL, 1,
-                               EM_68K, 0, 0);
-        if (kernel_size < 0) {
-            error_report("could not load kernel '%s'", kernel_filename);
-            exit(1);
-        }
-        stl_phys(cs->as, 4, elf_entry); /* reset initial PC */
-        parameters_base = (high + 1) & ~1;
-
-        BOOTINFO1(cs->as, parameters_base, BI_MACHTYPE, MACH_MAC);
-        BOOTINFO1(cs->as, parameters_base, BI_FPUTYPE, FPU_68040);
-        BOOTINFO1(cs->as, parameters_base, BI_MMUTYPE, MMU_68040);
-        BOOTINFO1(cs->as, parameters_base, BI_CPUTYPE, CPU_68040);
-        BOOTINFO1(cs->as, parameters_base, BI_MAC_CPUID, CPUB_68040);
-        BOOTINFO1(cs->as, parameters_base, BI_MAC_MODEL, MAC_MODEL_Q800);
-        BOOTINFO1(cs->as, parameters_base,
-                  BI_MAC_MEMSIZE, ram_size >> 20); /* in MB */
-        BOOTINFO2(cs->as, parameters_base, BI_MEMCHUNK, 0, ram_size);
-        BOOTINFO1(cs->as, parameters_base, BI_MAC_VADDR, VIDEO_BASE);
-        BOOTINFO1(cs->as, parameters_base, BI_MAC_VDEPTH, graphic_depth);
-        BOOTINFO1(cs->as, parameters_base, BI_MAC_VDIM,
-                  (graphic_height << 16) | graphic_width);
-        BOOTINFO1(cs->as, parameters_base, BI_MAC_VROW,
-                  (graphic_width * graphic_depth + 7) / 8);
-        BOOTINFO1(cs->as, parameters_base, BI_MAC_SCCBASE, SCC_BASE);
-
-        rom = g_malloc(sizeof(*rom));
-        memory_region_init_ram_ptr(rom, NULL, "m68k_fake_mac.rom",
-                                   sizeof(fake_mac_rom), fake_mac_rom);
-        memory_region_set_readonly(rom, true);
-        memory_region_add_subregion(get_system_memory(), MACROM_ADDR, rom);
-
-        if (kernel_cmdline) {
-            BOOTINFOSTR(cs->as, parameters_base, BI_COMMAND_LINE,
-                        kernel_cmdline);
-        }
-
-        /* load initrd */
-        if (initrd_filename) {
-            initrd_size = get_image_size(initrd_filename);
-            if (initrd_size < 0) {
-                error_report("could not load initial ram disk '%s'",
-                             initrd_filename);
-                exit(1);
-            }
-
-            initrd_base = (ram_size - initrd_size) & TARGET_PAGE_MASK;
-            load_image_targphys(initrd_filename, initrd_base,
-                                ram_size - initrd_base);
-            BOOTINFO2(cs->as, parameters_base, BI_RAMDISK, initrd_base,
-                      initrd_size);
-        } else {
-            initrd_base = 0;
-            initrd_size = 0;
-        }
-        BOOTINFO0(cs->as, parameters_base, BI_LAST);
-    } else {
-        uint8_t *ptr;
-        /* allocate and load BIOS */
-        rom = g_malloc(sizeof(*rom));
-        memory_region_init_rom(rom, NULL, "m68k_mac.rom", MACROM_SIZE,
-                               &error_abort);
-        filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
-        memory_region_add_subregion(get_system_memory(), MACROM_ADDR, rom);
-
-        /* Load MacROM binary */
-        if (filename) {
-            bios_size = load_image_targphys(filename, MACROM_ADDR, MACROM_SIZE);
-            g_free(filename);
-        } else {
-            bios_size = -1;
-        }
-
-        /* Remove qtest_enabled() check once firmware files are in the tree */
-        if (!qtest_enabled()) {
-            if (bios_size < 0 || bios_size > MACROM_SIZE) {
-                error_report("could not load MacROM '%s'", bios_name);
-                exit(1);
-            }
-
-            ptr = rom_ptr(MACROM_ADDR, MACROM_SIZE);
-            stl_phys(cs->as, 0, ldl_p(ptr));    /* reset initial SP */
-            stl_phys(cs->as, 4,
-                     MACROM_ADDR + ldl_p(ptr + 4)); /* reset initial PC */
-        }
-    }
+    rom = g_malloc(sizeof(*rom));
+    memory_region_init_ram_ptr(rom, NULL, "m68k_fake_mac.rom",
+                               sizeof(fake_mac_rom), fake_mac_rom);
+    memory_region_set_readonly(rom, true);
+    memory_region_add_subregion(get_system_memory(), MACROM_ADDR, rom);
 }
 
 static void q800_machine_class_init(ObjectClass *oc, void *data)
-- 
2.25.1




* [PATCH 02/15] accel/tcg: Extract load_helper_unaligned from load_helper
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
  2021-06-19 17:26 ` [PATCH 01/15] NOTFORMERGE q800: test case for do_unaligned_access issue Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-19 17:26 ` [PATCH 03/15] accel/tcg: Use byte ops for unaligned loads Richard Henderson
                   ` (14 subsequent siblings)
  16 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.bennee, pbonzini, mark.cave-ayland, f4bug

From: Philippe Mathieu-Daudé <f4bug@amsat.org>

Replace a goto statement with an inlined function for easier review.
No logical change intended.
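
To make the endianness combine easier to check at review time, here
is a worked little-endian case with made-up values (a standalone
sketch, not QEMU code).  Note that shift is never zero on this path,
since aligned accesses do not reach it:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical 4-byte little-endian load at addr = 0x1002:
         * addr1 = 0x1000, addr2 = 0x1004, shift = (addr & 3) * 8 = 16.
         * Memory at 0x1000..0x1007 holds 11 22 33 44 55 66 77 88. */
        uint64_t r1 = 0x44332211;   /* full_load(env, addr1, ...) */
        uint64_t r2 = 0x88776655;   /* full_load(env, addr2, ...) */
        unsigned size = 4, shift = 16;
        uint64_t res = (r1 >> shift) | (r2 << ((size * 8) - shift));

        res &= 0xffffffffull;       /* MAKE_64BIT_MASK(0, size * 8) */
        printf("0x%08llx\n", (unsigned long long)res); /* 0x66554433 */
        return 0;
    }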

Inspired-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20210609141010.1066750-2-f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 52 ++++++++++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index f24348e979..a94de90099 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1851,6 +1851,34 @@ load_memop(const void *haddr, MemOp op)
     }
 }
 
+static inline uint64_t QEMU_ALWAYS_INLINE
+load_helper_unaligned(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
+                      uintptr_t retaddr, MemOp op, bool code_read,
+                      FullLoadHelper *full_load)
+{
+    size_t size = memop_size(op);
+    target_ulong addr1, addr2;
+    uint64_t res;
+    uint64_t r1, r2;
+    unsigned shift;
+
+    addr1 = addr & ~((target_ulong)size - 1);
+    addr2 = addr1 + size;
+    r1 = full_load(env, addr1, oi, retaddr);
+    r2 = full_load(env, addr2, oi, retaddr);
+    shift = (addr & (size - 1)) * 8;
+
+    if (memop_big_endian(op)) {
+        /* Big-endian combine.  */
+        res = (r1 << shift) | (r2 >> ((size * 8) - shift));
+    } else {
+        /* Little-endian combine.  */
+        res = (r1 >> shift) | (r2 << ((size * 8) - shift));
+    }
+
+    return res & MAKE_64BIT_MASK(0, size * 8);
+}
+
 static inline uint64_t QEMU_ALWAYS_INLINE
 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
             uintptr_t retaddr, MemOp op, bool code_read,
@@ -1866,7 +1894,6 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
         code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
     unsigned a_bits = get_alignment_bits(get_memop(oi));
     void *haddr;
-    uint64_t res;
     size_t size = memop_size(op);
 
     /* Handle CPU specific unaligned behaviour */
@@ -1895,7 +1922,8 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
 
         /* For anything that is unaligned, recurse through full_load.  */
         if ((addr & (size - 1)) != 0) {
-            goto do_unaligned_access;
+            return load_helper_unaligned(env, addr, oi, retaddr, op,
+                                         code_read, full_load);
         }
 
         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
@@ -1932,24 +1960,8 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
     if (size > 1
         && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                     >= TARGET_PAGE_SIZE)) {
-        target_ulong addr1, addr2;
-        uint64_t r1, r2;
-        unsigned shift;
-    do_unaligned_access:
-        addr1 = addr & ~((target_ulong)size - 1);
-        addr2 = addr1 + size;
-        r1 = full_load(env, addr1, oi, retaddr);
-        r2 = full_load(env, addr2, oi, retaddr);
-        shift = (addr & (size - 1)) * 8;
-
-        if (memop_big_endian(op)) {
-            /* Big-endian combine.  */
-            res = (r1 << shift) | (r2 >> ((size * 8) - shift));
-        } else {
-            /* Little-endian combine.  */
-            res = (r1 >> shift) | (r2 << ((size * 8) - shift));
-        }
-        return res & MAKE_64BIT_MASK(0, size * 8);
+        return load_helper_unaligned(env, addr, oi, retaddr, op,
+                                     code_read, full_load);
     }
 
     haddr = (void *)((uintptr_t)addr + entry->addend);
-- 
2.25.1




* [PATCH 03/15] accel/tcg: Use byte ops for unaligned loads
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
  2021-06-19 17:26 ` [PATCH 01/15] NOTFORMERGE q800: test case for do_unaligned_access issue Richard Henderson
  2021-06-19 17:26 ` [PATCH 02/15] accel/tcg: Extract load_helper_unaligned from load_helper Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-19 17:26 ` [PATCH 04/15] accel/tcg: Don't test for watchpoints for code read Richard Henderson
                   ` (13 subsequent siblings)
  16 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.bennee, pbonzini, mark.cave-ayland, f4bug

From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>

If an unaligned load is required then the load is split into
two separate accesses and combined.  This does not work correctly
with MMIO accesses because the I/O subsystem may use a different
endianness than we are expecting.

Use byte loads, which have no endianness for the I/O subsystem to
adjust, to sidestep the problem.  We already use byte stores in
store_helper_unaligned, so this solution has precedent.
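
The key property is that a one-byte access has no byte order for the
I/O subsystem to adjust, so the assembly loop alone decides it.  A
minimal sketch of the big-endian case with made-up bytes (mirroring
the loop added below, but not QEMU code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t bytes[4] = { 0x11, 0x22, 0x33, 0x44 }; /* device bytes */
        uint64_t val = 0;

        for (int i = 0; i < 4; ++i) {
            val = (val << 8) | bytes[i];    /* big-endian assembly */
        }
        printf("0x%08llx\n", (unsigned long long)val); /* 0x11223344 */
        return 0;
    }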

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/360
Message-Id: <20210609093528.9616-1-mark.cave-ayland@ilande.co.uk>
[PMD: Extract load_helper_unaligned() in earlier patch]
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20210609141010.1066750-3-f4bug@amsat.org>
[rth: Drop all of the stuff we do for stores not required by loads.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 93 ++++++++++++++++++----------------------------
 1 file changed, 36 insertions(+), 57 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index a94de90099..ba21487138 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1854,35 +1854,36 @@ load_memop(const void *haddr, MemOp op)
 static inline uint64_t QEMU_ALWAYS_INLINE
 load_helper_unaligned(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
                       uintptr_t retaddr, MemOp op, bool code_read,
-                      FullLoadHelper *full_load)
+                      FullLoadHelper *byte_load)
 {
+    uintptr_t mmu_idx = get_mmuidx(oi);
     size_t size = memop_size(op);
-    target_ulong addr1, addr2;
-    uint64_t res;
-    uint64_t r1, r2;
-    unsigned shift;
-
-    addr1 = addr & ~((target_ulong)size - 1);
-    addr2 = addr1 + size;
-    r1 = full_load(env, addr1, oi, retaddr);
-    r2 = full_load(env, addr2, oi, retaddr);
-    shift = (addr & (size - 1)) * 8;
+    uint64_t val = 0;
+    int i;
 
+    /* XXX: not efficient, but simple. */
+    oi = make_memop_idx(MO_UB, mmu_idx);
     if (memop_big_endian(op)) {
-        /* Big-endian combine.  */
-        res = (r1 << shift) | (r2 >> ((size * 8) - shift));
+        for (i = 0; i < size; ++i) {
+            /* Big-endian load.  */
+            uint64_t val8 = byte_load(env, addr + i, oi, retaddr);
+            val = (val << 8) | val8;
+        }
     } else {
-        /* Little-endian combine.  */
-        res = (r1 >> shift) | (r2 << ((size * 8) - shift));
+        for (i = 0; i < size; ++i) {
+            /* Little-endian load.  */
+            uint64_t val8 = byte_load(env, addr + i, oi, retaddr);
+            val |= val8 << (i * 8);
+        }
     }
 
-    return res & MAKE_64BIT_MASK(0, size * 8);
+    return val;
 }
 
 static inline uint64_t QEMU_ALWAYS_INLINE
 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
             uintptr_t retaddr, MemOp op, bool code_read,
-            FullLoadHelper *full_load)
+            FullLoadHelper *byte_load)
 {
     uintptr_t mmu_idx = get_mmuidx(oi);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
@@ -1920,10 +1921,10 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
         CPUIOTLBEntry *iotlbentry;
         bool need_swap;
 
-        /* For anything that is unaligned, recurse through full_load.  */
+        /* For anything that is unaligned, recurse through byte_load.  */
         if ((addr & (size - 1)) != 0) {
             return load_helper_unaligned(env, addr, oi, retaddr, op,
-                                         code_read, full_load);
+                                         code_read, byte_load);
         }
 
         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
@@ -1961,7 +1962,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
         && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                     >= TARGET_PAGE_SIZE)) {
         return load_helper_unaligned(env, addr, oi, retaddr, op,
-                                     code_read, full_load);
+                                     code_read, byte_load);
     }
 
     haddr = (void *)((uintptr_t)addr + entry->addend);
@@ -1978,8 +1979,9 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
  * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
  */
 
-static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
-                              TCGMemOpIdx oi, uintptr_t retaddr)
+static uint64_t __attribute__((noinline))
+full_ldub_mmu(CPUArchState *env, target_ulong addr,
+              TCGMemOpIdx oi, uintptr_t retaddr)
 {
     return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
 }
@@ -1993,8 +1995,7 @@ tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                  TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
-                       full_le_lduw_mmu);
+    return load_helper(env, addr, oi, retaddr, MO_LEUW, false, full_ldub_mmu);
 }
 
 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
@@ -2006,8 +2007,7 @@ tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                  TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
-                       full_be_lduw_mmu);
+    return load_helper(env, addr, oi, retaddr, MO_BEUW, false, full_ldub_mmu);
 }
 
 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
@@ -2019,8 +2019,7 @@ tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                  TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
-                       full_le_ldul_mmu);
+    return load_helper(env, addr, oi, retaddr, MO_LEUL, false, full_ldub_mmu);
 }
 
 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
@@ -2032,8 +2031,7 @@ tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                  TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
-                       full_be_ldul_mmu);
+    return load_helper(env, addr, oi, retaddr, MO_BEUL, false, full_ldub_mmu);
 }
 
 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
@@ -2045,15 +2043,13 @@ tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
-                       helper_le_ldq_mmu);
+    return load_helper(env, addr, oi, retaddr, MO_LEQ, false, full_ldub_mmu);
 }
 
 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
-                       helper_be_ldq_mmu);
+    return load_helper(env, addr, oi, retaddr, MO_BEQ, false, full_ldub_mmu);
 }
 
 /*
@@ -2732,8 +2728,9 @@ void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
 
 /* Code access functions.  */
 
-static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
-                               TCGMemOpIdx oi, uintptr_t retaddr)
+static uint64_t __attribute__((noinline))
+full_ldub_code(CPUArchState *env, target_ulong addr,
+               TCGMemOpIdx oi, uintptr_t retaddr)
 {
     return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
 }
@@ -2744,38 +2741,20 @@ uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
     return full_ldub_code(env, addr, oi, 0);
 }
 
-static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
-                               TCGMemOpIdx oi, uintptr_t retaddr)
-{
-    return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
-}
-
 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
 {
     TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
-    return full_lduw_code(env, addr, oi, 0);
-}
-
-static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
-                              TCGMemOpIdx oi, uintptr_t retaddr)
-{
-    return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
+    return load_helper(env, addr, oi, 0, MO_TEUW, true, full_ldub_code);
 }
 
 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
 {
     TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
-    return full_ldl_code(env, addr, oi, 0);
-}
-
-static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
-                              TCGMemOpIdx oi, uintptr_t retaddr)
-{
-    return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
+    return load_helper(env, addr, oi, 0, MO_TEUL, true, full_ldub_code);
 }
 
 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
 {
     TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
-    return full_ldq_code(env, addr, oi, 0);
+    return load_helper(env, addr, oi, 0, MO_TEQ, true, full_ldub_code);
 }
-- 
2.25.1




* [PATCH 04/15] accel/tcg: Don't test for watchpoints for code read
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
                   ` (2 preceding siblings ...)
  2021-06-19 17:26 ` [PATCH 03/15] accel/tcg: Use byte ops for unaligned loads Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-21 18:29   ` Philippe Mathieu-Daudé
  2021-06-19 17:26 ` [PATCH 05/15] accel/tcg: Handle page span access before i/o access Richard Henderson
                   ` (12 subsequent siblings)
  16 siblings, 1 reply; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.bennee, pbonzini, mark.cave-ayland, f4bug

Data read watchpoints do not apply to code reads.
Watchpoints for code are called breakpoints, and
are handled by the translator.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index ba21487138..23a97849be 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1930,7 +1930,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
 
         /* Handle watchpoints.  */
-        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
+        if (!code_read && unlikely(tlb_addr & TLB_WATCHPOINT)) {
             /* On watchpoint hit, this will longjmp out.  */
             cpu_check_watchpoint(env_cpu(env), addr, size,
                                  iotlbentry->attrs, BP_MEM_READ, retaddr);
-- 
2.25.1




* [PATCH 05/15] accel/tcg: Handle page span access before i/o access
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
                   ` (3 preceding siblings ...)
  2021-06-19 17:26 ` [PATCH 04/15] accel/tcg: Don't test for watchpoints for code read Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-19 17:26 ` [PATCH 06/15] softmmu/memory: Inline memory_region_dispatch_read1 Richard Henderson
                   ` (11 subsequent siblings)
  16 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.bennee, pbonzini, mark.cave-ayland, f4bug

At present this is a distinction without much effect.
But this will enable further improvements.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 23a97849be..6209e00c9b 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1916,6 +1916,14 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
         tlb_addr &= ~TLB_INVALID_MASK;
     }
 
+    /* Handle access that spans two pages. */
+    if (size > 1
+        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
+                    >= TARGET_PAGE_SIZE)) {
+        return load_helper_unaligned(env, addr, oi, retaddr, op,
+                                     code_read, byte_load);
+    }
+
     /* Handle anything that isn't just a straight memory access.  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
         CPUIOTLBEntry *iotlbentry;
@@ -1957,14 +1965,6 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
         return load_memop(haddr, op);
     }
 
-    /* Handle slow unaligned access (it spans two pages or IO).  */
-    if (size > 1
-        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
-                    >= TARGET_PAGE_SIZE)) {
-        return load_helper_unaligned(env, addr, oi, retaddr, op,
-                                     code_read, byte_load);
-    }
-
     haddr = (void *)((uintptr_t)addr + entry->addend);
     return load_memop(haddr, op);
 }
@@ -2421,6 +2421,16 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
         tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
     }
 
+    /* Handle access that spans two pages. */
+    if (size > 1
+        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
+                     >= TARGET_PAGE_SIZE)) {
+    do_unaligned_access:
+        store_helper_unaligned(env, addr, val, retaddr, size,
+                               mmu_idx, memop_big_endian(op));
+        return;
+    }
+
     /* Handle anything that isn't just a straight memory access.  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
         CPUIOTLBEntry *iotlbentry;
@@ -2474,16 +2484,6 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
         return;
     }
 
-    /* Handle slow unaligned access (it spans two pages or IO).  */
-    if (size > 1
-        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
-                     >= TARGET_PAGE_SIZE)) {
-    do_unaligned_access:
-        store_helper_unaligned(env, addr, val, retaddr, size,
-                               mmu_idx, memop_big_endian(op));
-        return;
-    }
-
     haddr = (void *)((uintptr_t)addr + entry->addend);
     store_memop(haddr, val, op);
 }
-- 
2.25.1




* [PATCH 06/15] softmmu/memory: Inline memory_region_dispatch_read1
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
                   ` (4 preceding siblings ...)
  2021-06-19 17:26 ` [PATCH 05/15] accel/tcg: Handle page span access before i/o access Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-21 18:25   ` Philippe Mathieu-Daudé
  2021-06-19 17:26 ` [PATCH 07/15] softmmu/memory: Simplify access_with_adjusted_size interface Richard Henderson
                   ` (10 subsequent siblings)
  16 siblings, 1 reply; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.bennee, pbonzini, mark.cave-ayland, f4bug

Inline the body into the only caller of this function.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 softmmu/memory.c | 38 ++++++++++++++------------------------
 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/softmmu/memory.c b/softmmu/memory.c
index f0161515e9..744c5a80bd 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -1408,29 +1408,6 @@ bool memory_region_access_valid(MemoryRegion *mr,
     return true;
 }
 
-static MemTxResult memory_region_dispatch_read1(MemoryRegion *mr,
-                                                hwaddr addr,
-                                                uint64_t *pval,
-                                                unsigned size,
-                                                MemTxAttrs attrs)
-{
-    *pval = 0;
-
-    if (mr->ops->read) {
-        return access_with_adjusted_size(addr, pval, size,
-                                         mr->ops->impl.min_access_size,
-                                         mr->ops->impl.max_access_size,
-                                         memory_region_read_accessor,
-                                         mr, attrs);
-    } else {
-        return access_with_adjusted_size(addr, pval, size,
-                                         mr->ops->impl.min_access_size,
-                                         mr->ops->impl.max_access_size,
-                                         memory_region_read_with_attrs_accessor,
-                                         mr, attrs);
-    }
-}
-
 MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
                                         hwaddr addr,
                                         uint64_t *pval,
@@ -1445,7 +1422,20 @@ MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
         return MEMTX_DECODE_ERROR;
     }
 
-    r = memory_region_dispatch_read1(mr, addr, pval, size, attrs);
+    *pval = 0;
+    if (mr->ops->read) {
+        r = access_with_adjusted_size(addr, pval, size,
+                                      mr->ops->impl.min_access_size,
+                                      mr->ops->impl.max_access_size,
+                                      memory_region_read_accessor,
+                                      mr, attrs);
+    } else {
+        r = access_with_adjusted_size(addr, pval, size,
+                                      mr->ops->impl.min_access_size,
+                                      mr->ops->impl.max_access_size,
+                                      memory_region_read_with_attrs_accessor,
+                                      mr, attrs);
+    }
     adjust_endianness(mr, pval, op);
     return r;
 }
-- 
2.25.1




* [PATCH 07/15] softmmu/memory: Simplify access_with_adjusted_size interface
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
                   ` (5 preceding siblings ...)
  2021-06-19 17:26 ` [PATCH 06/15] softmmu/memory: Inline memory_region_dispatch_read1 Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-21 18:27   ` Philippe Mathieu-Daudé
  2021-06-19 17:26 ` [PATCH 08/15] hw/net/e1000e: Fix size of io operations Richard Henderson
                   ` (9 subsequent siblings)
  16 siblings, 1 reply; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.bennee, pbonzini, mark.cave-ayland, f4bug

Create a typedef for the access_fn callback.  Remove the
access_size_{min,max} and access_fn arguments, and instead
derive these from the MemoryRegion argument.  Add a write
boolean argument.  Mark the function inline.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 softmmu/memory.c | 67 +++++++++++++++++++-----------------------------
 1 file changed, 26 insertions(+), 41 deletions(-)

diff --git a/softmmu/memory.c b/softmmu/memory.c
index 744c5a80bd..7373d89600 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -509,22 +509,24 @@ static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr,
     return mr->ops->write_with_attrs(mr->opaque, addr, tmp, size, attrs);
 }
 
+typedef MemTxResult MemoryRegionAccessFn(MemoryRegion *mr,
+                                         hwaddr addr,
+                                         uint64_t *value,
+                                         unsigned size,
+                                         signed shift,
+                                         uint64_t mask,
+                                         MemTxAttrs attrs);
+
 static MemTxResult access_with_adjusted_size(hwaddr addr,
-                                      uint64_t *value,
-                                      unsigned size,
-                                      unsigned access_size_min,
-                                      unsigned access_size_max,
-                                      MemTxResult (*access_fn)
-                                                  (MemoryRegion *mr,
-                                                   hwaddr addr,
-                                                   uint64_t *value,
-                                                   unsigned size,
-                                                   signed shift,
-                                                   uint64_t mask,
-                                                   MemTxAttrs attrs),
-                                      MemoryRegion *mr,
-                                      MemTxAttrs attrs)
+                                             uint64_t *value,
+                                             unsigned size,
+                                             MemoryRegion *mr,
+                                             MemTxAttrs attrs,
+                                             bool write)
 {
+    unsigned access_size_min = mr->ops->impl.min_access_size;
+    unsigned access_size_max = mr->ops->impl.max_access_size;
+    MemoryRegionAccessFn *access_fn;
     uint64_t access_mask;
     unsigned access_size;
     unsigned i;
@@ -537,6 +539,14 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
         access_size_max = 4;
     }
 
+    if (write) {
+        access_fn = (mr->ops->write ? memory_region_write_accessor
+                     : memory_region_write_with_attrs_accessor);
+    } else {
+        access_fn = (mr->ops->read ? memory_region_read_accessor
+                     : memory_region_read_with_attrs_accessor);
+    }
+
     /* FIXME: support unaligned access? */
     access_size = MAX(MIN(size, access_size_max), access_size_min);
     access_mask = MAKE_64BIT_MASK(0, access_size * 8);
@@ -1423,19 +1433,7 @@ MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
     }
 
     *pval = 0;
-    if (mr->ops->read) {
-        r = access_with_adjusted_size(addr, pval, size,
-                                      mr->ops->impl.min_access_size,
-                                      mr->ops->impl.max_access_size,
-                                      memory_region_read_accessor,
-                                      mr, attrs);
-    } else {
-        r = access_with_adjusted_size(addr, pval, size,
-                                      mr->ops->impl.min_access_size,
-                                      mr->ops->impl.max_access_size,
-                                      memory_region_read_with_attrs_accessor,
-                                      mr, attrs);
-    }
+    r = access_with_adjusted_size(addr, pval, size, mr, attrs, false);
     adjust_endianness(mr, pval, op);
     return r;
 }
@@ -1486,20 +1484,7 @@ MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
         return MEMTX_OK;
     }
 
-    if (mr->ops->write) {
-        return access_with_adjusted_size(addr, &data, size,
-                                         mr->ops->impl.min_access_size,
-                                         mr->ops->impl.max_access_size,
-                                         memory_region_write_accessor, mr,
-                                         attrs);
-    } else {
-        return
-            access_with_adjusted_size(addr, &data, size,
-                                      mr->ops->impl.min_access_size,
-                                      mr->ops->impl.max_access_size,
-                                      memory_region_write_with_attrs_accessor,
-                                      mr, attrs);
-    }
+    return access_with_adjusted_size(addr, &data, size, mr, attrs, true);
 }
 
 void memory_region_init_io(MemoryRegion *mr,
-- 
2.25.1




* [PATCH 08/15] hw/net/e1000e: Fix size of io operations
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
                   ` (6 preceding siblings ...)
  2021-06-19 17:26 ` [PATCH 07/15] softmmu/memory: Simplify access_with_adjusted_size interface Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-19 17:26 ` [PATCH 09/15] hw/net/e1000e: Fix impl.min_access_size Richard Henderson
                   ` (8 subsequent siblings)
  16 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel
  Cc: Dmitry Fleytman, Jason Wang, mark.cave-ayland, f4bug, pbonzini,
	alex.bennee

The size of the operation is an argument to the respective
functions and has nothing to do with sizeof(val), which is a
property of the local variable rather than of the guest access.

Cc: Jason Wang <jasowang@redhat.com>
Cc: Dmitry Fleytman <dmitry.fleytman@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 hw/net/e1000e.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
index a8a77eca95..ea3347fbb4 100644
--- a/hw/net/e1000e.c
+++ b/hw/net/e1000e.c
@@ -145,7 +145,7 @@ e1000e_io_read(void *opaque, hwaddr addr, unsigned size)
         return s->ioaddr;
     case E1000_IODATA:
         if (e1000e_io_get_reg_index(s, &idx)) {
-            val = e1000e_core_read(&s->core, idx, sizeof(val));
+            val = e1000e_core_read(&s->core, idx, size);
             trace_e1000e_io_read_data(idx, val);
             return val;
         }
@@ -171,7 +171,7 @@ e1000e_io_write(void *opaque, hwaddr addr,
     case E1000_IODATA:
         if (e1000e_io_get_reg_index(s, &idx)) {
             trace_e1000e_io_write_data(idx, val);
-            e1000e_core_write(&s->core, idx, val, sizeof(val));
+            e1000e_core_write(&s->core, idx, val, size);
         }
         return;
     default:
-- 
2.25.1




* [PATCH 09/15] hw/net/e1000e: Fix impl.min_access_size
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
                   ` (7 preceding siblings ...)
  2021-06-19 17:26 ` [PATCH 08/15] hw/net/e1000e: Fix size of io operations Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-21  7:20   ` Jason Wang
  2021-06-19 17:26 ` [PATCH 10/15] hw/pci-host/q35: Improve blackhole_ops Richard Henderson
                   ` (7 subsequent siblings)
  16 siblings, 1 reply; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel
  Cc: Dmitry Fleytman, Jason Wang, mark.cave-ayland, f4bug, pbonzini,
	alex.bennee

There are certainly architectural 2-byte writes, as evidenced
by the e1000e_set_16bit function.  I also saw a 1-byte write,
though that may have been the fuzzer.

Cc: Jason Wang <jasowang@redhat.com> 
Cc: Dmitry Fleytman <dmitry.fleytman@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 hw/net/e1000e.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
index ea3347fbb4..ad73e39ebc 100644
--- a/hw/net/e1000e.c
+++ b/hw/net/e1000e.c
@@ -185,7 +185,7 @@ static const MemoryRegionOps mmio_ops = {
     .write = e1000e_mmio_write,
     .endianness = DEVICE_LITTLE_ENDIAN,
     .impl = {
-        .min_access_size = 4,
+        .min_access_size = 1,
         .max_access_size = 4,
     },
 };
@@ -195,7 +195,7 @@ static const MemoryRegionOps io_ops = {
     .write = e1000e_io_write,
     .endianness = DEVICE_LITTLE_ENDIAN,
     .impl = {
-        .min_access_size = 4,
+        .min_access_size = 1,
         .max_access_size = 4,
     },
 };
-- 
2.25.1




* [PATCH 10/15] hw/pci-host/q35: Improve blackhole_ops
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
                   ` (8 preceding siblings ...)
  2021-06-19 17:26 ` [PATCH 09/15] hw/net/e1000e: Fix impl.min_access_size Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-21 18:31   ` Philippe Mathieu-Daudé
  2021-06-19 17:26 ` [PATCH 11/15] hw/scsi/megasas: Fix megasas_mmio_ops sizes Richard Henderson
                   ` (6 subsequent siblings)
  16 siblings, 1 reply; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.bennee, pbonzini, mark.cave-ayland, f4bug

There is nothing about the blackhole that requires 4-byte
operations.  Decrease the min size to 1, increase the max
size to 8.  Drop the duplicate endianness spec.
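
Returning UINT64_MAX works for every access width because the
accessor masks the value to the access size before merging it.  A
small standalone sketch of that masking (the ternary stands in for
QEMU's MAKE_64BIT_MASK; this is not the actual dispatch code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t raw = UINT64_MAX;  /* what blackhole_read() returns */

        for (unsigned size = 1; size <= 8; size *= 2) {
            uint64_t mask = size == 8 ? ~0ull
                                      : (1ull << (size * 8)) - 1;
            printf("size %u -> 0x%llx\n", size,
                   (unsigned long long)(raw & mask));
        }
        return 0;
    }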

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 hw/pci-host/q35.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 2eb729dff5..133be67e4f 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -268,7 +268,7 @@ static const TypeInfo q35_host_info = {
 
 static uint64_t blackhole_read(void *ptr, hwaddr reg, unsigned size)
 {
-    return 0xffffffff;
+    return UINT64_MAX;
 }
 
 static void blackhole_write(void *opaque, hwaddr addr, uint64_t val,
@@ -282,10 +282,9 @@ static const MemoryRegionOps blackhole_ops = {
     .write = blackhole_write,
     .endianness = DEVICE_NATIVE_ENDIAN,
     .valid.min_access_size = 1,
-    .valid.max_access_size = 4,
-    .impl.min_access_size = 4,
-    .impl.max_access_size = 4,
-    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 8,
 };
 
 /* PCIe MMCFG */
-- 
2.25.1




* [PATCH 11/15] hw/scsi/megasas: Fix megasas_mmio_ops sizes
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
                   ` (9 preceding siblings ...)
  2021-06-19 17:26 ` [PATCH 10/15] hw/pci-host/q35: Improve blackhole_ops Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-19 17:26 ` [PATCH 12/15] hw/scsi/megasas: Improve megasas_queue_ops min_access_size Richard Henderson
                   ` (5 subsequent siblings)
  16 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel
  Cc: Fam Zheng, Hannes Reinecke, qemu-block, mark.cave-ayland, f4bug,
	pbonzini, alex.bennee

All of the megasas mmio registers are 32-bit, not 64-bit.

Cc: qemu-block@nongnu.org
Cc: Fam Zheng <fam@euphon.net>
Cc: Hannes Reinecke <hare@suse.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 hw/scsi/megasas.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 8f2389d2c6..c98cb7a499 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -2163,8 +2163,8 @@ static const MemoryRegionOps megasas_mmio_ops = {
     .write = megasas_mmio_write,
     .endianness = DEVICE_LITTLE_ENDIAN,
     .impl = {
-        .min_access_size = 8,
-        .max_access_size = 8,
+        .min_access_size = 4,
+        .max_access_size = 4,
     }
 };
 
-- 
2.25.1




* [PATCH 12/15] hw/scsi/megasas: Improve megasas_queue_ops min_access_size
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
                   ` (10 preceding siblings ...)
  2021-06-19 17:26 ` [PATCH 11/15] hw/scsi/megasas: Fix megasas_mmio_ops sizes Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-19 17:26 ` [PATCH 13/15] softmmu/memory: Disallow short writes Richard Henderson
                   ` (4 subsequent siblings)
  16 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel
  Cc: Fam Zheng, Hannes Reinecke, qemu-block, mark.cave-ayland, f4bug,
	pbonzini, alex.bennee

This region reads as zero and ignores writes, so we can
trivially support any access size.

Cc: qemu-block@nongnu.org
Cc: Fam Zheng <fam@euphon.net> 
Cc: Hannes Reinecke <hare@suse.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 hw/scsi/megasas.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index c98cb7a499..197b75225f 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -2207,7 +2207,7 @@ static const MemoryRegionOps megasas_queue_ops = {
     .write = megasas_queue_write,
     .endianness = DEVICE_LITTLE_ENDIAN,
     .impl = {
-        .min_access_size = 8,
+        .min_access_size = 1,
         .max_access_size = 8,
     }
 };
-- 
2.25.1




* [PATCH 13/15] softmmu/memory: Disallow short writes
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
                   ` (11 preceding siblings ...)
  2021-06-19 17:26 ` [PATCH 12/15] hw/scsi/megasas: Improve megasas_queue_ops min_access_size Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-19 17:26 ` [PATCH 14/15] softmmu/memory: Support some unaligned access Richard Henderson
                   ` (3 subsequent siblings)
  16 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.bennee, pbonzini, mark.cave-ayland, f4bug

Writes smaller than impl.min_access_size would require a
read-modify-write cycle, which could have side effects.

The present behaviour seems to be to extend the current write
to min_access_size.  While we could continue that, so far all
of the instances I have seen have been either device model
errors or the fuzzer intentionally doing bad things.
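
To see why a short write is problematic, here is a standalone sketch
(register layout, helper names and values are all made up) of what
honoring one would require:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t reg = 0x11223344;   /* 4-byte-only device register */
    static int reads;                   /* count read side effects */

    static uint32_t reg_read4(void) { reads++; return reg; }
    static void reg_write4(uint32_t v) { reg = v; }

    /* A 1-byte write to a 4-byte-only register needs a read-modify-
     * write cycle; the read is a side effect the guest never asked
     * for. */
    static void short_write_rmw(unsigned off, uint8_t val)
    {
        uint32_t cur = reg_read4();

        cur &= ~(0xffu << (off * 8));
        cur |= (uint32_t)val << (off * 8);
        reg_write4(cur);
    }

    int main(void)
    {
        short_write_rmw(1, 0xaa);
        printf("reg=0x%08x reads=%d\n", (unsigned)reg, reads);
        return 0;                       /* reg=0x1122aa44 reads=1 */
    }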

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 softmmu/memory.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/softmmu/memory.c b/softmmu/memory.c
index 7373d89600..2fe237327d 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -548,6 +548,26 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
     }
 
     /* FIXME: support unaligned access? */
+    /*
+     * Check for a small access.
+     */
+    if (unlikely(size < access_size_min)) {
+        /*
+         * Logically, we cannot support short writes without a read-modify
+         * cycle, and many mmio registers have side-effects on read.
+         * In practice, this appears to be either (1) model error,
+         * or (2) guest error via the fuzzer.
+         */
+        if (write) {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid short write: %s "
+                          "hwaddr: 0x%" HWADDR_PRIx " size: %u "
+                          "min: %u max: %u\n", __func__,
+                          memory_region_name(mr), addr, size,
+                          access_size_min, access_size_max);
+            return MEMTX_ERROR;
+        }
+    }
+
     access_size = MAX(MIN(size, access_size_max), access_size_min);
     access_mask = MAKE_64BIT_MASK(0, access_size * 8);
     if (memory_region_big_endian(mr)) {
-- 
2.25.1




* [PATCH 14/15] softmmu/memory: Support some unaligned access
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
                   ` (12 preceding siblings ...)
  2021-06-19 17:26 ` [PATCH 13/15] softmmu/memory: Disallow short writes Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-19 17:26 ` [PATCH 15/15] RFC accel/tcg: Defer some unaligned accesses to memory subsystem Richard Henderson
                   ` (2 subsequent siblings)
  16 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.bennee, pbonzini, mark.cave-ayland, f4bug

Decline to support writes that cannot be covered by access_size_min.
Decline to support unaligned reads that require extraction from more
than two reads.

Do support exact size match when the model supports unaligned.
Do support reducing the operation size to match the alignment.
Do support loads that extract from 1 or 2 larger loads.

Diagnose anything that we do not handle via LOG_GUEST_ERROR,
as any of these cases are probably model or guest errors.
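
One concrete case the new code accepts (values made up): a 2-byte
read at offset 1 of a little-endian region whose impl only allows
4-byte accesses.  Because the access fits inside one naturally
aligned window, it is served by a single wider load plus shift and
mask.  This sketch shows the arithmetic only; the real code routes
the shift through the accessor's signed shift argument:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t reg = 0x44332211;  /* 4-byte register, little-endian */
        unsigned addr = 1, size = 2, access_size = 4;
        unsigned sh = (addr & (access_size - 1)) * 8;      /* 8 */
        uint64_t val = (reg >> sh) & ((1ull << (size * 8)) - 1);

        printf("0x%04llx\n", (unsigned long long)val);     /* 0x3322 */
        return 0;
    }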

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 softmmu/memory.c | 127 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 121 insertions(+), 6 deletions(-)

diff --git a/softmmu/memory.c b/softmmu/memory.c
index 2fe237327d..baf8573f1b 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -529,7 +529,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
     MemoryRegionAccessFn *access_fn;
     uint64_t access_mask;
     unsigned access_size;
-    unsigned i;
+    signed access_sh;
     MemTxResult r = MEMTX_OK;
 
     if (!access_size_min) {
@@ -547,7 +547,6 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
                      : memory_region_read_with_attrs_accessor);
     }
 
-    /* FIXME: support unaligned access? */
     /*
      * Check for a small access.
      */
@@ -557,6 +556,10 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
          * cycle, and many mmio registers have side-effects on read.
          * In practice, this appears to be either (1) model error,
          * or (2) guest error via the fuzzer.
+         *
+         * TODO: Are all short reads also guest or model errors, because
+         * of said side effects?  Or is this valid read-for-effect then
+         * discard the (complete) result via narrow destination register?
          */
         if (write) {
             qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid short write: %s "
@@ -566,22 +569,134 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
                           access_size_min, access_size_max);
             return MEMTX_ERROR;
         }
+
+        /*
+         * If the original access is aligned, we can always extract
+         * from a single larger load.
+         */
+        access_size = access_size_min;
+        if (likely((addr & (size - 1)) == 0)) {
+            goto extract;
+        }
+
+        /*
+         * TODO: We could search for a larger load that happens to
+         * cover the unaligned load, but at some point we will always
+         * require two operations.  Extract from two loads.
+         */
+        goto extract2;
     }
 
+    /*
+     * Check for size in range.
+     */
+    if (likely(size <= access_size_max)) {
+        /*
+         * If the access is aligned or if the model supports
+         * unaligned accesses, use one operation directly.
+         */
+        if (likely((addr & (size - 1)) == 0) || mr->ops->impl.unaligned) {
+            access_size = size;
+            access_sh = 0;
+            goto direct;
+        }
+    }
+
+    /*
+     * It is certain that we require multiple operations.
+     * If the access is aligned (or the model supports unaligned),
+     * then we will perform N accesses which exactly cover the
+     * operation requested.
+     */
     access_size = MAX(MIN(size, access_size_max), access_size_min);
+    if (unlikely(addr & (access_size - 1))) {
+        unsigned lsb = addr & -addr;
+        if (lsb >= access_size_min) {
+            /*
+             * The model supports small enough loads that we can
+             * exactly match the operation requested.  For reads,
+             * this is preferable to touching more than requested.
+             * For writes, this is mandatory.
+             */
+            access_size = lsb;
+        } else if (write) {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid unaligned write: %s "
+                          "hwaddr: 0x%" HWADDR_PRIx " size: %u "
+                          "min: %u max: %u\n", __func__,
+                          memory_region_name(mr), addr, size,
+                          access_size_min, access_size_max);
+            return MEMTX_ERROR;
+        } else if (size <= access_size_max) {
+            /* As per above, we can use two loads to implement. */
+            access_size = size;
+            goto extract2;
+        } else {
+            /*
+             * TODO: because access_size_max is small, this case requires
+             * more than 2 loads to assemble and extract.  Bail out.
+             */
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: Unhandled unaligned read: %s "
+                          "hwaddr: 0x%" HWADDR_PRIx " size: %u "
+                          "min: %u max: %u\n", __func__,
+                          memory_region_name(mr), addr, size,
+                          access_size_min, access_size_max);
+            return MEMTX_ERROR;
+        }
+    }
+
     access_mask = MAKE_64BIT_MASK(0, access_size * 8);
     if (memory_region_big_endian(mr)) {
-        for (i = 0; i < size; i += access_size) {
+        for (unsigned i = 0; i < size; i += access_size) {
             r |= access_fn(mr, addr + i, value, access_size,
-                        (size - access_size - i) * 8, access_mask, attrs);
+                           (size - access_size - i) * 8, access_mask, attrs);
         }
     } else {
-        for (i = 0; i < size; i += access_size) {
+        for (unsigned i = 0; i < size; i += access_size) {
             r |= access_fn(mr, addr + i, value, access_size, i * 8,
-                        access_mask, attrs);
+                           access_mask, attrs);
         }
     }
     return r;
+
+ extract2:
+    /*
+     * Extract from one or two loads to produce the result.
+     * Validate that we need two loads before performing them.
+     */
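+    /*
+     * Worked example (illustrative): a 4-byte big-endian read at
+     * offset 2 with access_size = 4 wants bytes B2 B3 B4 B5.  The
+     * load at the aligned base yields B0B1B2B3, shifted left 16;
+     * the load at base + 4 yields B4B5B6B7, shifted right 16; the
+     * final mask keeps the low 32 bits: B2B3B4B5.
+     */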
+    access_sh = addr & (access_size - 1);
+    if (access_sh + size > access_size) {
+        addr &= ~(access_size - 1);
+        if (memory_region_big_endian(mr)) {
+            access_sh = (size + access_sh - access_size) * 8;
+            r |= access_fn(mr, addr, value, access_size, access_sh, -1, attrs);
+            access_sh -= access_size * 8;
+            r |= access_fn(mr, addr + access_size, value, access_size,
+                           access_sh, -1, attrs);
+        } else {
+            access_sh = access_sh * -8;
+            r |= access_fn(mr, addr, value, access_size, access_sh, -1, attrs);
+            access_sh += access_size * 8;
+            r |= access_fn(mr, addr + access_size, value, access_size,
+                           access_sh, -1, attrs);
+        }
+        *value &= MAKE_64BIT_MASK(0, size * 8);
+        return r;
+    }
+
+ extract:
+    /*
+     * Extract from one larger load to produce the result.
+     */
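+    /*
+     * Worked example (illustrative): a 2-byte big-endian read at
+     * an offset aligned to access_size = 4 loads B0B1B2B3 and needs
+     * B0 B1 in the low 16 bits: access_sh = (4 - 2 - 0) * -8 = -16,
+     * a right shift of 16.
+     */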
+    access_sh = addr & (access_size - 1);
+    addr &= ~(access_size - 1);
+    if (memory_region_big_endian(mr)) {
+        access_sh = access_size - size - access_sh;
+    }
+    /* Note that with this interface, right shift is negative. */
+    access_sh *= -8;
+
+ direct:
+    access_mask = MAKE_64BIT_MASK(0, size * 8);
+    return access_fn(mr, addr, value, access_size, access_sh,
+                     access_mask, attrs);
 }
 
 static AddressSpace *memory_region_to_address_space(MemoryRegion *mr)
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 15/15] RFC accel/tcg: Defer some unaligned accesses to memory subsystem
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
                   ` (13 preceding siblings ...)
  2021-06-19 17:26 ` [PATCH 14/15] softmmu/memory: Support some unaligned access Richard Henderson
@ 2021-06-19 17:26 ` Richard Henderson
  2021-06-20 13:08 ` [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Mark Cave-Ayland
  2021-06-20 14:33 ` Peter Maydell
  16 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-06-19 17:26 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.bennee, pbonzini, mark.cave-ayland, f4bug

For unaligned i/o accesses that do not cross pages, do not handle
the misalignment in cputlb, but let the memory system deal with it.

RFC because this, for the first time, exposes many guests to the
existing mr->ops->valid.unaligned checks in the memory subsystem.
Previously this code was only reachable when a caller explicitly
invoked memory_region_dispatch_*.

This does in fact trip up the original m68k q800 testcase, #360.

Since this hasn't really been reachable, I'm willing to bet that
every device is wrong wrt mr->ops->valid.unaligned, and possibly
that we shouldn't even have it.
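
For reference, the check keys off a flag that devices rarely set
explicitly (illustrative sketch, not a device from this series):

static const MemoryRegionOps dev_ops = {
    .read = dev_read,
    .write = dev_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
        .unaligned = false,   /* also the zero-initialized default */
    },
};

Since .valid.unaligned defaults to false, nearly every device nominally
rejects unaligned access without that path ever having been exercised.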

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 6209e00c9b..905edab19b 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1929,12 +1929,6 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
         CPUIOTLBEntry *iotlbentry;
         bool need_swap;
 
-        /* For anything that is unaligned, recurse through byte_load.  */
-        if ((addr & (size - 1)) != 0) {
-            return load_helper_unaligned(env, addr, oi, retaddr, op,
-                                         code_read, byte_load);
-        }
-
         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
 
         /* Handle watchpoints.  */
@@ -2425,7 +2419,6 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
     if (size > 1
         && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                      >= TARGET_PAGE_SIZE)) {
-    do_unaligned_access:
         store_helper_unaligned(env, addr, val, retaddr, size,
                                mmu_idx, memop_big_endian(op));
         return;
@@ -2436,11 +2429,6 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
         CPUIOTLBEntry *iotlbentry;
         bool need_swap;
 
-        /* For anything that is unaligned, recurse through byte stores.  */
-        if ((addr & (size - 1)) != 0) {
-            goto do_unaligned_access;
-        }
-
         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
 
         /* Handle watchpoints.  */
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
                   ` (14 preceding siblings ...)
  2021-06-19 17:26 ` [PATCH 15/15] RFC accel/tcg: Defer some unaligned accesses to memory subsystem Richard Henderson
@ 2021-06-20 13:08 ` Mark Cave-Ayland
  2021-06-20 14:33 ` Peter Maydell
  16 siblings, 0 replies; 23+ messages in thread
From: Mark Cave-Ayland @ 2021-06-20 13:08 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: pbonzini, alex.bennee, f4bug

On 19/06/2021 18:26, Richard Henderson wrote:

> Short story is that the first two patches resolve the observed
> problem, by completely bypassing quite a lot of code in memory.c.
> 
> Longer story is that we should either use that code in memory.c,
> or we should bypass it to an even lower level, so that we don't
> have multiple locations doing the partial-read assembly thing.
> 
> Patch 13 exposes a number of obvious device bugs via make check.
> I'm sure there are more in devices that are less well tested.
> 
> Patch 15 has an obvious drawback: it breaks the original #360.
> But it starts the conversation as to whether the check in memory.c
> is in fact broken.
> 
> 
> r~
> 
> 
> Mark Cave-Ayland (2):
>    NOTFORMERGE q800: test case for do_unaligned_access issue
>    accel/tcg: Use byte ops for unaligned loads
> 
> Philippe Mathieu-Daudé (1):
>    accel/tcg: Extract load_helper_unaligned from load_helper
> 
> Richard Henderson (12):
>    accel/tcg: Don't test for watchpoints for code read
>    accel/tcg: Handle page span access before i/o access
>    softmmu/memory: Inline memory_region_dispatch_read1
>    softmmu/memory: Simplify access_with_adjusted_size interface
>    hw/net/e1000e: Fix size of io operations
>    hw/net/e1000e: Fix impl.min_access_size
>    hw/pci-host/q35: Improve blackhole_ops
>    hw/scsi/megasas: Fix megasas_mmio_ops sizes
>    hw/scsi/megasas: Improve megasas_queue_ops min_access_size
>    softmmu/memory: Disallow short writes
>    softmmu/memory: Support some unaligned access
>    RFC accel/tcg: Defer some unaligned accesses to memory subsystem
> 
>   accel/tcg/cputlb.c | 147 +++++++++++++----------------
>   hw/m68k/q800.c     | 131 ++------------------------
>   hw/net/e1000e.c    |   8 +-
>   hw/pci-host/q35.c  |   9 +-
>   hw/scsi/megasas.c  |   6 +-
>   softmmu/memory.c   | 226 +++++++++++++++++++++++++++++++++------------
>   6 files changed, 251 insertions(+), 276 deletions(-)

Hi Richard,

I've had a look over this patchset, and based upon my work leading up to #360 this 
does feel like an improvement: in particular separating out page spanning accesses, 
and removing the duplicate cputlb code for splitting/combining unaligned accesses 
seem like wins to me.

As mentioned in the report itself, cputlb has effectively been bypassing 
.min_access_size but I'm not sure about the consequences of this - does it mean that 
the access size check should also be bypassed for the head/tail of unaligned 
accesses? I'm also not completely sure how this behaviour can change based upon the 
target CPU.

The comment in patch 15 re: mr->ops->valid.unaligned is interesting: if the memory 
subsystem can split the accesses beforehand so that they are accepted by the device, 
does this check then become obsolete?

My general feeling is that there are still some discussions to be had around defining 
how unaligned accesses should work, and including a qtest to cover the various 
unaligned/page span cases would help here - a rough sketch of what such a test might 
look like is below. Other than that I do feel the patchset is worthwhile, and it may 
be that, as with the memory_region_access_valid() changes in 5d971f9e67, we accept 
that the change in behaviour will have some fallout but allow plenty of time for 
regressions to be fixed.
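
Something along these lines, perhaps (an untested sketch; the machine
choice, MMIO base address and expected values are all placeholders):

#include "qemu/osdep.h"
#include "libqtest.h"

static void test_unaligned_mmio(void)
{
    QTestState *qts = qtest_init("-M q800");   /* machine is arbitrary */
    uint64_t base = 0x50f04000;                /* hypothetical MMIO base */

    /* 2-byte read straddling a 4-byte register boundary */
    uint16_t v = qtest_readw(qts, base + 3);

    /* 4-byte read spanning a (4 KiB) page boundary */
    uint32_t w = qtest_readl(qts, base + 4096 - 2);

    /* expected values depend on the device being modelled */
    g_assert_cmphex(v, !=, 0xffff);
    g_assert_cmphex(w, !=, 0xffffffff);

    qtest_quit(qts);
}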


ATB,

Mark.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues
  2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
                   ` (15 preceding siblings ...)
  2021-06-20 13:08 ` [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Mark Cave-Ayland
@ 2021-06-20 14:33 ` Peter Maydell
  16 siblings, 0 replies; 23+ messages in thread
From: Peter Maydell @ 2021-06-20 14:33 UTC (permalink / raw)
  To: Richard Henderson
  Cc: Paolo Bonzini, Mark Cave-Ayland, Alex Bennée,
	QEMU Developers, Philippe Mathieu-Daudé

On Sat, 19 Jun 2021 at 18:28, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Short story is that the first two patches resolve the observed
> problem, by completely bypassing quite a lot of code in memory.c.
>
> Longer story is that we should either use that code in memory.c,
> or we should bypass it to an even lower level, so that we don't
> have multiple locations doing the partial-read assembly thing.

I haven't read the patchset yet, but my initial reaction is that
we ought to be handling this stuff in memory.c, because that
code is shared across all accelerators -- we would want the
same behaviour for accesses to a device that doesn't itself
handle a misalignment or a small access, whether we are using
TCG or KVM or HVF or whatever...

thanks
-- PMM


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 09/15] hw/net/e1000e: Fix impl.min_access_size
  2021-06-19 17:26 ` [PATCH 09/15] hw/net/e1000e: Fix impl.min_access_size Richard Henderson
@ 2021-06-21  7:20   ` Jason Wang
  0 siblings, 0 replies; 23+ messages in thread
From: Jason Wang @ 2021-06-21  7:20 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel
  Cc: alex.bennee, pbonzini, Dmitry Fleytman, mark.cave-ayland, f4bug


On 2021/6/20 at 1:26 AM, Richard Henderson wrote:
> There are certainly architectural 2 byte writes, as evidenced
> by the e1000e_set_16bit function.  I also saw a 1 byte write,
> though that may have been the fuzzer.
>
> Cc: Jason Wang <jasowang@redhat.com>
> Cc: Dmitry Fleytman <dmitry.fleytman@gmail.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   hw/net/e1000e.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
> index ea3347fbb4..ad73e39ebc 100644
> --- a/hw/net/e1000e.c
> +++ b/hw/net/e1000e.c
> @@ -185,7 +185,7 @@ static const MemoryRegionOps mmio_ops = {
>       .write = e1000e_mmio_write,
>       .endianness = DEVICE_LITTLE_ENDIAN,
>       .impl = {
> -        .min_access_size = 4,
> +        .min_access_size = 1,


I'm not sure this can work. It looks like at least
e1000e_get_reg_index_with_offset() was written with the assumption that
min_access_size is 4:

static inline uint16_t
e1000e_get_reg_index_with_offset(const uint16_t *mac_reg_access, hwaddr addr)
{
    uint16_t index = (addr & 0x1ffff) >> 2;
    return index + (mac_reg_access[index] & 0xfffe);
}
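
For example, a 2-byte guest write at offset 0x102, which would now be
dispatched directly to the handler, gives

    index = (0x102 & 0x1ffff) >> 2;    /* == 0x40, same as for 0x100 */

so the sub-register byte offset is silently dropped rather than being
merged into the containing 32-bit register.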

Thanks


>           .max_access_size = 4,
>       },
>   };
> @@ -195,7 +195,7 @@ static const MemoryRegionOps io_ops = {
>       .write = e1000e_io_write,
>       .endianness = DEVICE_LITTLE_ENDIAN,
>       .impl = {
> -        .min_access_size = 4,
> +        .min_access_size = 1,
>           .max_access_size = 4,
>       },
>   };



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 06/15] softmmu/memory: Inline memory_region_dispatch_read1
  2021-06-19 17:26 ` [PATCH 06/15] softmmu/memory: Inline memory_region_dispatch_read1 Richard Henderson
@ 2021-06-21 18:25   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 23+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-06-21 18:25 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: pbonzini, mark.cave-ayland, alex.bennee

On 6/19/21 7:26 PM, Richard Henderson wrote:
> Inline the body into the only caller of this function.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  softmmu/memory.c | 38 ++++++++++++++------------------------
>  1 file changed, 14 insertions(+), 24 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 07/15] softmmu/memory: Simplify access_with_adjusted_size interface
  2021-06-19 17:26 ` [PATCH 07/15] softmmu/memory: Simplify access_with_adjusted_size interface Richard Henderson
@ 2021-06-21 18:27   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 23+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-06-21 18:27 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: pbonzini, mark.cave-ayland, alex.bennee

On 6/19/21 7:26 PM, Richard Henderson wrote:
> Create a typedef for the access_fn callback.  Remove the
> access_size_{min,max} and access_fn arguments, and instead
> derive these from the MemoryRegion argument.  Add a write
> boolean argument.  Mark the function inline.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  softmmu/memory.c | 67 +++++++++++++++++++-----------------------------
>  1 file changed, 26 insertions(+), 41 deletions(-)

Nice simplification :)

Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 04/15] accel/tcg: Don't test for watchpoints for code read
  2021-06-19 17:26 ` [PATCH 04/15] accel/tcg: Don't test for watchpoints for code read Richard Henderson
@ 2021-06-21 18:29   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 23+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-06-21 18:29 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: pbonzini, mark.cave-ayland, alex.bennee

On 6/19/21 7:26 PM, Richard Henderson wrote:
> Data read watchpoints do not apply to code reads.
> Watchpoints for code are called breakpoints, and
> are handled by the translator.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  accel/tcg/cputlb.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 10/15] hw/pci-host/q35: Improve blackhole_ops
  2021-06-19 17:26 ` [PATCH 10/15] hw/pci-host/q35: Improve blackhole_ops Richard Henderson
@ 2021-06-21 18:31   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 23+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-06-21 18:31 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: pbonzini, mark.cave-ayland, alex.bennee

On 6/19/21 7:26 PM, Richard Henderson wrote:
> There is nothing about the blackhole that requires 4 byte
> operations.  Decrease the min size to 1, increase the max
> size to 8.  Drop duplicate endianness spec.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  hw/pci-host/q35.c | 9 ++++-----
>  1 file changed, 4 insertions(+), 5 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>



^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2021-06-21 18:37 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-19 17:26 [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Richard Henderson
2021-06-19 17:26 ` [PATCH 01/15] NOTFORMERGE q800: test case for do_unaligned_access issue Richard Henderson
2021-06-19 17:26 ` [PATCH 02/15] accel/tcg: Extract load_helper_unaligned from load_helper Richard Henderson
2021-06-19 17:26 ` [PATCH 03/15] accel/tcg: Use byte ops for unaligned loads Richard Henderson
2021-06-19 17:26 ` [PATCH 04/15] accel/tcg: Don't test for watchpoints for code read Richard Henderson
2021-06-21 18:29   ` Philippe Mathieu-Daudé
2021-06-19 17:26 ` [PATCH 05/15] accel/tcg: Handle page span access before i/o access Richard Henderson
2021-06-19 17:26 ` [PATCH 06/15] softmmu/memory: Inline memory_region_dispatch_read1 Richard Henderson
2021-06-21 18:25   ` Philippe Mathieu-Daudé
2021-06-19 17:26 ` [PATCH 07/15] softmmu/memory: Simplify access_with_adjusted_size interface Richard Henderson
2021-06-21 18:27   ` Philippe Mathieu-Daudé
2021-06-19 17:26 ` [PATCH 08/15] hw/net/e1000e: Fix size of io operations Richard Henderson
2021-06-19 17:26 ` [PATCH 09/15] hw/net/e1000e: Fix impl.min_access_size Richard Henderson
2021-06-21  7:20   ` Jason Wang
2021-06-19 17:26 ` [PATCH 10/15] hw/pci-host/q35: Improve blackhole_ops Richard Henderson
2021-06-21 18:31   ` Philippe Mathieu-Daudé
2021-06-19 17:26 ` [PATCH 11/15] hw/scsi/megasas: Fix megasas_mmio_ops sizes Richard Henderson
2021-06-19 17:26 ` [PATCH 12/15] hw/scsi/megasas: Improve megasas_queue_ops min_access_size Richard Henderson
2021-06-19 17:26 ` [PATCH 13/15] softmmu/memory: Disallow short writes Richard Henderson
2021-06-19 17:26 ` [PATCH 14/15] softmmu/memory: Support some unaligned access Richard Henderson
2021-06-19 17:26 ` [PATCH 15/15] RFC accel/tcg: Defer some unaligned accesses to memory subsystem Richard Henderson
2021-06-20 13:08 ` [PATCH 00/15] accel/tcg: Fix for #360 and other i/o alignment issues Mark Cave-Ayland
2021-06-20 14:33 ` Peter Maydell
