From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: pbonzini@redhat.com, qemu-devel@nongnu.org, stefanha@redhat.com,
	david@redhat.com
Subject: Re: [PATCH v4 06/16] cputlb: Introduce TLB_BSWAP
Date: Tue, 24 Sep 2019 19:25:26 +0100
Message-ID: <87zhita6w9.fsf@linaro.org>
In-Reply-To: <20190923230004.9231-7-richard.henderson@linaro.org>


Richard Henderson <richard.henderson@linaro.org> writes:

> Handle bswap on ram directly in load/store_helper.  This fixes a
> bug with the previous implementation in that one cannot use the
> I/O path for RAM.
>
> Fixes: a26fc6f5152b47f1
> Reviewed-by: David Hildenbrand <david@redhat.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  include/exec/cpu-all.h |  4 ++-
>  accel/tcg/cputlb.c     | 62 ++++++++++++++++++++++--------------------
>  2 files changed, 36 insertions(+), 30 deletions(-)
>
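Just to restate my understanding before diving into the diff: the old
code faked an MMIO page whenever attrs.byte_swap was set, which is why
RAM-backed pages broke - they were forced down the I/O dispatch path.
With the new flag the access stays on the direct RAM path and only the
endianness of the MemOp changes. Roughly (a sketch of the resulting
fast path, not the patch code itself):

    /* TLB_BSWAP set on a plain RAM page: keep the direct host
     * pointer access, just flip the endianness of the MemOp. */
    void *haddr = (void *)((uintptr_t)addr + entry->addend);
    return load_memop(haddr, op ^ MO_BSWAP);
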
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index e0c8dc540c..d148bded35 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -335,12 +335,14 @@ CPUArchState *cpu_copy(CPUArchState *env);
>  #define TLB_MMIO            (1 << (TARGET_PAGE_BITS_MIN - 3))
>  /* Set if TLB entry contains a watchpoint.  */
>  #define TLB_WATCHPOINT      (1 << (TARGET_PAGE_BITS_MIN - 4))
> +/* Set if TLB entry requires byte swap.  */
> +#define TLB_BSWAP           (1 << (TARGET_PAGE_BITS_MIN - 5))
>
>  /* Use this mask to check interception with an alignment mask
>   * in a TCG backend.
>   */
>  #define TLB_FLAGS_MASK \
> -    (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO | TLB_WATCHPOINT)
> +    (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO | TLB_WATCHPOINT | TLB_BSWAP)
>
>  /**
>   * tlb_hit_page: return true if page aligned @addr is a hit against the
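
An aside, since TLB_BSWAP is now the fifth flag squeezed in here: all
of these live in bits below TARGET_PAGE_BITS_MIN, so a page-aligned
guest address can never carry them by accident and the fast path stays
a single mask test. Illustration only, assuming a 4 KiB minimum page
(TARGET_PAGE_BITS_MIN == 12):

    /* TLB_INVALID_MASK = 1 << 11
     * TLB_NOTDIRTY     = 1 << 10
     * TLB_MMIO         = 1 << 9
     * TLB_WATCHPOINT   = 1 << 8
     * TLB_BSWAP        = 1 << 7
     * ...all below bit 12, so one test covers every slow case: */
    if (tlb_addr & ~TARGET_PAGE_MASK) {
        /* some flag is set: take the slow path */
    }
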
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index 430ba4a69d..f634edb4f4 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -737,8 +737,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
>          address |= TLB_INVALID_MASK;
>      }
>      if (attrs.byte_swap) {
> -        /* Force the access through the I/O slow path.  */
> -        address |= TLB_MMIO;
> +        address |= TLB_BSWAP;
>      }
>      if (!memory_region_is_ram(section->mr) &&
>          !memory_region_is_romd(section->mr)) {
> @@ -901,10 +900,6 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
>      bool locked = false;
>      MemTxResult r;
>
> -    if (iotlbentry->attrs.byte_swap) {
> -        op ^= MO_BSWAP;
> -    }
> -
>      section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
>      mr = section->mr;
>      mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
> @@ -947,10 +942,6 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
>      bool locked = false;
>      MemTxResult r;
>
> -    if (iotlbentry->attrs.byte_swap) {
> -        op ^= MO_BSWAP;
> -    }
> -
>      section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
>      mr = section->mr;
>      mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
> @@ -1133,8 +1124,8 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
>                               wp_access, retaddr);
>      }
>
> -    if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO)) {
> -        /* I/O access */
> +    /* Reject I/O access, or other required slow-path.  */
> +    if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO | TLB_BSWAP)) {
>          return NULL;
>      }
>
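For probe_access callers the net effect is that TLB_BSWAP pages now
also report "no direct host access". A hypothetical caller, just to
show what the NULL return buys (not code from this series):

    /* Fast-clear when the page is plain dirty RAM; otherwise fall
     * back to a byte-wise slow path that honours MMIO, notdirty
     * and byte-swap semantics. */
    void *host = probe_access(env, addr, size, MMU_DATA_STORE,
                              mmu_idx, retaddr);
    if (host) {
        memset(host, 0, size);
    } else {
        /* per-byte store helpers go through the full slow path */
    }
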
> @@ -1344,6 +1335,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
>      /* Handle anything that isn't just a straight memory access.  */
>      if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
>          CPUIOTLBEntry *iotlbentry;
> +        bool need_swap;
>
>          /* For anything that is unaligned, recurse through full_load.  */
>          if ((addr & (size - 1)) != 0) {
> @@ -1357,17 +1349,22 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
>              /* On watchpoint hit, this will longjmp out.  */
>              cpu_check_watchpoint(env_cpu(env), addr, size,
>                                   iotlbentry->attrs, BP_MEM_READ, retaddr);
> -
> -            /* The backing page may or may not require I/O.  */
> -            tlb_addr &= ~TLB_WATCHPOINT;
> -            if ((tlb_addr & ~TARGET_PAGE_MASK) == 0) {
> -                goto do_aligned_access;
> -            }
>          }
>
           /* We don't apply MO_BSWAP to op here because we want to
            * ensure the compiler can always unfold and dead-code away
           * the final load_memop in the fast path. If you try it,
           * you will find the assert will get you ;-)
            */

Worth adding a comment along these lines?

Otherwise:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
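
For anyone skimming, here is the property that comment protects, as a
standalone sketch (not the QEMU code; little-endian host assumed for
brevity):

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    typedef enum { OP_UB, OP_LEUW, OP_BEUW } op_t;

    static inline uint64_t load_sketch(const void *p, op_t op)
    {
        uint16_t v;
        switch (op) {
        case OP_UB:
            return *(const uint8_t *)p;
        case OP_LEUW:
            memcpy(&v, p, 2);
            return v;
        case OP_BEUW:
            memcpy(&v, p, 2);
            return (uint16_t)((v << 8) | (v >> 8));
        }
        /* the real load_memop asserts here; reachable only when op
         * fails to fold to a compile-time constant */
        abort();
    }

Called with a literal - load_sketch(p, OP_LEUW) - the switch folds to
a single 16-bit load and the abort() is dead code. Apply the swap to
op unconditionally at run time instead, and the whole switch survives
into the fast path, which is exactly what the assert would catch.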

> +        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
> +
>          /* Handle I/O access.  */
> -        return io_readx(env, iotlbentry, mmu_idx, addr,
> -                        retaddr, access_type, op);
> +        if (likely(tlb_addr & TLB_MMIO)) {
> +            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
> +                            access_type, op ^ (need_swap * MO_BSWAP));
> +        }
> +
> +        haddr = (void *)((uintptr_t)addr + entry->addend);
> +
> +        if (unlikely(need_swap)) {
> +            return load_memop(haddr, op ^ MO_BSWAP);
> +        }
> +        return load_memop(haddr, op);
>      }
>
>      /* Handle slow unaligned access (it spans two pages or IO).  */
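
One more gloss on the trick above: need_swap is 0 or 1 and, if I'm
reading the MemOp encoding right, MO_BSWAP is a single bit, so

    op ^ (need_swap * MO_BSWAP)

either leaves op untouched or flips it between the little- and
big-endian forms (MO_LE ^ MO_BSWAP == MO_BE and vice versa), without
adding a branch before the io_readx call.
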
> @@ -1394,7 +1391,6 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
>          return res & MAKE_64BIT_MASK(0, size * 8);
>      }
>
> - do_aligned_access:
>      haddr = (void *)((uintptr_t)addr + entry->addend);
>      return load_memop(haddr, op);
>  }
> @@ -1592,6 +1588,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
>      /* Handle anything that isn't just a straight memory access.  */
>      if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
>          CPUIOTLBEntry *iotlbentry;
> +        bool need_swap;
>
>          /* For anything that is unaligned, recurse through byte stores.  */
>          if ((addr & (size - 1)) != 0) {
> @@ -1605,16 +1602,24 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
>              /* On watchpoint hit, this will longjmp out.  */
>              cpu_check_watchpoint(env_cpu(env), addr, size,
>                                   iotlbentry->attrs, BP_MEM_WRITE, retaddr);
> -
> -            /* The backing page may or may not require I/O.  */
> -            tlb_addr &= ~TLB_WATCHPOINT;
> -            if ((tlb_addr & ~TARGET_PAGE_MASK) == 0) {
> -                goto do_aligned_access;
> -            }
>          }
>
> +        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
> +
>          /* Handle I/O access.  */
> -        io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, op);
> +        if (likely(tlb_addr & (TLB_MMIO | TLB_NOTDIRTY))) {
> +            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
> +                      op ^ (need_swap * MO_BSWAP));
> +            return;
> +        }
> +
> +        haddr = (void *)((uintptr_t)addr + entry->addend);
> +
> +        if (unlikely(need_swap)) {
> +            store_memop(haddr, val, op ^ MO_BSWAP);
> +        } else {
> +            store_memop(haddr, val, op);
> +        }
>          return;
>      }
>
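Noting for completeness: the store side tests TLB_MMIO | TLB_NOTDIRTY
where the load side tests only TLB_MMIO. That makes sense - notdirty
is purely a write concern, and at this point in the series clean pages
still take io_writex so the dirty-tracking hook runs before the RAM
write (it moves out of the I/O path later in the series).
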
> @@ -1683,7 +1688,6 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
>          return;
>      }
>
> - do_aligned_access:
>      haddr = (void *)((uintptr_t)addr + entry->addend);
>      store_memop(haddr, val, op);
>  }


--
Alex Bennée

