All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: peter.maydell@linux.org, alex.bennee@linux.org, qemu-devel@nongnu.org
Subject: Re: [PATCH v6 18/18] accel/tcg: Introduce TARGET_TB_PCREL
Date: Mon, 03 Oct 2022 14:46:28 +0100	[thread overview]
Message-ID: <87y1txm2xu.fsf@linaro.org> (raw)
In-Reply-To: <20220930212622.108363-19-richard.henderson@linaro.org>


Richard Henderson <richard.henderson@linaro.org> writes:

> Prepare for targets to be able to produce TBs that can
> run in more than one virtual context.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  accel/tcg/internal.h      |  4 +++
>  accel/tcg/tb-jmp-cache.h  |  5 ++++
>  include/exec/cpu-defs.h   |  3 +++
>  include/exec/exec-all.h   | 32 ++++++++++++++++++++--
>  accel/tcg/cpu-exec.c      | 56 +++++++++++++++++++++++++++++++--------
>  accel/tcg/translate-all.c | 50 +++++++++++++++++++++-------------
>  6 files changed, 119 insertions(+), 31 deletions(-)
>
> diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
> index a3875a3b5a..dc800fd485 100644
> --- a/accel/tcg/internal.h
> +++ b/accel/tcg/internal.h
> @@ -21,7 +21,11 @@ void tb_htable_init(void);
>  /* Return the current PC from CPU, which may be cached in TB. */
>  static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
>  {
> +#if TARGET_TB_PCREL
> +    return cpu->cc->get_pc(cpu);
> +#else
>      return tb_pc(tb);
> +#endif
>  }
>  
>  #endif /* ACCEL_TCG_INTERNAL_H */
> diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h
> index 2d8fbb1bfe..a7288150bc 100644
> --- a/accel/tcg/tb-jmp-cache.h
> +++ b/accel/tcg/tb-jmp-cache.h
> @@ -14,10 +14,15 @@
>  
>  /*
>   * Accessed in parallel; all accesses to 'tb' must be atomic.
> + * For TARGET_TB_PCREL, accesses to 'pc' must be protected by
> + * a load_acquire/store_release to 'tb'.
>   */
>  struct CPUJumpCache {
>      struct {
>          TranslationBlock *tb;
> +#if TARGET_TB_PCREL
> +        target_ulong pc;
> +#endif
>      } array[TB_JMP_CACHE_SIZE];
>  };
>  
> diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
> index 67239b4e5e..21309cf567 100644
> --- a/include/exec/cpu-defs.h
> +++ b/include/exec/cpu-defs.h
> @@ -54,6 +54,9 @@
>  #  error TARGET_PAGE_BITS must be defined in cpu-param.h
>  # endif
>  #endif
> +#ifndef TARGET_TB_PCREL
> +# define TARGET_TB_PCREL 0
> +#endif
>  
>  #define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8)
>  
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 7ea6026ba9..e5f8b224a5 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -496,8 +496,32 @@ struct tb_tc {
>  };
>  
>  struct TranslationBlock {
> -    target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
> -    target_ulong cs_base; /* CS base for this block */
> +#if !TARGET_TB_PCREL
> +    /*
> +     * Guest PC corresponding to this block.  This must be the true
> +     * virtual address.  Therefore e.g. x86 stores EIP + CS_BASE, and
> +     * targets like Arm, MIPS, HP-PA, which reuse low bits for ISA or
> +     * privilege, must store those bits elsewhere.
> +     *
> +     * If TARGET_TB_PCREL, the opcodes for the TranslationBlock are
> +     * written such that the TB is associated only with the physical
> +     * page and may be run in any virtual address context.  In this case,
> +     * PC must always be taken from ENV in a target-specific manner.
> +     * Unwind information is taken as offsets from the page, to be
> +     * deposited into the "current" PC.
> +     */
> +    target_ulong pc;
> +#endif
> +
> +    /*
> +     * Target-specific data associated with the TranslationBlock, e.g.:
> +     * x86: the original user, the Code Segment virtual base,
> +     * arm: an extension of tb->flags,
> +     * s390x: instruction data for EXECUTE,
> +     * sparc: the next pc of the instruction queue (for delay slots).
> +     */
> +    target_ulong cs_base;
> +
>      uint32_t flags; /* flags defining in which context the code was generated */
>      uint32_t cflags;    /* compile flags */
>  
> @@ -573,7 +597,11 @@ struct TranslationBlock {
>  /* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */
>  static inline target_ulong tb_pc(const TranslationBlock *tb)
>  {
> +#if TARGET_TB_PCREL
> +    qemu_build_not_reached();
> +#else
>      return tb->pc;
> +#endif
>  }
>  
>  /* Hide the qatomic_read to make code a little easier on the eyes */
> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> index 8b3f8435fb..acb5646b03 100644
> --- a/accel/tcg/cpu-exec.c
> +++ b/accel/tcg/cpu-exec.c
> @@ -186,7 +186,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
>      const TranslationBlock *tb = p;
>      const struct tb_desc *desc = d;
>  
> -    if (tb_pc(tb) == desc->pc &&
> +    if ((TARGET_TB_PCREL || tb_pc(tb) == desc->pc) &&
>          tb->page_addr[0] == desc->page_addr0 &&
>          tb->cs_base == desc->cs_base &&
>          tb->flags == desc->flags &&
> @@ -237,7 +237,8 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
>          return NULL;
>      }
>      desc.page_addr0 = phys_pc;
> -    h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
> +    h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : pc),
> +                     flags, cflags, *cpu->trace_dstate);
>      return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
>  }
>  
> @@ -247,27 +248,52 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
>                                            uint32_t flags, uint32_t cflags)
>  {
>      TranslationBlock *tb;
> +    CPUJumpCache *jc;
>      uint32_t hash;
>  
>      /* we should never be trying to look up an INVALID tb */
>      tcg_debug_assert(!(cflags & CF_INVALID));
>  
>      hash = tb_jmp_cache_hash_func(pc);
> -    tb = qatomic_rcu_read(&cpu->tb_jmp_cache->array[hash].tb);
> +    jc = cpu->tb_jmp_cache;
> +#if TARGET_TB_PCREL
> +    /* Use acquire to ensure current load of pc from jc. */
> +    tb = qatomic_load_acquire(&jc->array[hash].tb);
> +#else
> +    /* Use rcu_read to ensure current load of pc from *tb. */
> +    tb = qatomic_rcu_read(&jc->array[hash].tb);
> +#endif

You could further hide the #ifdef nastiness by wrapping this load and
the store further down into appropriate inline helpers in tb-jmp-cache.h.

>  
> -    if (likely(tb &&
> -               tb->pc == pc &&
> -               tb->cs_base == cs_base &&
> -               tb->flags == flags &&
> -               tb->trace_vcpu_dstate == *cpu->trace_dstate &&
> -               tb_cflags(tb) == cflags)) {
> -        return tb;
> +    if (likely(tb)) {
> +        target_ulong jmp_pc;
> +
> +#if TARGET_TB_PCREL
> +        jmp_pc = jc->array[hash].pc;
> +#else
> +        jmp_pc = tb_pc(tb);
> +#endif
> +        if (jmp_pc == pc &&
> +            tb->cs_base == cs_base &&
> +            tb->flags == flags &&
> +            tb->trace_vcpu_dstate == *cpu->trace_dstate &&
> +            tb_cflags(tb) == cflags) {
> +            return tb;
> +        }
>      }
> +
>      tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
>      if (tb == NULL) {
>          return NULL;
>      }
> -    qatomic_set(&cpu->tb_jmp_cache->array[hash].tb, tb);
> +
> +#if TARGET_TB_PCREL
> +    jc->array[hash].pc = pc;
> +    /* Use store_release on tb to ensure pc is written first. */
> +    qatomic_store_release(&jc->array[hash].tb, tb);
> +#else
> +    qatomic_set(&jc->array[hash].tb, tb);
> +#endif
> +
>      return tb;
>  }
>  
> @@ -453,6 +479,7 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
>          if (cc->tcg_ops->synchronize_from_tb) {
>              cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
>          } else {
> +            assert(!TARGET_TB_PCREL);
>              assert(cc->set_pc);
>              cc->set_pc(cpu, tb_pc(last_tb));
>          }
> @@ -1002,7 +1029,14 @@ int cpu_exec(CPUState *cpu)
>                   * for the fast lookup
>                   */
>                  h = tb_jmp_cache_hash_func(pc);

And re-use the helper here as well.

> +
> +#if TARGET_TB_PCREL
> +                cpu->tb_jmp_cache->array[h].pc = pc;
> +                /* Use store_release on tb to ensure pc is current. */
> +                qatomic_store_release(&cpu->tb_jmp_cache->array[h].tb, tb);
> +#else
>                  qatomic_set(&cpu->tb_jmp_cache->array[h].tb, tb);
> +#endif
>              }
>  
>  #ifndef CONFIG_USER_ONLY
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 13c964dcd8..776ac9efe4 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -299,7 +299,7 @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
>  
>          for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
>              if (i == 0) {
> -                prev = (j == 0 ? tb_pc(tb) : 0);
> +                prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
>              } else {
>                  prev = tcg_ctx->gen_insn_data[i - 1][j];
>              }
> @@ -327,7 +327,7 @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
>  static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
>                                       uintptr_t searched_pc, bool reset_icount)
>  {
> -    target_ulong data[TARGET_INSN_START_WORDS] = { tb_pc(tb) };
> +    target_ulong data[TARGET_INSN_START_WORDS];
>      uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
>      CPUArchState *env = cpu->env_ptr;
>      const uint8_t *p = tb->tc.ptr + tb->tc.size;
> @@ -343,6 +343,11 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
>          return -1;
>      }
>  
> +    memset(data, 0, sizeof(data));
> +    if (!TARGET_TB_PCREL) {
> +        data[0] = tb_pc(tb);
> +    }
> +
>      /* Reconstruct the stored insn data while looking for the point at
>         which the end of the insn exceeds the searched_pc.  */
>      for (i = 0; i < num_insns; ++i) {
> @@ -885,13 +890,13 @@ static bool tb_cmp(const void *ap, const void *bp)
>      const TranslationBlock *a = ap;
>      const TranslationBlock *b = bp;
>  
> -    return tb_pc(a) == tb_pc(b) &&
> -        a->cs_base == b->cs_base &&
> -        a->flags == b->flags &&
> -        (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
> -        a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
> -        a->page_addr[0] == b->page_addr[0] &&
> -        a->page_addr[1] == b->page_addr[1];
> +    return ((TARGET_TB_PCREL || tb_pc(a) == tb_pc(b)) &&
> +            a->cs_base == b->cs_base &&
> +            a->flags == b->flags &&
> +            (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
> +            a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
> +            a->page_addr[0] == b->page_addr[0] &&
> +            a->page_addr[1] == b->page_addr[1]);
>  }
>  
>  void tb_htable_init(void)
> @@ -1170,8 +1175,8 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
>  
>      /* remove the TB from the hash list */
>      phys_pc = tb->page_addr[0];
> -    h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, orig_cflags,
> -                     tb->trace_vcpu_dstate);
> +    h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
> +                     tb->flags, orig_cflags, tb->trace_vcpu_dstate);
>      if (!qht_remove(&tb_ctx.htable, tb, h)) {
>          return;
>      }
> @@ -1187,11 +1192,18 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
>      }
>  
>      /* remove the TB from the hash list */
> -    h = tb_jmp_cache_hash_func(tb->pc);
> -    CPU_FOREACH(cpu) {
> -        CPUJumpCache *jc = cpu->tb_jmp_cache;
> -        if (qatomic_read(&jc->array[h].tb) == tb) {
> -            qatomic_set(&jc->array[h].tb, NULL);
> +    if (TARGET_TB_PCREL) {
> +        /* A TB may be at any virtual address */
> +        CPU_FOREACH(cpu) {
> +            tcg_flush_jmp_cache(cpu);
> +        }
> +    } else {
> +        h = tb_jmp_cache_hash_func(tb_pc(tb));
> +        CPU_FOREACH(cpu) {
> +            CPUJumpCache *jc = cpu->tb_jmp_cache;
> +            if (qatomic_read(&jc->array[h].tb) == tb) {
> +                qatomic_set(&jc->array[h].tb, NULL);
> +            }
>          }

This code should also be an inline helper.

>      }
>  
> @@ -1302,8 +1314,8 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
>      }
>  
>      /* add in the hash table */
> -    h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, tb->cflags,
> -                     tb->trace_vcpu_dstate);
> +    h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
> +                     tb->flags, tb->cflags, tb->trace_vcpu_dstate);
>      qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
>  
>      /* remove TB from the page(s) if we couldn't insert it */
> @@ -1373,7 +1385,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>  
>      gen_code_buf = tcg_ctx->code_gen_ptr;
>      tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
> +#if !TARGET_TB_PCREL
>      tb->pc = pc;
> +#endif
>      tb->cs_base = cs_base;
>      tb->flags = flags;
>      tb->cflags = cflags;


-- 
Alex Bennée


  reply	other threads:[~2022-10-03 14:00 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-30 21:26 [PATCH v6 00/18] tcg: CPUTLBEntryFull and TARGET_TB_PCREL Richard Henderson
2022-09-30 21:26 ` [PATCH v6 01/18] cpu: cache CPUClass in CPUState for hot code paths Richard Henderson
2022-09-30 21:26 ` [PATCH v6 02/18] hw/core/cpu-sysemu: used cached class in cpu_asidx_from_attrs Richard Henderson
2022-09-30 21:26 ` [PATCH v6 03/18] cputlb: used cached CPUClass in our hot-paths Richard Henderson
2022-09-30 21:26 ` [PATCH v6 04/18] accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull Richard Henderson
2022-09-30 21:26 ` [PATCH v6 05/18] accel/tcg: Drop addr member from SavedIOTLB Richard Henderson
2022-09-30 21:26 ` [PATCH v6 06/18] accel/tcg: Suppress auto-invalidate in probe_access_internal Richard Henderson
2022-09-30 21:26 ` [PATCH v6 07/18] accel/tcg: Introduce probe_access_full Richard Henderson
2022-09-30 21:26 ` [PATCH v6 08/18] accel/tcg: Introduce tlb_set_page_full Richard Henderson
2022-09-30 21:26 ` [PATCH v6 09/18] include/exec: Introduce TARGET_PAGE_ENTRY_EXTRA Richard Henderson
2022-09-30 21:26 ` [PATCH v6 10/18] accel/tcg: Remove PageDesc code_bitmap Richard Henderson
2022-09-30 21:26 ` [PATCH v6 11/18] accel/tcg: Use bool for page_find_alloc Richard Henderson
2022-09-30 21:26 ` [PATCH v6 12/18] accel/tcg: Use DisasContextBase in plugin_gen_tb_start Richard Henderson
2022-09-30 21:26 ` [PATCH v6 13/18] accel/tcg: Do not align tb->page_addr[0] Richard Henderson
2022-10-03 12:47   ` Alex Bennée
2022-10-03 13:54     ` Richard Henderson
2022-10-03 14:59       ` Alex Bennée
2022-09-30 21:26 ` [PATCH v6 14/18] accel/tcg: Inline tb_flush_jmp_cache Richard Henderson
2022-10-03 12:51   ` Alex Bennée
2022-09-30 21:26 ` [PATCH v6 15/18] include/hw/core: Create struct CPUJumpCache Richard Henderson
2022-10-03 12:57   ` Alex Bennée
2022-09-30 21:26 ` [PATCH v6 16/18] hw/core: Add CPUClass.get_pc Richard Henderson
2022-09-30 21:56   ` Taylor Simpson
2022-10-03  7:58   ` Mark Cave-Ayland
2022-10-03 13:03   ` Alex Bennée
2022-09-30 21:26 ` [PATCH v6 17/18] accel/tcg: Introduce tb_pc and log_pc Richard Henderson
2022-09-30 21:26 ` [PATCH v6 18/18] accel/tcg: Introduce TARGET_TB_PCREL Richard Henderson
2022-10-03 13:46   ` Alex Bennée [this message]
2022-09-30 21:29 ` [PATCH v6 00/18] tcg: CPUTLBEntryFull and TARGET_TB_PCREL Richard Henderson
2022-10-03 16:22 ` Alex Bennée

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87y1txm2xu.fsf@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=alex.bennee@linux.org \
    --cc=peter.maydell@linux.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.