From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: qemu-devel@nongnu.org
Subject: Re: [PATCH v5 05/36] tcg: Add TCG_CALL_{RET,ARG}_BY_REF
Date: Fri, 27 Jan 2023 10:40:43 +0000
Message-ID: <87zga4npk2.fsf@linaro.org>
In-Reply-To: <20230126043824.54819-6-richard.henderson@linaro.org>


Richard Henderson <richard.henderson@linaro.org> writes:

> These will be used by some hosts, both 32 and 64-bit, to pass and
> return i128.  Not yet used, because allocation is not yet enabled.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/tcg-internal.h |   3 +
>  tcg/tcg.c          | 135 ++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 135 insertions(+), 3 deletions(-)
>
> diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
> index 6e50aeba3a..2ec1ea01df 100644
> --- a/tcg/tcg-internal.h
> +++ b/tcg/tcg-internal.h
> @@ -36,6 +36,7 @@
>   */
>  typedef enum {
>      TCG_CALL_RET_NORMAL,         /* by registers */
> +    TCG_CALL_RET_BY_REF,         /* for i128, by reference */
>  } TCGCallReturnKind;
>  
>  typedef enum {
> @@ -44,6 +45,8 @@ typedef enum {
>      TCG_CALL_ARG_EXTEND,         /* for i32, as a sign/zero-extended i64 */
>      TCG_CALL_ARG_EXTEND_U,       /*      ... as a zero-extended i64 */
>      TCG_CALL_ARG_EXTEND_S,       /*      ... as a sign-extended i64 */
> +    TCG_CALL_ARG_BY_REF,         /* for i128, by reference, first */
> +    TCG_CALL_ARG_BY_REF_N,       /*       ... by reference, subsequent */
>  } TCGCallArgumentKind;
>  
>  typedef struct TCGCallArgumentLoc {
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index a561ef3ced..644dc53196 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -104,8 +104,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
>  static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
>  static void tcg_out_movi(TCGContext *s, TCGType type,
>                           TCGReg ret, tcg_target_long arg);
> -static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long)
> -    __attribute__((unused));
> +static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
>  static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
>  static void tcg_out_goto_tb(TCGContext *s, int which);
>  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
> @@ -683,6 +682,38 @@ static void layout_arg_normal_n(TCGCumulativeArgs *cum,
>      cum->arg_slot += n;
>  }
>  
> +static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
> +{
> +    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
> +    int n = 128 / TCG_TARGET_REG_BITS;
> +
> +    /* The first subindex carries the pointer. */
> +    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
> +
> +    /*
> +     * The callee is allowed to clobber memory associated with
> +     * structures passed by reference.  Therefore we must make copies.
> +     * Allocate space from "ref_slot", which will be adjusted to
> +     * follow the parameters on the stack.
> +     */
> +    loc[0].ref_slot = cum->ref_slot;
> +
> +    /*
> +     * Subsequent words also go into the reference slot, but
> +     * do not accumulate into the regular arguments.
> +     */
> +    for (int i = 1; i < n; ++i) {
> +        loc[i] = (TCGCallArgumentLoc){
> +            .kind = TCG_CALL_ARG_BY_REF_N,
> +            .arg_idx = cum->arg_idx,
> +            .tmp_subindex = i,
> +            .ref_slot = cum->ref_slot + i,
> +        };
> +    }
> +    cum->info_in_idx += n;
> +    cum->ref_slot += n;
> +}

I'm surprised this is in the core TCG. Are the stack frames organised
the same way across all our host architectures?
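
Partially answering myself after a re-read: as far as I can see the
generic code only ever touches the outgoing-argument area through
TCG_TARGET_CALL_STACK_OFFSET and tcg_target_long-sized slot indices,
as in load_arg_ref below -- so the frames don't have to be organised
identically, only describable through those knobs. A standalone
restatement of that reading, with made-up stand-in values:

    #include <stdint.h>

    /* Stand-ins for the backend-defined knobs; the real values live
     * in each tcg/<host>/tcg-target.h and are not these. */
    #define TCG_TARGET_CALL_STACK_OFFSET 0
    typedef intptr_t tcg_target_long;

    /* Every generic stack access reduces to this slot -> byte-offset
     * mapping relative to TCG_REG_CALL_STACK. */
    static intptr_t stk_slot_offset(int stk_slot)
    {
        return TCG_TARGET_CALL_STACK_OFFSET
               + stk_slot * (intptr_t)sizeof(tcg_target_long);
    }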

> +
>  static void init_call_layout(TCGHelperInfo *info)
>  {
>      int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
> @@ -718,6 +749,14 @@ static void init_call_layout(TCGHelperInfo *info)
>          case TCG_CALL_RET_NORMAL:
>              assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));
>              break;
> +        case TCG_CALL_RET_BY_REF:
> +            /*
> +             * Allocate the first argument to the output.
> +             * We don't need to store this anywhere, just make it
> +             * unavailable for use in the input loop below.
> +             */
> +            cum.arg_slot = 1;
> +            break;

It would have helped my understanding here if ->typemask were
documented, or accessed via something like dh_get_typemask(0).
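
Something like this is what I had in mind -- hypothetical, going by
my reading of include/exec/helper-head.h, where each position gets a
3-bit type code, bits [2:0] for the return value and 3 bits per
argument after that:

    #include <stdint.h>

    /* Hypothetical accessor, not in the tree.  n == 0 is the return
     * type; n >= 1 is argument n - 1. */
    static inline unsigned dh_get_typecode(uint32_t typemask, int n)
    {
        return (typemask >> (n * 3)) & 7;
    }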

>          default:
>              qemu_build_not_reached();
>          }
> @@ -796,6 +835,9 @@ static void init_call_layout(TCGHelperInfo *info)
>              case TCG_CALL_ARG_NORMAL:
>                  layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
>                  break;
> +            case TCG_CALL_ARG_BY_REF:
> +                layout_arg_by_ref(&cum, info);
> +                break;
>              default:
>                  qemu_build_not_reached();
>              }
> @@ -811,7 +853,39 @@ static void init_call_layout(TCGHelperInfo *info)
>      assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
>      /* Validate the backend has enough argument space. */
>      assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
> -    assert(cum.ref_slot <= max_stk_slots);
> +
> +    /*
> +     * Relocate the "ref_slot" area to the end of the parameters.
> +     * Minimizing this stack offset helps code size for x86,
> +     * which has a signed 8-bit offset encoding.
> +     */

I don't quite follow this. Are we free to move parameters around in the
stack frame? I thought the position would be directly related to the
argument number?
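
Working it through with made-up numbers did help, so for the record
(an x86_64-ish host: 6 iarg registers, 8-byte slots, 16-byte Int128
alignment, and a call using 7 argument slots plus one by-ref copy) --
and if I read it right, only the scratch copy area moves, never the
parameters themselves:

    #include <stdio.h>

    int main(void)
    {
        int max_reg_slots = 6, arg_slot = 7, ref_slot = 2, align = 2;
        int ref_base = 0;

        if (arg_slot > max_reg_slots) {
            ref_base = arg_slot - max_reg_slots;        /* 1 */
            ref_base = (ref_base + align - 1) & -align; /* -> 2 */
        }
        /* Stack slot 0 holds the 7th argument; the Int128 copy is
         * relocated to slots 2..3, just past the real parameters,
         * padded up for alignment. */
        printf("ref_base=%d, copy in slots %d..%d\n",
               ref_base, ref_base, ref_base + ref_slot - 1);
        return 0;
    }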

> +    if (cum.ref_slot != 0) {
> +        int ref_base = 0;
> +
> +        if (cum.arg_slot > max_reg_slots) {
> +            int align = __alignof(Int128) / sizeof(tcg_target_long);
> +
> +            ref_base = cum.arg_slot - max_reg_slots;
> +            if (align > 1) {
> +                ref_base = ROUND_UP(ref_base, align);
> +            }
> +        }
> +        assert(ref_base + cum.ref_slot <= max_stk_slots);
> +
> +        if (ref_base != 0) {
> +            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
> +                TCGCallArgumentLoc *loc = &info->in[i];
> +                switch (loc->kind) {
> +                case TCG_CALL_ARG_BY_REF:
> +                case TCG_CALL_ARG_BY_REF_N:
> +                    loc->ref_slot += ref_base;
> +                    break;
> +                default:
> +                    break;
> +                }
> +            }
> +        }
> +    }
>  }
>  
>  static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
> @@ -1738,6 +1812,8 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
>  
>          switch (loc->kind) {
>          case TCG_CALL_ARG_NORMAL:
> +        case TCG_CALL_ARG_BY_REF:
> +        case TCG_CALL_ARG_BY_REF_N:
>              op->args[pi++] = temp_arg(ts);
>              break;
>  
> @@ -4404,6 +4480,27 @@ static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
>      }
>  }
>  
> +static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
> +                         intptr_t ref_off, TCGRegSet *allocated_regs)
> +{
> +    TCGReg reg;
> +    int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
> +
> +    if (stk_slot < 0) {
> +        reg = tcg_target_call_iarg_regs[arg_slot];
> +        tcg_reg_free(s, reg, *allocated_regs);
> +        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
> +        tcg_regset_set_reg(*allocated_regs, reg);
> +    } else {
> +        reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
> +                            *allocated_regs, 0, false);
> +        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
> +        tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
> +                   TCG_TARGET_CALL_STACK_OFFSET
> +                   + stk_slot * sizeof(tcg_target_long));
> +    }
> +}
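
Minor aside: it took me a second to see that load_arg_ref covers both
placements. A toy restatement of the dispatch with an invented
2-register host, purely illustrative:

    #include <stdio.h>

    enum { NR_IARG_REGS = 2 };  /* invented host */

    static void toy_load_arg_ref(int arg_slot)
    {
        int stk_slot = arg_slot - NR_IARG_REGS;
        if (stk_slot < 0) {
            /* pointer materialised straight into an argument reg */
            printf("lea iarg%d, [base + off]\n", arg_slot);
        } else {
            /* built in a scratch reg, spilled to the outgoing area */
            printf("lea tmp, [base + off]; st tmp -> stk[%d]\n",
                   stk_slot);
        }
    }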
> +
>  static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
>  {
>      const int nb_oargs = TCGOP_CALLO(op);
> @@ -4427,6 +4524,16 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
>          case TCG_CALL_ARG_EXTEND_S:
>              load_arg_normal(s, loc, ts, &allocated_regs);
>              break;
> +        case TCG_CALL_ARG_BY_REF:
> +            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
> +            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
> +                         TCG_TARGET_CALL_STACK_OFFSET
> +                         + loc->ref_slot * sizeof(tcg_target_long),
> +                         &allocated_regs);
> +            break;
> +        case TCG_CALL_ARG_BY_REF_N:
> +            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
> +            break;
>          default:
>              g_assert_not_reached();
>          }
> @@ -4458,6 +4565,19 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
>          save_globals(s, allocated_regs);
>      }
>  
> +    /*
> +     * If the ABI passes a pointer to the returned struct as the first
> +     * argument, load that now.  Pass a pointer to the output home slot.
> +     */
> +    if (info->out_kind == TCG_CALL_RET_BY_REF) {
> +        TCGTemp *ts = arg_temp(op->args[0]);
> +
> +        if (!ts->mem_allocated) {
> +            temp_allocate_frame(s, ts);
> +        }
> +        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
> +    }
> +
>      tcg_out_call(s, tcg_call_func(op), info);
>  
>      /* Assign output registers and emit moves if needed.  */
> @@ -4474,6 +4594,15 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
>              ts->mem_coherent = 0;
>          }
>          break;
> +
> +    case TCG_CALL_RET_BY_REF:
> +        /* The callee has performed a write through the reference. */
> +        for (i = 0; i < nb_oargs; i++) {
> +            TCGTemp *ts = arg_temp(op->args[i]);
> +            ts->val_type = TEMP_VAL_MEM;
> +        }
> +        break;
> +
>      default:
>          g_assert_not_reached();
>      }
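
FWIW the RET_BY_REF flow only really clicked for me once I wrote down
the plain-C equivalent -- illustrative types and names below, not
QEMU code: the callee writes through a hidden pointer passed as the
first argument, so on return the value already lives in the output
home slot, which is why the temps are marked TEMP_VAL_MEM.

    #include <stdio.h>

    typedef struct { unsigned long lo, hi; } I128;

    /* stands in for a helper whose i128 result is returned by ref */
    static void helper_ret_by_ref(I128 *ret, unsigned long a,
                                  unsigned long b)
    {
        ret->lo = a;
        ret->hi = b;
    }

    int main(void)
    {
        I128 out;                       /* the "output home slot" */
        helper_ret_by_ref(&out, 1, 2);  /* &out is arg_slot 0 */
        printf("%lu %lu\n", out.lo, out.hi);
        return 0;
    }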


-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

