All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] tcg/loongarch64: add neg tcg_op and direct jump support
@ 2022-10-12  9:13 Qi Hu
  2022-10-12  9:13 ` [PATCH 1/2] tcg/loongarch64: Implement INDEX_op_neg_i{32,64} Qi Hu
  2022-10-12  9:13 ` [PATCH 2/2] tcg/loongarch64: Add direct jump support Qi Hu
  0 siblings, 2 replies; 11+ messages in thread
From: Qi Hu @ 2022-10-12  9:13 UTC (permalink / raw)
  To: WANG Xuerui, Richard Henderson; +Cc: qemu-devel

Hi,

This patch series add neg tcg_op and direct jump support into loongarch tcg backend.


Qi Hu (2):
  tcg/loongarch64: Implement INDEX_op_neg_i{32,64}
  tcg/loongarch64: Add direct jump support

 tcg/loongarch64/tcg-insn-defs.c.inc |  3 ++
 tcg/loongarch64/tcg-target.c.inc    | 58 ++++++++++++++++++++++++++---
 tcg/loongarch64/tcg-target.h        |  6 +--
 3 files changed, 59 insertions(+), 8 deletions(-)

-- 
2.37.3



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/2] tcg/loongarch64: Implement INDEX_op_neg_i{32,64}
  2022-10-12  9:13 [PATCH 0/2] tcg/loongarch64: add neg tcg_op and direct jump support Qi Hu
@ 2022-10-12  9:13 ` Qi Hu
  2022-10-12  9:41   ` WANG Xuerui
  2022-10-12  9:13 ` [PATCH 2/2] tcg/loongarch64: Add direct jump support Qi Hu
  1 sibling, 1 reply; 11+ messages in thread
From: Qi Hu @ 2022-10-12  9:13 UTC (permalink / raw)
  To: WANG Xuerui, Richard Henderson; +Cc: qemu-devel

Signed-off-by: Qi Hu <huqi@loongson.cn>
---
 tcg/loongarch64/tcg-target.c.inc | 9 +++++++++
 tcg/loongarch64/tcg-target.h     | 4 ++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index a3debf6da7..f5a214a17f 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1125,6 +1125,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO);
         break;
 
+    case INDEX_op_neg_i32:
+        tcg_out_opc_sub_w(s, a0, TCG_REG_ZERO, a1);
+        break;
+    case INDEX_op_neg_i64:
+        tcg_out_opc_sub_d(s, a0, TCG_REG_ZERO, a1);
+        break;
+
     case INDEX_op_nor_i32:
     case INDEX_op_nor_i64:
         if (c2) {
@@ -1503,6 +1510,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_ext_i32_i64:
     case INDEX_op_not_i32:
     case INDEX_op_not_i64:
+    case INDEX_op_neg_i32:
+    case INDEX_op_neg_i64:
     case INDEX_op_extract_i32:
     case INDEX_op_extract_i64:
     case INDEX_op_bswap16_i32:
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index d58a6162f2..67380b2432 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -114,7 +114,7 @@ typedef enum {
 #define TCG_TARGET_HAS_bswap16_i32      1
 #define TCG_TARGET_HAS_bswap32_i32      1
 #define TCG_TARGET_HAS_not_i32          1
-#define TCG_TARGET_HAS_neg_i32          0
+#define TCG_TARGET_HAS_neg_i32          1
 #define TCG_TARGET_HAS_andc_i32         1
 #define TCG_TARGET_HAS_orc_i32          1
 #define TCG_TARGET_HAS_eqv_i32          0
@@ -150,7 +150,7 @@ typedef enum {
 #define TCG_TARGET_HAS_bswap32_i64      1
 #define TCG_TARGET_HAS_bswap64_i64      1
 #define TCG_TARGET_HAS_not_i64          1
-#define TCG_TARGET_HAS_neg_i64          0
+#define TCG_TARGET_HAS_neg_i64          1
 #define TCG_TARGET_HAS_andc_i64         1
 #define TCG_TARGET_HAS_orc_i64          1
 #define TCG_TARGET_HAS_eqv_i64          0
-- 
2.37.3



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/2] tcg/loongarch64: Add direct jump support
  2022-10-12  9:13 [PATCH 0/2] tcg/loongarch64: add neg tcg_op and direct jump support Qi Hu
  2022-10-12  9:13 ` [PATCH 1/2] tcg/loongarch64: Implement INDEX_op_neg_i{32,64} Qi Hu
@ 2022-10-12  9:13 ` Qi Hu
  2022-10-12 11:34   ` WANG Xuerui
  2022-10-13  3:01   ` [PATCH v2] " Qi Hu
  1 sibling, 2 replies; 11+ messages in thread
From: Qi Hu @ 2022-10-12  9:13 UTC (permalink / raw)
  To: WANG Xuerui, Richard Henderson; +Cc: qemu-devel

Similar to the ARM64, LoongArch has PC-relative instructions such as
PCADDU18I. These instructions can be used to support direct jump for
LoongArch. Additionally, if instruction "B offset" can cover the target
address, "tb_target_set_jmp_target" will only patch the "B offset".

Signed-off-by: Qi Hu <huqi@loongson.cn>
---
 tcg/loongarch64/tcg-insn-defs.c.inc |  3 ++
 tcg/loongarch64/tcg-target.c.inc    | 49 ++++++++++++++++++++++++++---
 tcg/loongarch64/tcg-target.h        |  2 +-
 3 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/tcg/loongarch64/tcg-insn-defs.c.inc b/tcg/loongarch64/tcg-insn-defs.c.inc
index d162571856..f5869c6bb1 100644
--- a/tcg/loongarch64/tcg-insn-defs.c.inc
+++ b/tcg/loongarch64/tcg-insn-defs.c.inc
@@ -112,6 +112,9 @@ typedef enum {
     OPC_BLE = 0x64000000,
     OPC_BGTU = 0x68000000,
     OPC_BLEU = 0x6c000000,
+    /* pseudo-instruction */
+    NOP = 0x03400000,
+
 } LoongArchInsn;
 
 static int32_t __attribute__((unused))
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index f5a214a17f..3a7b1df081 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1058,11 +1058,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_goto_tb:
-        assert(s->tb_jmp_insn_offset == 0);
-        /* indirect jump method */
-        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
-                   (uintptr_t)(s->tb_jmp_target_addr + a0));
-        tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+        if (s->tb_jmp_insn_offset != NULL) {
+            /* TCG_TARGET_HAS_direct_jump */
+            /* Ensure that PCADD+JIRL are 8-byte aligned so that an atomic
+               write can be used to patch the target address. */
+            if ((uintptr_t)s->code_ptr & 7) {
+                tcg_out32(s, NOP);
+            }
+            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+            /* actual branch destination will be patched by
+               tb_target_set_jmp_target later. */
+            tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
+            tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+        } else {
+            /* !TCG_TARGET_HAS_direct_jump */
+            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
+                    (uintptr_t)(s->tb_jmp_target_addr + a0));
+            tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+        }
         set_jmp_reset_offset(s, a0);
         break;
 
@@ -1708,6 +1721,32 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
 }
 
+void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+                              uintptr_t jmp_rw, uintptr_t addr)
+{
+    tcg_insn_unit i1, i2;
+
+    ptrdiff_t offset = addr - jmp_rx;
+
+    if (offset == sextreg(offset, 0, 28)) {
+        i1 = OPC_B | ((offset >> 18) & 0x3ff) | ((offset << 8) & 0x3fffc00);
+        i2 = NOP;
+    } else {
+        offset >>= 2;
+
+        ptrdiff_t upper, lower;
+        upper = ((offset + (1 << 15)) >> 16) & 0xfffff;
+        lower = (offset & 0xffff);
+        /* patch pcaddu18i */
+        i1 = OPC_PCADDU18I | upper << 5 | TCG_REG_T0;
+        /* patch jirl */
+        i2 = OPC_JIRL | lower << 10 | TCG_REG_T0 << 5;
+    }
+    uint64_t pair = (uint64_t)i2 << 32 | i1;
+    qatomic_set((uint64_t *)jmp_rw, pair);
+    flush_idcache_range(jmp_rx, jmp_rw, 8);
+}
+
 typedef struct {
     DebugFrameHeader h;
     uint8_t fde_def_cfa[4];
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index 67380b2432..0e552731f5 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -123,7 +123,7 @@ typedef enum {
 #define TCG_TARGET_HAS_clz_i32          1
 #define TCG_TARGET_HAS_ctz_i32          1
 #define TCG_TARGET_HAS_ctpop_i32        0
-#define TCG_TARGET_HAS_direct_jump      0
+#define TCG_TARGET_HAS_direct_jump      1
 #define TCG_TARGET_HAS_brcond2          0
 #define TCG_TARGET_HAS_setcond2         0
 #define TCG_TARGET_HAS_qemu_st8_i32     0
-- 
2.37.3



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] tcg/loongarch64: Implement INDEX_op_neg_i{32,64}
  2022-10-12  9:13 ` [PATCH 1/2] tcg/loongarch64: Implement INDEX_op_neg_i{32,64} Qi Hu
@ 2022-10-12  9:41   ` WANG Xuerui
  2022-10-13  1:25     ` Qi Hu
  0 siblings, 1 reply; 11+ messages in thread
From: WANG Xuerui @ 2022-10-12  9:41 UTC (permalink / raw)
  To: Qi Hu, WANG Xuerui, Richard Henderson; +Cc: qemu-devel

Hi,

On 2022/10/12 17:13, Qi Hu wrote:
> Signed-off-by: Qi Hu <huqi@loongson.cn>
> ---
>   tcg/loongarch64/tcg-target.c.inc | 9 +++++++++
>   tcg/loongarch64/tcg-target.h     | 4 ++--
>   2 files changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
> index a3debf6da7..f5a214a17f 100644
> --- a/tcg/loongarch64/tcg-target.c.inc
> +++ b/tcg/loongarch64/tcg-target.c.inc
> @@ -1125,6 +1125,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>           tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO);
>           break;
>   
> +    case INDEX_op_neg_i32:
> +        tcg_out_opc_sub_w(s, a0, TCG_REG_ZERO, a1);
> +        break;
> +    case INDEX_op_neg_i64:
> +        tcg_out_opc_sub_d(s, a0, TCG_REG_ZERO, a1);
> +        break;
> +
>       case INDEX_op_nor_i32:
>       case INDEX_op_nor_i64:
>           if (c2) {
> @@ -1503,6 +1510,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
>       case INDEX_op_ext_i32_i64:
>       case INDEX_op_not_i32:
>       case INDEX_op_not_i64:
> +    case INDEX_op_neg_i32:
> +    case INDEX_op_neg_i64:
>       case INDEX_op_extract_i32:
>       case INDEX_op_extract_i64:
>       case INDEX_op_bswap16_i32:
> diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
> index d58a6162f2..67380b2432 100644
> --- a/tcg/loongarch64/tcg-target.h
> +++ b/tcg/loongarch64/tcg-target.h
> @@ -114,7 +114,7 @@ typedef enum {
>   #define TCG_TARGET_HAS_bswap16_i32      1
>   #define TCG_TARGET_HAS_bswap32_i32      1
>   #define TCG_TARGET_HAS_not_i32          1
> -#define TCG_TARGET_HAS_neg_i32          0
> +#define TCG_TARGET_HAS_neg_i32          1
>   #define TCG_TARGET_HAS_andc_i32         1
>   #define TCG_TARGET_HAS_orc_i32          1
>   #define TCG_TARGET_HAS_eqv_i32          0
> @@ -150,7 +150,7 @@ typedef enum {
>   #define TCG_TARGET_HAS_bswap32_i64      1
>   #define TCG_TARGET_HAS_bswap64_i64      1
>   #define TCG_TARGET_HAS_not_i64          1
> -#define TCG_TARGET_HAS_neg_i64          0
> +#define TCG_TARGET_HAS_neg_i64          1
>   #define TCG_TARGET_HAS_andc_i64         1
>   #define TCG_TARGET_HAS_orc_i64          1
>   #define TCG_TARGET_HAS_eqv_i64          0
The whole change is not necessary, if target doesn't have neg then the 
target-independent logic already makes sure a sub with the same 
semantics is emitted. This is explained in the commit message of that 
commit introducing add/sub support.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] tcg/loongarch64: Add direct jump support
  2022-10-12  9:13 ` [PATCH 2/2] tcg/loongarch64: Add direct jump support Qi Hu
@ 2022-10-12 11:34   ` WANG Xuerui
  2022-10-12 11:53     ` WANG Xuerui
  2022-10-13  1:25     ` Qi Hu
  2022-10-13  3:01   ` [PATCH v2] " Qi Hu
  1 sibling, 2 replies; 11+ messages in thread
From: WANG Xuerui @ 2022-10-12 11:34 UTC (permalink / raw)
  To: Qi Hu, WANG Xuerui, Richard Henderson; +Cc: qemu-devel

Hi,

Thanks for the improvement! Some room for improvement though...

On 2022/10/12 17:13, Qi Hu wrote:
> Similar to the ARM64, LoongArch has PC-relative instructions such as
> PCADDU18I. These instructions can be used to support direct jump for
> LoongArch. Additionally, if instruction "B offset" can cover the target
> address, "tb_target_set_jmp_target" will only patch the "B offset".

"if the target is within +/- 28 bits range, a single "B offset" plus a 
nop will be used instead" might sound better?

>
> Signed-off-by: Qi Hu <huqi@loongson.cn>
> ---
>   tcg/loongarch64/tcg-insn-defs.c.inc |  3 ++
>   tcg/loongarch64/tcg-target.c.inc    | 49 ++++++++++++++++++++++++++---
>   tcg/loongarch64/tcg-target.h        |  2 +-
>   3 files changed, 48 insertions(+), 6 deletions(-)
>
> diff --git a/tcg/loongarch64/tcg-insn-defs.c.inc b/tcg/loongarch64/tcg-insn-defs.c.inc
> index d162571856..f5869c6bb1 100644
> --- a/tcg/loongarch64/tcg-insn-defs.c.inc
> +++ b/tcg/loongarch64/tcg-insn-defs.c.inc
> @@ -112,6 +112,9 @@ typedef enum {
>       OPC_BLE = 0x64000000,
>       OPC_BGTU = 0x68000000,
>       OPC_BLEU = 0x6c000000,
> +    /* pseudo-instruction */
> +    NOP = 0x03400000,
> +

You certainly saw the big fat comment block saying the file was 
auto-generated and thus shouldn't be touched, didn't you? ;-)

I saw your need for a NOP constant later though, and you can instead add 
`#define OPC_NOP OPC_ANDI` in tcg-target.c.inc, just below the include 
of tcg-insn-defs.c.inc.

>   } LoongArchInsn;
>   
>   static int32_t __attribute__((unused))
> diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
> index f5a214a17f..3a7b1df081 100644
> --- a/tcg/loongarch64/tcg-target.c.inc
> +++ b/tcg/loongarch64/tcg-target.c.inc
> @@ -1058,11 +1058,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>           break;
>   
>       case INDEX_op_goto_tb:
> -        assert(s->tb_jmp_insn_offset == 0);
> -        /* indirect jump method */
> -        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
> -                   (uintptr_t)(s->tb_jmp_target_addr + a0));
> -        tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
> +        if (s->tb_jmp_insn_offset != NULL) {
> +            /* TCG_TARGET_HAS_direct_jump */
> +            /* Ensure that PCADD+JIRL are 8-byte aligned so that an atomic
> +               write can be used to patch the target address. */
There isn't a "PCADD" in LoongArch, and it's possible for the 2 insns to 
be "B + NOP" as well. So better reword a bit like "Ensure that the 
8-byte direct jump fragment is aligned ..." (and add another space after 
the period at the end of the sentence).
> +            if ((uintptr_t)s->code_ptr & 7) {
> +                tcg_out32(s, NOP);
> +            }
> +            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
> +            /* actual branch destination will be patched by
> +               tb_target_set_jmp_target later. */
Either make it a proper sentence by title-casing "actual" and adding 
another space after the trailing period, or remove the period.
> +            tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
> +            tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
> +        } else {
> +            /* !TCG_TARGET_HAS_direct_jump */
> +            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
> +                    (uintptr_t)(s->tb_jmp_target_addr + a0));
> +            tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
> +        }
We unconditionally support the direct jump method after the change, so 
do we need to retain this block any more? Note the aarch64 port 
currently does the same (declaring unconditional support for direct 
jumps but keeping both code paths), if we want to remove this code path 
then you may want to remove the aarch64 one respectively too.
>           set_jmp_reset_offset(s, a0);
>           break;
>   
> @@ -1708,6 +1721,32 @@ static void tcg_target_init(TCGContext *s)
>       tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
>   }
>   
> +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
> +                              uintptr_t jmp_rw, uintptr_t addr)
> +{

Move the function to somewhere above, like above the "/* Entrypoints */" 
section (and maybe introduce another section)? The various parts are 
more-or-less arranged in a particular order, so it's like going back to 
implementing concrete things after finishing everything and calling it a 
day.

Also you forgot to remove the now inappropriate comment about this 
helper in tcg-target.h.

> +    tcg_insn_unit i1, i2;
> +
> +    ptrdiff_t offset = addr - jmp_rx;
> +
> +    if (offset == sextreg(offset, 0, 28)) {
> +        i1 = OPC_B | ((offset >> 18) & 0x3ff) | ((offset << 8) & 0x3fffc00);
Use encode_sd10k16_insn instead. No need to juggle the bits yourself and 
you get nice range assertion for free.
> +        i2 = NOP;
> +    } else {
> +        offset >>= 2;
> +
> +        ptrdiff_t upper, lower;
Promote the declaration to the top of the block (before offset 
shifting), IIRC that's the official coding style.
> +        upper = ((offset + (1 << 15)) >> 16) & 0xfffff;
> +        lower = (offset & 0xffff);
> +        /* patch pcaddu18i */
> +        i1 = OPC_PCADDU18I | upper << 5 | TCG_REG_T0;
> +        /* patch jirl */
> +        i2 = OPC_JIRL | lower << 10 | TCG_REG_T0 << 5;

Similarly you could simplify the assembly here, with encode_dsj20_insn 
and encode_djsk16_insn respectively.

And the code is straight-forward enough to not need the "patch foo" 
comments.

> +    }
> +    uint64_t pair = (uint64_t)i2 << 32 | i1;
Maybe add a couple more parentheses to make the precedence easier to 
follow? (for people who struggle to remember things like this, like me)
> +    qatomic_set((uint64_t *)jmp_rw, pair);
> +    flush_idcache_range(jmp_rx, jmp_rw, 8);
> +}
> +
>   typedef struct {
>       DebugFrameHeader h;
>       uint8_t fde_def_cfa[4];
> diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
> index 67380b2432..0e552731f5 100644
> --- a/tcg/loongarch64/tcg-target.h
> +++ b/tcg/loongarch64/tcg-target.h
> @@ -123,7 +123,7 @@ typedef enum {
>   #define TCG_TARGET_HAS_clz_i32          1
>   #define TCG_TARGET_HAS_ctz_i32          1
>   #define TCG_TARGET_HAS_ctpop_i32        0
> -#define TCG_TARGET_HAS_direct_jump      0
> +#define TCG_TARGET_HAS_direct_jump      1
>   #define TCG_TARGET_HAS_brcond2          0
>   #define TCG_TARGET_HAS_setcond2         0
>   #define TCG_TARGET_HAS_qemu_st8_i32     0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] tcg/loongarch64: Add direct jump support
  2022-10-12 11:34   ` WANG Xuerui
@ 2022-10-12 11:53     ` WANG Xuerui
  2022-10-13  1:25     ` Qi Hu
  1 sibling, 0 replies; 11+ messages in thread
From: WANG Xuerui @ 2022-10-12 11:53 UTC (permalink / raw)
  To: WANG Xuerui, Qi Hu, WANG Xuerui, Richard Henderson; +Cc: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1502 bytes --]


On 2022/10/12 19:34, WANG Xuerui wrote:
>> +    tcg_insn_unit i1, i2;
>> +
>> +    ptrdiff_t offset = addr - jmp_rx;
>> +
>> +    if (offset == sextreg(offset, 0, 28)) {
>> +        i1 = OPC_B | ((offset >> 18) & 0x3ff) | ((offset << 8) & 
>> 0x3fffc00);
> Use encode_sd10k16_insn instead. No need to juggle the bits yourself 
> and you get nice range assertion for free.
>> +        i2 = NOP; 

One more thing. I think there's a certain trend of making sure 
potentially bad things don't happen and stopping speculative insn 
fetch/execution by making the NOP here a BREAK instead. Although I'm not 
sure if LoongArch or TCG is affected by the various speculation 
vulnerabilities out there, to any degree, it might not hurt to just make 
it a BREAK? Performance shouldn't get affected in any way but one could 
probably sleep better with a guaranteed termination of execution flow 
after the B.

And you should change the "tcg_out32(s, NOP)" some lines above into just 
a "tcg_out_andi(s, 0, 0, 0)". This way you wouldn't have to define a NOP 
constant at all... Or, maybe better, make a "tcg_out_nop" helper right 
after the insn-defs include and after the "TCG intrinsics" section (it 
isn't one, but there isn't a better place), then use it here, so you get 
to include some more comments while still not having to define a NOP. Like:

static void tcg_out_nop(TCGContext *s)
{
	/* Conventional NOP in LoongArch is `andi zero, zero, 0`.  */
	tcg_out_opc_andi(s, 0, 0, 0);
}

[-- Attachment #2: Type: text/html, Size: 2170 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] tcg/loongarch64: Implement INDEX_op_neg_i{32,64}
  2022-10-12  9:41   ` WANG Xuerui
@ 2022-10-13  1:25     ` Qi Hu
  0 siblings, 0 replies; 11+ messages in thread
From: Qi Hu @ 2022-10-13  1:25 UTC (permalink / raw)
  To: WANG Xuerui, WANG Xuerui, Richard Henderson; +Cc: qemu-devel


On 2022/10/12 17:41, WANG Xuerui wrote:
> Hi,
>
> On 2022/10/12 17:13, Qi Hu wrote:
>> Signed-off-by: Qi Hu <huqi@loongson.cn>
>> ---
>>   tcg/loongarch64/tcg-target.c.inc | 9 +++++++++
>>   tcg/loongarch64/tcg-target.h     | 4 ++--
>>   2 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/tcg/loongarch64/tcg-target.c.inc 
>> b/tcg/loongarch64/tcg-target.c.inc
>> index a3debf6da7..f5a214a17f 100644
>> --- a/tcg/loongarch64/tcg-target.c.inc
>> +++ b/tcg/loongarch64/tcg-target.c.inc
>> @@ -1125,6 +1125,13 @@ static void tcg_out_op(TCGContext *s, 
>> TCGOpcode opc,
>>           tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO);
>>           break;
>>   +    case INDEX_op_neg_i32:
>> +        tcg_out_opc_sub_w(s, a0, TCG_REG_ZERO, a1);
>> +        break;
>> +    case INDEX_op_neg_i64:
>> +        tcg_out_opc_sub_d(s, a0, TCG_REG_ZERO, a1);
>> +        break;
>> +
>>       case INDEX_op_nor_i32:
>>       case INDEX_op_nor_i64:
>>           if (c2) {
>> @@ -1503,6 +1510,8 @@ static TCGConstraintSetIndex 
>> tcg_target_op_def(TCGOpcode op)
>>       case INDEX_op_ext_i32_i64:
>>       case INDEX_op_not_i32:
>>       case INDEX_op_not_i64:
>> +    case INDEX_op_neg_i32:
>> +    case INDEX_op_neg_i64:
>>       case INDEX_op_extract_i32:
>>       case INDEX_op_extract_i64:
>>       case INDEX_op_bswap16_i32:
>> diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
>> index d58a6162f2..67380b2432 100644
>> --- a/tcg/loongarch64/tcg-target.h
>> +++ b/tcg/loongarch64/tcg-target.h
>> @@ -114,7 +114,7 @@ typedef enum {
>>   #define TCG_TARGET_HAS_bswap16_i32      1
>>   #define TCG_TARGET_HAS_bswap32_i32      1
>>   #define TCG_TARGET_HAS_not_i32          1
>> -#define TCG_TARGET_HAS_neg_i32          0
>> +#define TCG_TARGET_HAS_neg_i32          1
>>   #define TCG_TARGET_HAS_andc_i32         1
>>   #define TCG_TARGET_HAS_orc_i32          1
>>   #define TCG_TARGET_HAS_eqv_i32          0
>> @@ -150,7 +150,7 @@ typedef enum {
>>   #define TCG_TARGET_HAS_bswap32_i64      1
>>   #define TCG_TARGET_HAS_bswap64_i64      1
>>   #define TCG_TARGET_HAS_not_i64          1
>> -#define TCG_TARGET_HAS_neg_i64          0
>> +#define TCG_TARGET_HAS_neg_i64          1
>>   #define TCG_TARGET_HAS_andc_i64         1
>>   #define TCG_TARGET_HAS_orc_i64          1
>>   #define TCG_TARGET_HAS_eqv_i64          0
> The whole change is not necessary, if target doesn't have neg then the 
> target-independent logic already makes sure a sub with the same 
> semantics is emitted. This is explained in the commit message of that 
> commit introducing add/sub support.

That a good news. I think this patch can be absolutely ignored.

Thanks.

Qi



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] tcg/loongarch64: Add direct jump support
  2022-10-12 11:34   ` WANG Xuerui
  2022-10-12 11:53     ` WANG Xuerui
@ 2022-10-13  1:25     ` Qi Hu
  1 sibling, 0 replies; 11+ messages in thread
From: Qi Hu @ 2022-10-13  1:25 UTC (permalink / raw)
  To: WANG Xuerui, WANG Xuerui, Richard Henderson; +Cc: qemu-devel


On 2022/10/12 19:34, WANG Xuerui wrote:
> Hi,
>
> Thanks for the improvement! Some room for improvement though...
>
> On 2022/10/12 17:13, Qi Hu wrote:
>> Similar to the ARM64, LoongArch has PC-relative instructions such as
>> PCADDU18I. These instructions can be used to support direct jump for
>> LoongArch. Additionally, if instruction "B offset" can cover the target
>> address, "tb_target_set_jmp_target" will only patch the "B offset".
>
> "if the target is within +/- 28 bits range, a single "B offset" plus a 
> nop will be used instead" might sound better?
Yeah, I will add this at commit message. :)
>
>>
>> Signed-off-by: Qi Hu <huqi@loongson.cn>
>> ---
>>   tcg/loongarch64/tcg-insn-defs.c.inc |  3 ++
>>   tcg/loongarch64/tcg-target.c.inc    | 49 ++++++++++++++++++++++++++---
>>   tcg/loongarch64/tcg-target.h        |  2 +-
>>   3 files changed, 48 insertions(+), 6 deletions(-)
>>
>> diff --git a/tcg/loongarch64/tcg-insn-defs.c.inc 
>> b/tcg/loongarch64/tcg-insn-defs.c.inc
>> index d162571856..f5869c6bb1 100644
>> --- a/tcg/loongarch64/tcg-insn-defs.c.inc
>> +++ b/tcg/loongarch64/tcg-insn-defs.c.inc
>> @@ -112,6 +112,9 @@ typedef enum {
>>       OPC_BLE = 0x64000000,
>>       OPC_BGTU = 0x68000000,
>>       OPC_BLEU = 0x6c000000,
>> +    /* pseudo-instruction */
>> +    NOP = 0x03400000,
>> +
>
> You certainly saw the big fat comment block saying the file was 
> auto-generated and thus shouldn't be touched, didn't you? ;-)
>
> I saw your need for a NOP constant later though, and you can instead 
> add `#define OPC_NOP OPC_ANDI` in tcg-target.c.inc, just below the 
> include of tcg-insn-defs.c.inc.
Oh, I think I can add  "tcg_out_nop" instead of "NOP" here.
>
>>   } LoongArchInsn;
>>     static int32_t __attribute__((unused))
>> diff --git a/tcg/loongarch64/tcg-target.c.inc 
>> b/tcg/loongarch64/tcg-target.c.inc
>> index f5a214a17f..3a7b1df081 100644
>> --- a/tcg/loongarch64/tcg-target.c.inc
>> +++ b/tcg/loongarch64/tcg-target.c.inc
>> @@ -1058,11 +1058,24 @@ static void tcg_out_op(TCGContext *s, 
>> TCGOpcode opc,
>>           break;
>>         case INDEX_op_goto_tb:
>> -        assert(s->tb_jmp_insn_offset == 0);
>> -        /* indirect jump method */
>> -        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
>> -                   (uintptr_t)(s->tb_jmp_target_addr + a0));
>> -        tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
>> +        if (s->tb_jmp_insn_offset != NULL) {
>> +            /* TCG_TARGET_HAS_direct_jump */
>> +            /* Ensure that PCADD+JIRL are 8-byte aligned so that an 
>> atomic
>> +               write can be used to patch the target address. */
> There isn't a "PCADD" in LoongArch, and it's possible for the 2 insns 
> to be "B + NOP" as well. So better reword a bit like "Ensure that the 
> 8-byte direct jump fragment is aligned ..." (and add another space 
> after the period at the end of the sentence).
~
>> +            if ((uintptr_t)s->code_ptr & 7) {
>> +                tcg_out32(s, NOP);
>> +            }
>> +            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
>> +            /* actual branch destination will be patched by
>> +               tb_target_set_jmp_target later. */
> Either make it a proper sentence by title-casing "actual" and adding 
> another space after the trailing period, or remove the period.
I will modify these comments.
>> +            tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
>> +            tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
>> +        } else {
>> +            /* !TCG_TARGET_HAS_direct_jump */
>> +            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
>> +                    (uintptr_t)(s->tb_jmp_target_addr + a0));
>> +            tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
>> +        }
> We unconditionally support the direct jump method after the change, so 
> do we need to retain this block any more? Note the aarch64 port 
> currently does the same (declaring unconditional support for direct 
> jumps but keeping both code paths), if we want to remove this code 
> path then you may want to remove the aarch64 one respectively too.
Yes, maybe we can remove these in our patch and submit another patch to 
modify aarch64 port?
>>           set_jmp_reset_offset(s, a0);
>>           break;
>>   @@ -1708,6 +1721,32 @@ static void tcg_target_init(TCGContext *s)
>>       tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
>>   }
>>   +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
>> +                              uintptr_t jmp_rw, uintptr_t addr)
>> +{
>
> Move the function to somewhere above, like above the "/* Entrypoints 
> */" section (and maybe introduce another section)? The various parts 
> are more-or-less arranged in a particular order, so it's like going 
> back to implementing concrete things after finishing everything and 
> calling it a day.
~
>
> Also you forgot to remove the now inappropriate comment about this 
> helper in tcg-target.h.
Oh, I will remove the comment.
>
>> +    tcg_insn_unit i1, i2;
>> +
>> +    ptrdiff_t offset = addr - jmp_rx;
>> +
>> +    if (offset == sextreg(offset, 0, 28)) {
>> +        i1 = OPC_B | ((offset >> 18) & 0x3ff) | ((offset << 8) & 
>> 0x3fffc00);
> Use encode_sd10k16_insn instead. No need to juggle the bits yourself 
> and you get nice range assertion for free.
~
>> +        i2 = NOP;
>> +    } else {
>> +        offset >>= 2;
>> +
>> +        ptrdiff_t upper, lower;
> Promote the declaration to the top of the block (before offset 
> shifting), IIRC that's the official coding style.
~
>> +        upper = ((offset + (1 << 15)) >> 16) & 0xfffff;
>> +        lower = (offset & 0xffff);
>> +        /* patch pcaddu18i */
>> +        i1 = OPC_PCADDU18I | upper << 5 | TCG_REG_T0;
>> +        /* patch jirl */
>> +        i2 = OPC_JIRL | lower << 10 | TCG_REG_T0 << 5;
>
> Similarly you could simplify the assembly here, with encode_dsj20_insn 
> and encode_djsk16_insn respectively.
>
> And the code is straight-forward enough to not need the "patch foo" 
> comments.
~
>
>> +    }
>> +    uint64_t pair = (uint64_t)i2 << 32 | i1;
> Maybe add a couple more parentheses to make the precedence easier to 
> follow? (for people who struggle to remember things like this, like me)
~
>> +    qatomic_set((uint64_t *)jmp_rw, pair);
>> +    flush_idcache_range(jmp_rx, jmp_rw, 8);
>> +}
>> +
>>   typedef struct {
>>       DebugFrameHeader h;
>>       uint8_t fde_def_cfa[4];
>> diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
>> index 67380b2432..0e552731f5 100644
>> --- a/tcg/loongarch64/tcg-target.h
>> +++ b/tcg/loongarch64/tcg-target.h
>> @@ -123,7 +123,7 @@ typedef enum {
>>   #define TCG_TARGET_HAS_clz_i32          1
>>   #define TCG_TARGET_HAS_ctz_i32          1
>>   #define TCG_TARGET_HAS_ctpop_i32        0
>> -#define TCG_TARGET_HAS_direct_jump      0
>> +#define TCG_TARGET_HAS_direct_jump      1
>>   #define TCG_TARGET_HAS_brcond2          0
>>   #define TCG_TARGET_HAS_setcond2         0
>>   #define TCG_TARGET_HAS_qemu_st8_i32     0

Thanks for your advise. I will make some changes and submit 2nd version.

Qi



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2] tcg/loongarch64: Add direct jump support
  2022-10-12  9:13 ` [PATCH 2/2] tcg/loongarch64: Add direct jump support Qi Hu
  2022-10-12 11:34   ` WANG Xuerui
@ 2022-10-13  3:01   ` Qi Hu
  2022-10-13 18:52     ` Richard Henderson
  1 sibling, 1 reply; 11+ messages in thread
From: Qi Hu @ 2022-10-13  3:01 UTC (permalink / raw)
  To: WANG Xuerui, Richard Henderson; +Cc: qemu-devel

Similar to the ARM64, LoongArch has PC-relative instructions such as
PCADDU18I. These instructions can be used to support direct jump for
LoongArch. Additionally, if instruction "B offset" can cover the target
address(target is within ±128MB range), a single "B offset" plus a nop
will be used by "tb_target_set_jump_target".

Signed-off-by: Qi Hu <huqi@loongson.cn>
---
 tcg/loongarch64/tcg-target.c.inc | 53 +++++++++++++++++++++++++++++---
 tcg/loongarch64/tcg-target.h     |  3 +-
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index f5a214a17f..9f9508836a 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1031,6 +1031,36 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
 #endif
 }
 
+/* LoongArch use `andi zero, zero, 0` as NOP.  */
+#define NOP OPC_ANDI
+static void tcg_out_nop(TCGContext *s)
+{
+	tcg_out32(s, NOP);
+}
+
+void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+                              uintptr_t jmp_rw, uintptr_t addr)
+{
+    tcg_insn_unit i1, i2;
+    ptrdiff_t upper, lower;
+    ptrdiff_t offset = (addr - jmp_rx) >> 2;
+
+    if (offset == sextreg(offset, 0, 28)) {
+        i1 = encode_sd10k16_insn(OPC_B, offset);
+        i2 = NOP;
+    } else {
+        upper = ((offset + (1 << 15)) >> 16) & 0xfffff;
+        lower = (offset & 0xffff);
+        /* patch pcaddu18i */
+        i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_T0, upper);
+        /* patch jirl */
+        i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_T0, lower);
+    }
+    uint64_t pair = ((uint64_t)i2 << 32) | i1;
+    qatomic_set((uint64_t *)jmp_rw, pair);
+    flush_idcache_range(jmp_rx, jmp_rw, 8);
+}
+
 /*
  * Entry-points
  */
@@ -1058,11 +1088,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_goto_tb:
-        assert(s->tb_jmp_insn_offset == 0);
-        /* indirect jump method */
-        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
-                   (uintptr_t)(s->tb_jmp_target_addr + a0));
-        tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+        if (s->tb_jmp_insn_offset != NULL) {
+            /* TCG_TARGET_HAS_direct_jump */
+            /* Ensure that "patch area" are 8-byte aligned so that an
+               atomic write can be used to patch the target address. */
+            if ((uintptr_t)s->code_ptr & 7) {
+                tcg_out_nop(s);
+            }
+            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+            /* actual branch destination will be patched by
+               tb_target_set_jmp_target later */
+            tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
+            tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+        } else {
+            /* !TCG_TARGET_HAS_direct_jump */
+            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
+                    (uintptr_t)(s->tb_jmp_target_addr + a0));
+            tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+        }
         set_jmp_reset_offset(s, a0);
         break;
 
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index 67380b2432..c008d5686d 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -123,7 +123,7 @@ typedef enum {
 #define TCG_TARGET_HAS_clz_i32          1
 #define TCG_TARGET_HAS_ctz_i32          1
 #define TCG_TARGET_HAS_ctpop_i32        0
-#define TCG_TARGET_HAS_direct_jump      0
+#define TCG_TARGET_HAS_direct_jump      1
 #define TCG_TARGET_HAS_brcond2          0
 #define TCG_TARGET_HAS_setcond2         0
 #define TCG_TARGET_HAS_qemu_st8_i32     0
@@ -166,7 +166,6 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i64        1
 #define TCG_TARGET_HAS_mulsh_i64        1
 
-/* not defined -- call should be eliminated at compile time */
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
 
 #define TCG_TARGET_DEFAULT_MO (0)
-- 
2.37.3



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] tcg/loongarch64: Add direct jump support
  2022-10-13  3:01   ` [PATCH v2] " Qi Hu
@ 2022-10-13 18:52     ` Richard Henderson
  2022-10-14  3:14       ` Qi Hu
  0 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2022-10-13 18:52 UTC (permalink / raw)
  To: Qi Hu, WANG Xuerui; +Cc: qemu-devel

On 10/13/22 20:01, Qi Hu wrote:
> Similar to the ARM64, LoongArch has PC-relative instructions such as
> PCADDU18I. These instructions can be used to support direct jump for
> LoongArch. Additionally, if instruction "B offset" can cover the target
> address(target is within ±128MB range), a single "B offset" plus a nop
> will be used by "tb_target_set_jump_target".
> 
> Signed-off-by: Qi Hu <huqi@loongson.cn>

First, when sending a v2, do not reply to a different thread.
This confuses our patch tracking tools like patchew.org.

> +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
> +                              uintptr_t jmp_rw, uintptr_t addr)
> +{
> +    tcg_insn_unit i1, i2;
> +    ptrdiff_t upper, lower;
> +    ptrdiff_t offset = (addr - jmp_rx) >> 2;
> +
> +    if (offset == sextreg(offset, 0, 28)) {
> +        i1 = encode_sd10k16_insn(OPC_B, offset);
> +        i2 = NOP;
> +    } else {
> +        upper = ((offset + (1 << 15)) >> 16) & 0xfffff;
> +        lower = (offset & 0xffff);

This computation is incorrect, cropping the values to unsigned.
This will assert inside

> +        /* patch pcaddu18i */
> +        i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_T0, upper);
> +        /* patch jirl */
> +        i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_T0, lower);

these encoding functions.  You want

     lower = (int16_t)offset;
     upper = (offset - lower) >> 16;

Wang Xuerui asked you to remove the redundant comments there, which give no more 
information than the code itself.

> @@ -1058,11 +1088,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>           break;
>   
>       case INDEX_op_goto_tb:
> -        assert(s->tb_jmp_insn_offset == 0);
> -        /* indirect jump method */
> -        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
> -                   (uintptr_t)(s->tb_jmp_target_addr + a0));
> -        tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
> +        if (s->tb_jmp_insn_offset != NULL) {
> +            /* TCG_TARGET_HAS_direct_jump */
> +            /* Ensure that "patch area" are 8-byte aligned so that an
> +               atomic write can be used to patch the target address. */
> +            if ((uintptr_t)s->code_ptr & 7) {
> +                tcg_out_nop(s);
> +            }
> +            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
> +            /* actual branch destination will be patched by
> +               tb_target_set_jmp_target later */
> +            tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
> +            tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
> +        } else {
> +            /* !TCG_TARGET_HAS_direct_jump */
> +            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
> +                    (uintptr_t)(s->tb_jmp_target_addr + a0));
> +            tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
> +        }

Your comment formatting does not follow our coding style.  It must be

     /*
      * Comment with
      * multiple lines.
      */

There is a tool, ./scripts/check_patch.pl, that will diagnose this error.

> diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
> index 67380b2432..c008d5686d 100644
> --- a/tcg/loongarch64/tcg-target.h
> +++ b/tcg/loongarch64/tcg-target.h
> @@ -123,7 +123,7 @@ typedef enum {
>   #define TCG_TARGET_HAS_clz_i32          1
>   #define TCG_TARGET_HAS_ctz_i32          1
>   #define TCG_TARGET_HAS_ctpop_i32        0
> -#define TCG_TARGET_HAS_direct_jump      0
> +#define TCG_TARGET_HAS_direct_jump      1
>   #define TCG_TARGET_HAS_brcond2          0
>   #define TCG_TARGET_HAS_setcond2         0
>   #define TCG_TARGET_HAS_qemu_st8_i32     0
> @@ -166,7 +166,6 @@ typedef enum {
>   #define TCG_TARGET_HAS_muluh_i64        1
>   #define TCG_TARGET_HAS_mulsh_i64        1
>   
> -/* not defined -- call should be eliminated at compile time */
>   void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
>   
>   #define TCG_TARGET_DEFAULT_MO (0)

You are missing a change to

#define MAX_CODE_GEN_BUFFER_SIZE  SIZE_MAX

The branch distance with pcaddu18i is +/- 37 bits (128GB) not 64 bits.


r~



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] tcg/loongarch64: Add direct jump support
  2022-10-13 18:52     ` Richard Henderson
@ 2022-10-14  3:14       ` Qi Hu
  0 siblings, 0 replies; 11+ messages in thread
From: Qi Hu @ 2022-10-14  3:14 UTC (permalink / raw)
  To: Richard Henderson, WANG Xuerui; +Cc: qemu-devel


On 2022/10/14 02:52, Richard Henderson wrote:
> On 10/13/22 20:01, Qi Hu wrote:
>> Similar to the ARM64, LoongArch has PC-relative instructions such as
>> PCADDU18I. These instructions can be used to support direct jump for
>> LoongArch. Additionally, if instruction "B offset" can cover the target
>> address(target is within ±128MB range), a single "B offset" plus a nop
>> will be used by "tb_target_set_jump_target".
>>
>> Signed-off-by: Qi Hu <huqi@loongson.cn>
>
> First, when sending a v2, do not reply to a different thread.
> This confuses our patch tracking tools like patchew.org.
>
Ok, I will pay attention about that.
>> +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
>> +                              uintptr_t jmp_rw, uintptr_t addr)
>> +{
>> +    tcg_insn_unit i1, i2;
>> +    ptrdiff_t upper, lower;
>> +    ptrdiff_t offset = (addr - jmp_rx) >> 2;
>> +
>> +    if (offset == sextreg(offset, 0, 28)) {
>> +        i1 = encode_sd10k16_insn(OPC_B, offset);
>> +        i2 = NOP;
>> +    } else {
>> +        upper = ((offset + (1 << 15)) >> 16) & 0xfffff;
>> +        lower = (offset & 0xffff);
>
> This computation is incorrect, cropping the values to unsigned.
> This will assert inside
>
>> +        /* patch pcaddu18i */
>> +        i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_T0, upper);
>> +        /* patch jirl */
>> +        i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_T0, 
>> lower);
>
> these encoding functions.  You want
>
>     lower = (int16_t)offset;
>     upper = (offset - lower) >> 16;
Yes, thanks for your advise.
>
> Wang Xuerui asked you to remove the redundant comments there, which 
> give no more information than the code itself.
I will remove these comments.
>
>> @@ -1058,11 +1088,24 @@ static void tcg_out_op(TCGContext *s, 
>> TCGOpcode opc,
>>           break;
>>         case INDEX_op_goto_tb:
>> -        assert(s->tb_jmp_insn_offset == 0);
>> -        /* indirect jump method */
>> -        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
>> -                   (uintptr_t)(s->tb_jmp_target_addr + a0));
>> -        tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
>> +        if (s->tb_jmp_insn_offset != NULL) {
>> +            /* TCG_TARGET_HAS_direct_jump */
>> +            /* Ensure that "patch area" are 8-byte aligned so that an
>> +               atomic write can be used to patch the target address. */
>> +            if ((uintptr_t)s->code_ptr & 7) {
>> +                tcg_out_nop(s);
>> +            }
>> +            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
>> +            /* actual branch destination will be patched by
>> +               tb_target_set_jmp_target later */
>> +            tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
>> +            tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
>> +        } else {
>> +            /* !TCG_TARGET_HAS_direct_jump */
>> +            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
>> +                    (uintptr_t)(s->tb_jmp_target_addr + a0));
>> +            tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
>> +        }
>
> Your comment formatting does not follow our coding style.  It must be
>
>     /*
>      * Comment with
>      * multiple lines.
>      */
>
> There is a tool, ./scripts/check_patch.pl, that will diagnose this error.
I will modify these comments.
>
>> diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
>> index 67380b2432..c008d5686d 100644
>> --- a/tcg/loongarch64/tcg-target.h
>> +++ b/tcg/loongarch64/tcg-target.h
>> @@ -123,7 +123,7 @@ typedef enum {
>>   #define TCG_TARGET_HAS_clz_i32          1
>>   #define TCG_TARGET_HAS_ctz_i32          1
>>   #define TCG_TARGET_HAS_ctpop_i32        0
>> -#define TCG_TARGET_HAS_direct_jump      0
>> +#define TCG_TARGET_HAS_direct_jump      1
>>   #define TCG_TARGET_HAS_brcond2          0
>>   #define TCG_TARGET_HAS_setcond2         0
>>   #define TCG_TARGET_HAS_qemu_st8_i32     0
>> @@ -166,7 +166,6 @@ typedef enum {
>>   #define TCG_TARGET_HAS_muluh_i64        1
>>   #define TCG_TARGET_HAS_mulsh_i64        1
>>   -/* not defined -- call should be eliminated at compile time */
>>   void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, 
>> uintptr_t);
>>     #define TCG_TARGET_DEFAULT_MO (0)
>
> You are missing a change to
>
> #define MAX_CODE_GEN_BUFFER_SIZE  SIZE_MAX
>
> The branch distance with pcaddu18i is +/- 37 bits (128GB) not 64 bits.
>
Oh, thanks for your reminder. I will add this.
>
> r~

I will send V3 after editing these.

Thanks.

Qi



^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-10-14  3:16 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-12  9:13 [PATCH 0/2] tcg/loongarch64: add neg tcg_op and direct jump support Qi Hu
2022-10-12  9:13 ` [PATCH 1/2] tcg/loongarch64: Implement INDEX_op_neg_i{32,64} Qi Hu
2022-10-12  9:41   ` WANG Xuerui
2022-10-13  1:25     ` Qi Hu
2022-10-12  9:13 ` [PATCH 2/2] tcg/loongarch64: Add direct jump support Qi Hu
2022-10-12 11:34   ` WANG Xuerui
2022-10-12 11:53     ` WANG Xuerui
2022-10-13  1:25     ` Qi Hu
2022-10-13  3:01   ` [PATCH v2] " Qi Hu
2022-10-13 18:52     ` Richard Henderson
2022-10-14  3:14       ` Qi Hu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.