* [PATCH v5 01/16] tcg: Expand usadd/ussub with umin/umax
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
@ 2021-09-15 21:30 ` Richard Henderson
2021-09-17 11:24 ` Matheus K. Ferst
2021-09-15 21:31 ` [PATCH v5 02/16] tcg/s390x: Rename from tcg/s390 Richard Henderson
` (14 subsequent siblings)
15 siblings, 1 reply; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:30 UTC (permalink / raw)
To: qemu-devel; +Cc: david
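For usadd, we only have to consider overflow. Since ~B + B == -1
(all ones, i.e. the unsigned maximum), the largest value of A for
which A + B does not overflow is ~B, so usadd(A, B) = umin(A, ~B) + B.
For ussub, we only have to consider underflow. The smallest value
of A for which A - B does not underflow is B, so
ussub(A, B) = umax(A, B) - B.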
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg-op-vec.c | 37 +++++++++++++++++++++++++++++++++++--
1 file changed, 35 insertions(+), 2 deletions(-)
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index 15e026ae49..7705a49c0b 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -119,6 +119,18 @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list,
continue;
}
break;
+ case INDEX_op_usadd_vec:
+ if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) ||
+ tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
+ continue;
+ }
+ break;
+ case INDEX_op_ussub_vec:
+ if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) ||
+ tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
+ continue;
+ }
+ break;
case INDEX_op_cmpsel_vec:
case INDEX_op_smin_vec:
case INDEX_op_smax_vec:
@@ -603,7 +615,18 @@ void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
- do_op3_nofail(vece, r, a, b, INDEX_op_usadd_vec);
+ if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) {
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+ TCGv_vec t = tcg_temp_new_vec_matching(r);
+
+ /* usadd(a, b) = min(a, ~b) + b */
+ tcg_gen_not_vec(vece, t, b);
+ tcg_gen_umin_vec(vece, t, t, a);
+ tcg_gen_add_vec(vece, r, r, b);
+
+ tcg_temp_free_vec(t);
+ tcg_swap_vecop_list(hold_list);
+ }
}
void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
@@ -613,7 +636,17 @@ void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
- do_op3_nofail(vece, r, a, b, INDEX_op_ussub_vec);
+ if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) {
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+ TCGv_vec t = tcg_temp_new_vec_matching(r);
+
+ /* ussub(a, b) = max(a, b) - b */
+ tcg_gen_umax_vec(vece, t, a, b);
+ tcg_gen_sub_vec(vece, r, t, b);
+
+ tcg_temp_free_vec(t);
+ tcg_swap_vecop_list(hold_list);
+ }
}
static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v5 01/16] tcg: Expand usadd/ussub with umin/umax
2021-09-15 21:30 ` [PATCH v5 01/16] tcg: Expand usadd/ussub with umin/umax Richard Henderson
@ 2021-09-17 11:24 ` Matheus K. Ferst
2021-09-17 13:46 ` Richard Henderson
0 siblings, 1 reply; 23+ messages in thread
From: Matheus K. Ferst @ 2021-09-17 11:24 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: david
On 15/09/2021 18:30, Richard Henderson wrote:
> [E-MAIL EXTERNO] Não clique em links ou abra anexos, a menos que você possa confirmar o remetente e saber que o conteúdo é seguro. Em caso de e-mail suspeito entre imediatamente em contato com o DTI.
>
> For usadd, we only have to consider overflow. Since ~B + B == -1,
> the maximum value for A that saturates is ~B.
>
> For ussub, we only have to consider underflow. The minimum value
> that saturates to 0 from A - B is B.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/tcg-op-vec.c | 37 +++++++++++++++++++++++++++++++++++--
> 1 file changed, 35 insertions(+), 2 deletions(-)
>
> diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
> index 15e026ae49..7705a49c0b 100644
> --- a/tcg/tcg-op-vec.c
> +++ b/tcg/tcg-op-vec.c
> @@ -119,6 +119,18 @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list,
> continue;
> }
> break;
> + case INDEX_op_usadd_vec:
> + if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) ||
> + tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
> + continue;
> + }
> + break;
> + case INDEX_op_ussub_vec:
> + if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) ||
> + tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
> + continue;
> + }
> + break;
> case INDEX_op_cmpsel_vec:
> case INDEX_op_smin_vec:
> case INDEX_op_smax_vec:
> @@ -603,7 +615,18 @@ void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
>
> void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
> {
> - do_op3_nofail(vece, r, a, b, INDEX_op_usadd_vec);
> + if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) {
> + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
> + TCGv_vec t = tcg_temp_new_vec_matching(r);
> +
> + /* usadd(a, b) = min(a, ~b) + b */
> + tcg_gen_not_vec(vece, t, b);
> + tcg_gen_umin_vec(vece, t, t, a);
> + tcg_gen_add_vec(vece, r, r, b);
I think it should be
tcg_gen_add_vec(vece, r, t, b);
> +
> + tcg_temp_free_vec(t);
> + tcg_swap_vecop_list(hold_list);
> + }
> }
>
> void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
> @@ -613,7 +636,17 @@ void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
>
> void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
> {
> - do_op3_nofail(vece, r, a, b, INDEX_op_ussub_vec);
> + if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) {
> + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
> + TCGv_vec t = tcg_temp_new_vec_matching(r);
> +
> + /* ussub(a, b) = max(a, b) - b */
> + tcg_gen_umax_vec(vece, t, a, b);
> + tcg_gen_sub_vec(vece, r, t, b);
> +
> + tcg_temp_free_vec(t);
> + tcg_swap_vecop_list(hold_list);
> + }
> }
>
> static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
> --
> 2.25.1
>
>
Thanks,
Matheus K. Ferst
Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/>
Analista de Software Júnior
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v5 01/16] tcg: Expand usadd/ussub with umin/umax
2021-09-17 11:24 ` Matheus K. Ferst
@ 2021-09-17 13:46 ` Richard Henderson
0 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-09-17 13:46 UTC (permalink / raw)
To: Matheus K. Ferst, qemu-devel; +Cc: david
On 9/17/21 4:24 AM, Matheus K. Ferst wrote:
>> void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
>> {
>> - do_op3_nofail(vece, r, a, b, INDEX_op_usadd_vec);
>> + if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) {
>> + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
>> + TCGv_vec t = tcg_temp_new_vec_matching(r);
>> +
>> + /* usadd(a, b) = min(a, ~b) + b */
>> + tcg_gen_not_vec(vece, t, b);
>> + tcg_gen_umin_vec(vece, t, t, a);
>> + tcg_gen_add_vec(vece, r, r, b);
>
> I think it should be
>
> tcg_gen_add_vec(vece, r, t, b);
Yep, good catch.
r~
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v5 02/16] tcg/s390x: Rename from tcg/s390
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
2021-09-15 21:30 ` [PATCH v5 01/16] tcg: Expand usadd/ussub with umin/umax Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
2021-09-16 5:19 ` Thomas Huth
2021-09-16 5:27 ` Philippe Mathieu-Daudé
2021-09-15 21:31 ` [PATCH v5 03/16] tcg/s390x: Change FACILITY representation Richard Henderson
` (13 subsequent siblings)
15 siblings, 2 replies; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
This emphasizes that we don't support s390, only 64-bit s390x hosts.
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
meson.build | 2 --
tcg/{s390 => s390x}/tcg-target-con-set.h | 0
tcg/{s390 => s390x}/tcg-target-con-str.h | 0
tcg/{s390 => s390x}/tcg-target.h | 0
tcg/{s390 => s390x}/tcg-target.c.inc | 0
5 files changed, 2 deletions(-)
rename tcg/{s390 => s390x}/tcg-target-con-set.h (100%)
rename tcg/{s390 => s390x}/tcg-target-con-str.h (100%)
rename tcg/{s390 => s390x}/tcg-target.h (100%)
rename tcg/{s390 => s390x}/tcg-target.c.inc (100%)
diff --git a/meson.build b/meson.build
index 2711cbb789..1cf370ab56 100644
--- a/meson.build
+++ b/meson.build
@@ -265,8 +265,6 @@ if not get_option('tcg').disabled()
tcg_arch = 'tci'
elif config_host['ARCH'] == 'sparc64'
tcg_arch = 'sparc'
- elif config_host['ARCH'] == 's390x'
- tcg_arch = 's390'
elif config_host['ARCH'] in ['x86_64', 'x32']
tcg_arch = 'i386'
elif config_host['ARCH'] == 'ppc64'
diff --git a/tcg/s390/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
similarity index 100%
rename from tcg/s390/tcg-target-con-set.h
rename to tcg/s390x/tcg-target-con-set.h
diff --git a/tcg/s390/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
similarity index 100%
rename from tcg/s390/tcg-target-con-str.h
rename to tcg/s390x/tcg-target-con-str.h
diff --git a/tcg/s390/tcg-target.h b/tcg/s390x/tcg-target.h
similarity index 100%
rename from tcg/s390/tcg-target.h
rename to tcg/s390x/tcg-target.h
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
similarity index 100%
rename from tcg/s390/tcg-target.c.inc
rename to tcg/s390x/tcg-target.c.inc
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v5 02/16] tcg/s390x: Rename from tcg/s390
2021-09-15 21:31 ` [PATCH v5 02/16] tcg/s390x: Rename from tcg/s390 Richard Henderson
@ 2021-09-16 5:19 ` Thomas Huth
2021-09-16 5:27 ` Philippe Mathieu-Daudé
1 sibling, 0 replies; 23+ messages in thread
From: Thomas Huth @ 2021-09-16 5:19 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: david
On 15/09/2021 23.31, Richard Henderson wrote:
> This emphasizes that we don't support s390, only 64-bit s390x hosts.
>
> Reviewed-by: David Hildenbrand <david@redhat.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> meson.build | 2 --
> tcg/{s390 => s390x}/tcg-target-con-set.h | 0
> tcg/{s390 => s390x}/tcg-target-con-str.h | 0
> tcg/{s390 => s390x}/tcg-target.h | 0
> tcg/{s390 => s390x}/tcg-target.c.inc | 0
> 5 files changed, 2 deletions(-)
> rename tcg/{s390 => s390x}/tcg-target-con-set.h (100%)
> rename tcg/{s390 => s390x}/tcg-target-con-str.h (100%)
> rename tcg/{s390 => s390x}/tcg-target.h (100%)
> rename tcg/{s390 => s390x}/tcg-target.c.inc (100%)
>
> diff --git a/meson.build b/meson.build
> index 2711cbb789..1cf370ab56 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -265,8 +265,6 @@ if not get_option('tcg').disabled()
> tcg_arch = 'tci'
> elif config_host['ARCH'] == 'sparc64'
> tcg_arch = 'sparc'
> - elif config_host['ARCH'] == 's390x'
> - tcg_arch = 's390'
> elif config_host['ARCH'] in ['x86_64', 'x32']
> tcg_arch = 'i386'
> elif config_host['ARCH'] == 'ppc64'
> diff --git a/tcg/s390/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
> similarity index 100%
> rename from tcg/s390/tcg-target-con-set.h
> rename to tcg/s390x/tcg-target-con-set.h
> diff --git a/tcg/s390/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
> similarity index 100%
> rename from tcg/s390/tcg-target-con-str.h
> rename to tcg/s390x/tcg-target-con-str.h
> diff --git a/tcg/s390/tcg-target.h b/tcg/s390x/tcg-target.h
> similarity index 100%
> rename from tcg/s390/tcg-target.h
> rename to tcg/s390x/tcg-target.h
> diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
> similarity index 100%
> rename from tcg/s390/tcg-target.c.inc
> rename to tcg/s390x/tcg-target.c.inc
>
Good idea!
Reviewed-by: Thomas Huth <thuth@redhat.com>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v5 02/16] tcg/s390x: Rename from tcg/s390
2021-09-15 21:31 ` [PATCH v5 02/16] tcg/s390x: Rename from tcg/s390 Richard Henderson
2021-09-16 5:19 ` Thomas Huth
@ 2021-09-16 5:27 ` Philippe Mathieu-Daudé
1 sibling, 0 replies; 23+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-09-16 5:27 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: david
On 9/15/21 11:31 PM, Richard Henderson wrote:
> This emphasizes that we don't support s390, only 64-bit s390x hosts.
>
> Reviewed-by: David Hildenbrand <david@redhat.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> meson.build | 2 --
> tcg/{s390 => s390x}/tcg-target-con-set.h | 0
> tcg/{s390 => s390x}/tcg-target-con-str.h | 0
> tcg/{s390 => s390x}/tcg-target.h | 0
> tcg/{s390 => s390x}/tcg-target.c.inc | 0
> 5 files changed, 2 deletions(-)
> rename tcg/{s390 => s390x}/tcg-target-con-set.h (100%)
> rename tcg/{s390 => s390x}/tcg-target-con-str.h (100%)
> rename tcg/{s390 => s390x}/tcg-target.h (100%)
> rename tcg/{s390 => s390x}/tcg-target.c.inc (100%)
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v5 03/16] tcg/s390x: Change FACILITY representation
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
2021-09-15 21:30 ` [PATCH v5 01/16] tcg: Expand usadd/ussub with umin/umax Richard Henderson
2021-09-15 21:31 ` [PATCH v5 02/16] tcg/s390x: Rename from tcg/s390 Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
2021-09-15 21:31 ` [PATCH v5 04/16] tcg/s390x: Merge TCG_AREG0 and TCG_REG_CALL_STACK into TCGReg Richard Henderson
` (12 subsequent siblings)
15 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
We will shortly need to be able to check facilities beyond the
first 64. Instead of explicitly masking against s390_facilities,
create a HAVE_FACILITY macro that indexes an array.
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Change name to HAVE_FACILITY (david)
---
tcg/s390x/tcg-target.h | 29 ++++++++-------
tcg/s390x/tcg-target.c.inc | 74 +++++++++++++++++++-------------------
2 files changed, 52 insertions(+), 51 deletions(-)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 2e4ede2ea2..18d0d330e6 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -55,16 +55,19 @@ typedef enum TCGReg {
/* A list of relevant facilities used by this translator. Some of these
are required for proper operation, and these are checked at startup. */
-#define FACILITY_ZARCH_ACTIVE (1ULL << (63 - 2))
-#define FACILITY_LONG_DISP (1ULL << (63 - 18))
-#define FACILITY_EXT_IMM (1ULL << (63 - 21))
-#define FACILITY_GEN_INST_EXT (1ULL << (63 - 34))
-#define FACILITY_LOAD_ON_COND (1ULL << (63 - 45))
+#define FACILITY_ZARCH_ACTIVE 2
+#define FACILITY_LONG_DISP 18
+#define FACILITY_EXT_IMM 21
+#define FACILITY_GEN_INST_EXT 34
+#define FACILITY_LOAD_ON_COND 45
#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND
#define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND
-#define FACILITY_LOAD_ON_COND2 (1ULL << (63 - 53))
+#define FACILITY_LOAD_ON_COND2 53
-extern uint64_t s390_facilities;
+extern uint64_t s390_facilities[1];
+
+#define HAVE_FACILITY(X) \
+ ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1)
/* optional instructions */
#define TCG_TARGET_HAS_div2_i32 1
@@ -85,8 +88,8 @@ extern uint64_t s390_facilities;
#define TCG_TARGET_HAS_clz_i32 0
#define TCG_TARGET_HAS_ctz_i32 0
#define TCG_TARGET_HAS_ctpop_i32 0
-#define TCG_TARGET_HAS_deposit_i32 (s390_facilities & FACILITY_GEN_INST_EXT)
-#define TCG_TARGET_HAS_extract_i32 (s390_facilities & FACILITY_GEN_INST_EXT)
+#define TCG_TARGET_HAS_deposit_i32 HAVE_FACILITY(GEN_INST_EXT)
+#define TCG_TARGET_HAS_extract_i32 HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_sextract_i32 0
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_movcond_i32 1
@@ -98,7 +101,7 @@ extern uint64_t s390_facilities;
#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_extrl_i64_i32 0
#define TCG_TARGET_HAS_extrh_i64_i32 0
-#define TCG_TARGET_HAS_direct_jump (s390_facilities & FACILITY_GEN_INST_EXT)
+#define TCG_TARGET_HAS_direct_jump HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_qemu_st8_i32 0
#define TCG_TARGET_HAS_div2_i64 1
@@ -119,11 +122,11 @@ extern uint64_t s390_facilities;
#define TCG_TARGET_HAS_eqv_i64 0
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
-#define TCG_TARGET_HAS_clz_i64 (s390_facilities & FACILITY_EXT_IMM)
+#define TCG_TARGET_HAS_clz_i64 HAVE_FACILITY(EXT_IMM)
#define TCG_TARGET_HAS_ctz_i64 0
#define TCG_TARGET_HAS_ctpop_i64 0
-#define TCG_TARGET_HAS_deposit_i64 (s390_facilities & FACILITY_GEN_INST_EXT)
-#define TCG_TARGET_HAS_extract_i64 (s390_facilities & FACILITY_GEN_INST_EXT)
+#define TCG_TARGET_HAS_deposit_i64 HAVE_FACILITY(GEN_INST_EXT)
+#define TCG_TARGET_HAS_extract_i64 HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_sextract_i64 0
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_movcond_i64 1
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index b82cf19f09..f7e4a619e7 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -66,7 +66,7 @@
We don't need this when we have pc-relative loads with the general
instructions extension facility. */
#define TCG_REG_TB TCG_REG_R12
-#define USE_REG_TB (!(s390_facilities & FACILITY_GEN_INST_EXT))
+#define USE_REG_TB (!HAVE_FACILITY(GEN_INST_EXT))
#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG TCG_REG_R13
@@ -377,7 +377,7 @@ static void * const qemu_st_helpers[16] = {
#endif
static const tcg_insn_unit *tb_ret_addr;
-uint64_t s390_facilities;
+uint64_t s390_facilities[1];
static bool patch_reloc(tcg_insn_unit *src_rw, int type,
intptr_t value, intptr_t addend)
@@ -577,7 +577,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
}
/* Try all 48-bit insns that can load it in one go. */
- if (s390_facilities & FACILITY_EXT_IMM) {
+ if (HAVE_FACILITY(EXT_IMM)) {
if (sval == (int32_t)sval) {
tcg_out_insn(s, RIL, LGFI, ret, sval);
return;
@@ -620,7 +620,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
}
/* Otherwise, stuff it in the constant pool. */
- if (s390_facilities & FACILITY_GEN_INST_EXT) {
+ if (HAVE_FACILITY(GEN_INST_EXT)) {
tcg_out_insn(s, RIL, LGRL, ret, 0);
new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
} else if (USE_REG_TB && !in_prologue) {
@@ -706,7 +706,7 @@ static void tcg_out_ld_abs(TCGContext *s, TCGType type,
{
intptr_t addr = (intptr_t)abs;
- if ((s390_facilities & FACILITY_GEN_INST_EXT) && !(addr & 1)) {
+ if (HAVE_FACILITY(GEN_INST_EXT) && !(addr & 1)) {
ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
if (disp == (int32_t)disp) {
if (type == TCG_TYPE_I32) {
@@ -740,7 +740,7 @@ static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
- if (s390_facilities & FACILITY_EXT_IMM) {
+ if (HAVE_FACILITY(EXT_IMM)) {
tcg_out_insn(s, RRE, LGBR, dest, src);
return;
}
@@ -760,7 +760,7 @@ static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
- if (s390_facilities & FACILITY_EXT_IMM) {
+ if (HAVE_FACILITY(EXT_IMM)) {
tcg_out_insn(s, RRE, LLGCR, dest, src);
return;
}
@@ -780,7 +780,7 @@ static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
- if (s390_facilities & FACILITY_EXT_IMM) {
+ if (HAVE_FACILITY(EXT_IMM)) {
tcg_out_insn(s, RRE, LGHR, dest, src);
return;
}
@@ -800,7 +800,7 @@ static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
- if (s390_facilities & FACILITY_EXT_IMM) {
+ if (HAVE_FACILITY(EXT_IMM)) {
tcg_out_insn(s, RRE, LLGHR, dest, src);
return;
}
@@ -888,7 +888,7 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
tgen_ext32u(s, dest, dest);
return;
}
- if (s390_facilities & FACILITY_EXT_IMM) {
+ if (HAVE_FACILITY(EXT_IMM)) {
if ((val & valid) == 0xff) {
tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
return;
@@ -909,7 +909,7 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
}
/* Try all 48-bit insns that can perform it in one go. */
- if (s390_facilities & FACILITY_EXT_IMM) {
+ if (HAVE_FACILITY(EXT_IMM)) {
for (i = 0; i < 2; i++) {
tcg_target_ulong mask = ~(0xffffffffull << i*32);
if (((val | ~valid) & mask) == mask) {
@@ -918,7 +918,7 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
}
}
}
- if ((s390_facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) {
+ if (HAVE_FACILITY(GEN_INST_EXT) && risbg_mask(val)) {
tgen_andi_risbg(s, dest, dest, val);
return;
}
@@ -967,7 +967,7 @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
}
/* Try all 48-bit insns that can perform it in one go. */
- if (s390_facilities & FACILITY_EXT_IMM) {
+ if (HAVE_FACILITY(EXT_IMM)) {
for (i = 0; i < 2; i++) {
tcg_target_ulong mask = (0xffffffffull << i*32);
if ((val & mask) != 0 && (val & ~mask) == 0) {
@@ -992,7 +992,7 @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
/* Perform the OR via sequential modifications to the high and
low parts. Do this via recursion to handle 16-bit vs 32-bit
masks in each half. */
- tcg_debug_assert(s390_facilities & FACILITY_EXT_IMM);
+ tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
tgen_ori(s, type, dest, val & 0x00000000ffffffffull);
tgen_ori(s, type, dest, val & 0xffffffff00000000ull);
}
@@ -1001,7 +1001,7 @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
/* Try all 48-bit insns that can perform it in one go. */
- if (s390_facilities & FACILITY_EXT_IMM) {
+ if (HAVE_FACILITY(EXT_IMM)) {
if ((val & 0xffffffff00000000ull) == 0) {
tcg_out_insn(s, RIL, XILF, dest, val);
return;
@@ -1025,7 +1025,7 @@ static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
tcg_tbrel_diff(s, NULL));
} else {
/* Perform the xor by parts. */
- tcg_debug_assert(s390_facilities & FACILITY_EXT_IMM);
+ tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
if (val & 0xffffffff) {
tcg_out_insn(s, RIL, XILF, dest, val);
}
@@ -1059,7 +1059,7 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
goto exit;
}
- if (s390_facilities & FACILITY_EXT_IMM) {
+ if (HAVE_FACILITY(EXT_IMM)) {
if (type == TCG_TYPE_I32) {
op = (is_unsigned ? RIL_CLFI : RIL_CFI);
tcg_out_insn_RIL(s, op, r1, c2);
@@ -1122,7 +1122,7 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
bool have_loc;
/* With LOC2, we can always emit the minimum 3 insns. */
- if (s390_facilities & FACILITY_LOAD_ON_COND2) {
+ if (HAVE_FACILITY(LOAD_ON_COND2)) {
/* Emit: d = 0, d = (cc ? 1 : d). */
cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
@@ -1130,7 +1130,7 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
return;
}
- have_loc = (s390_facilities & FACILITY_LOAD_ON_COND) != 0;
+ have_loc = HAVE_FACILITY(LOAD_ON_COND);
/* For HAVE_LOC, only the paths through GTU/GT/LEU/LE are smaller. */
restart:
@@ -1216,7 +1216,7 @@ static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
TCGArg v3, int v3const)
{
int cc;
- if (s390_facilities & FACILITY_LOAD_ON_COND) {
+ if (HAVE_FACILITY(LOAD_ON_COND)) {
cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
if (v3const) {
tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc);
@@ -1249,7 +1249,7 @@ static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
} else {
tcg_out_mov(s, TCG_TYPE_I64, dest, a2);
}
- if (s390_facilities & FACILITY_LOAD_ON_COND) {
+ if (HAVE_FACILITY(LOAD_ON_COND)) {
/* Emit: if (one bit found) dest = r0. */
tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2);
} else {
@@ -1325,7 +1325,7 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
{
int cc;
- if (s390_facilities & FACILITY_GEN_INST_EXT) {
+ if (HAVE_FACILITY(GEN_INST_EXT)) {
bool is_unsigned = is_unsigned_cond(c);
bool in_range;
S390Opcode opc;
@@ -1519,7 +1519,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
cross pages using the address of the last byte of the access. */
a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
- if ((s390_facilities & FACILITY_GEN_INST_EXT) && a_off == 0) {
+ if (HAVE_FACILITY(GEN_INST_EXT) && a_off == 0) {
tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
} else {
tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
@@ -1810,7 +1810,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_insn(s, RI, AHI, a0, a2);
break;
}
- if (s390_facilities & FACILITY_EXT_IMM) {
+ if (HAVE_FACILITY(EXT_IMM)) {
tcg_out_insn(s, RIL, AFI, a0, a2);
break;
}
@@ -2056,7 +2056,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_insn(s, RI, AGHI, a0, a2);
break;
}
- if (s390_facilities & FACILITY_EXT_IMM) {
+ if (HAVE_FACILITY(EXT_IMM)) {
if (a2 == (int32_t)a2) {
tcg_out_insn(s, RIL, AGFI, a0, a2);
break;
@@ -2281,8 +2281,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
/* The host memory model is quite strong, we simply need to
serialize the instruction stream. */
if (args[0] & TCG_MO_ST_LD) {
- tcg_out_insn(s, RR, BCR,
- s390_facilities & FACILITY_FAST_BCR_SER ? 14 : 15, 0);
+ tcg_out_insn(s, RR, BCR, HAVE_FACILITY(FAST_BCR_SER) ? 14 : 15, 0);
}
break;
@@ -2345,7 +2344,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_or_i64:
case INDEX_op_xor_i32:
case INDEX_op_xor_i64:
- return (s390_facilities & FACILITY_DISTINCT_OPS
+ return (HAVE_FACILITY(DISTINCT_OPS)
? C_O1_I2(r, r, ri)
: C_O1_I2(r, 0, ri));
@@ -2353,19 +2352,19 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
/* If we have the general-instruction-extensions, then we have
MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we
have only MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */
- return (s390_facilities & FACILITY_GEN_INST_EXT
+ return (HAVE_FACILITY(GEN_INST_EXT)
? C_O1_I2(r, 0, ri)
: C_O1_I2(r, 0, rI));
case INDEX_op_mul_i64:
- return (s390_facilities & FACILITY_GEN_INST_EXT
+ return (HAVE_FACILITY(GEN_INST_EXT)
? C_O1_I2(r, 0, rJ)
: C_O1_I2(r, 0, rI));
case INDEX_op_shl_i32:
case INDEX_op_shr_i32:
case INDEX_op_sar_i32:
- return (s390_facilities & FACILITY_DISTINCT_OPS
+ return (HAVE_FACILITY(DISTINCT_OPS)
? C_O1_I2(r, r, ri)
: C_O1_I2(r, 0, ri));
@@ -2409,7 +2408,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_movcond_i32:
case INDEX_op_movcond_i64:
- return (s390_facilities & FACILITY_LOAD_ON_COND2
+ return (HAVE_FACILITY(LOAD_ON_COND2)
? C_O1_I4(r, r, ri, rI, 0)
: C_O1_I4(r, r, ri, r, 0));
@@ -2424,13 +2423,13 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_add2_i32:
case INDEX_op_sub2_i32:
- return (s390_facilities & FACILITY_EXT_IMM
+ return (HAVE_FACILITY(EXT_IMM)
? C_O2_I4(r, r, 0, 1, ri, r)
: C_O2_I4(r, r, 0, 1, r, r));
case INDEX_op_add2_i64:
case INDEX_op_sub2_i64:
- return (s390_facilities & FACILITY_EXT_IMM
+ return (HAVE_FACILITY(EXT_IMM)
? C_O2_I4(r, r, 0, 1, rA, r)
: C_O2_I4(r, r, 0, 1, r, r));
@@ -2446,13 +2445,12 @@ static void query_s390_facilities(void)
/* Is STORE FACILITY LIST EXTENDED available? Honestly, I believe this
is present on all 64-bit systems, but let's check for it anyway. */
if (hwcap & HWCAP_S390_STFLE) {
- register int r0 __asm__("0");
- register void *r1 __asm__("1");
+ register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
+ register void *r1 __asm__("1") = s390_facilities;
/* stfle 0(%r1) */
- r1 = &s390_facilities;
asm volatile(".word 0xb2b0,0x1000"
- : "=r"(r0) : "0"(0), "r"(r1) : "memory", "cc");
+ : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
}
}
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v5 04/16] tcg/s390x: Merge TCG_AREG0 and TCG_REG_CALL_STACK into TCGReg
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
` (2 preceding siblings ...)
2021-09-15 21:31 ` [PATCH v5 03/16] tcg/s390x: Change FACILITY representation Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
2021-09-15 21:31 ` [PATCH v5 05/16] tcg/s390x: Add host vector framework Richard Henderson
` (11 subsequent siblings)
15 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
Both are aliases for general registers (R10 and R15 respectively), so
they rightly belong in the TCGReg enumeration with the other registers.
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.h | 28 +++++++---------------------
1 file changed, 7 insertions(+), 21 deletions(-)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 18d0d330e6..0174357f1b 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -32,22 +32,13 @@
#define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB)
typedef enum TCGReg {
- TCG_REG_R0 = 0,
- TCG_REG_R1,
- TCG_REG_R2,
- TCG_REG_R3,
- TCG_REG_R4,
- TCG_REG_R5,
- TCG_REG_R6,
- TCG_REG_R7,
- TCG_REG_R8,
- TCG_REG_R9,
- TCG_REG_R10,
- TCG_REG_R11,
- TCG_REG_R12,
- TCG_REG_R13,
- TCG_REG_R14,
- TCG_REG_R15
+ TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3,
+ TCG_REG_R4, TCG_REG_R5, TCG_REG_R6, TCG_REG_R7,
+ TCG_REG_R8, TCG_REG_R9, TCG_REG_R10, TCG_REG_R11,
+ TCG_REG_R12, TCG_REG_R13, TCG_REG_R14, TCG_REG_R15,
+
+ TCG_AREG0 = TCG_REG_R10,
+ TCG_REG_CALL_STACK = TCG_REG_R15
} TCGReg;
#define TCG_TARGET_NB_REGS 16
@@ -138,7 +129,6 @@ extern uint64_t s390_facilities[1];
#define TCG_TARGET_HAS_mulsh_i64 0
/* used for function call generation */
-#define TCG_REG_CALL_STACK TCG_REG_R15
#define TCG_TARGET_STACK_ALIGN 8
#define TCG_TARGET_CALL_STACK_OFFSET 160
@@ -147,10 +137,6 @@ extern uint64_t s390_facilities[1];
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
-enum {
- TCG_AREG0 = TCG_REG_R10,
-};
-
static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
uintptr_t jmp_rw, uintptr_t addr)
{
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v5 05/16] tcg/s390x: Add host vector framework
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
` (3 preceding siblings ...)
2021-09-15 21:31 ` [PATCH v5 04/16] tcg/s390x: Merge TCG_AREG0 and TCG_REG_CALL_STACK into TCGReg Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
2021-09-15 21:31 ` [PATCH v5 06/16] tcg/s390x: Implement tcg_out_ld/st for vector types Richard Henderson
` (10 subsequent siblings)
15 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
Add registers and function stubs. The functionality is disabled
for now by squashing s390_facilities[2], the word that holds the
vector facility bit (129), to 0.
We must still include results for the mandatory opcodes in
tcg_target_op_def, as all opcodes are checked during tcg init.
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target-con-set.h | 4 +
tcg/s390x/tcg-target-con-str.h | 1 +
tcg/s390x/tcg-target.h | 35 ++++++++-
tcg/s390x/tcg-target.opc.h | 12 +++
tcg/s390x/tcg-target.c.inc | 137 ++++++++++++++++++++++++++++++++-
5 files changed, 184 insertions(+), 5 deletions(-)
create mode 100644 tcg/s390x/tcg-target.opc.h
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index 31985e4903..ce9432cfe3 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -13,13 +13,17 @@ C_O0_I1(r)
C_O0_I2(L, L)
C_O0_I2(r, r)
C_O0_I2(r, ri)
+C_O0_I2(v, r)
C_O1_I1(r, L)
C_O1_I1(r, r)
+C_O1_I1(v, r)
+C_O1_I1(v, vr)
C_O1_I2(r, 0, ri)
C_O1_I2(r, 0, rI)
C_O1_I2(r, 0, rJ)
C_O1_I2(r, r, ri)
C_O1_I2(r, rZ, r)
+C_O1_I2(v, v, v)
C_O1_I4(r, r, ri, r, 0)
C_O1_I4(r, r, ri, rI, 0)
C_O2_I2(b, a, 0, r)
diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
index 892d8f8c06..8bb0358ae5 100644
--- a/tcg/s390x/tcg-target-con-str.h
+++ b/tcg/s390x/tcg-target-con-str.h
@@ -10,6 +10,7 @@
*/
REGS('r', ALL_GENERAL_REGS)
REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
+REGS('v', ALL_VECTOR_REGS)
/*
* A (single) even/odd pair for division.
* TODO: Add something to the register allocator to allow
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 0174357f1b..5a03c5f2f4 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -37,11 +37,20 @@ typedef enum TCGReg {
TCG_REG_R8, TCG_REG_R9, TCG_REG_R10, TCG_REG_R11,
TCG_REG_R12, TCG_REG_R13, TCG_REG_R14, TCG_REG_R15,
+ TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
+ TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
+ TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
+ TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+ TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+ TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+ TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+ TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+
TCG_AREG0 = TCG_REG_R10,
TCG_REG_CALL_STACK = TCG_REG_R15
} TCGReg;
-#define TCG_TARGET_NB_REGS 16
+#define TCG_TARGET_NB_REGS 64
/* A list of relevant facilities used by this translator. Some of these
are required for proper operation, and these are checked at startup. */
@@ -54,8 +63,9 @@ typedef enum TCGReg {
#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND
#define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND
#define FACILITY_LOAD_ON_COND2 53
+#define FACILITY_VECTOR 129
-extern uint64_t s390_facilities[1];
+extern uint64_t s390_facilities[3];
#define HAVE_FACILITY(X) \
((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1)
@@ -128,6 +138,27 @@ extern uint64_t s390_facilities[1];
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0
+#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
+#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 0
+#define TCG_TARGET_HAS_orc_vec 0
+#define TCG_TARGET_HAS_not_vec 0
+#define TCG_TARGET_HAS_neg_vec 0
+#define TCG_TARGET_HAS_abs_vec 0
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
+#define TCG_TARGET_HAS_shi_vec 0
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 0
+#define TCG_TARGET_HAS_mul_vec 0
+#define TCG_TARGET_HAS_sat_vec 0
+#define TCG_TARGET_HAS_minmax_vec 0
+#define TCG_TARGET_HAS_bitsel_vec 0
+#define TCG_TARGET_HAS_cmpsel_vec 0
+
/* used for function call generation */
#define TCG_TARGET_STACK_ALIGN 8
#define TCG_TARGET_CALL_STACK_OFFSET 160
diff --git a/tcg/s390x/tcg-target.opc.h b/tcg/s390x/tcg-target.opc.h
new file mode 100644
index 0000000000..67afc82a93
--- /dev/null
+++ b/tcg/s390x/tcg-target.opc.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (c) 2021 Linaro
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ * Target-specific opcodes for host vector expansion. These will be
+ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
+ * consider these to be UNSPEC with names.
+ */
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index f7e4a619e7..c438751834 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -43,6 +43,8 @@
#define TCG_CT_CONST_ZERO 0x800
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
+#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
+
/*
* For softmmu, we need to avoid conflicts with the first 3
* argument registers to perform the tlb lookup, and to call
@@ -268,8 +270,13 @@ typedef enum S390Opcode {
#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
- "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
- "%r8", "%r9", "%r10" "%r11" "%r12" "%r13" "%r14" "%r15"
+ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
+ "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
+ "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
+ "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
};
#endif
@@ -295,6 +302,32 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_R4,
TCG_REG_R3,
TCG_REG_R2,
+
+ /* V8-V15 are call saved, and omitted. */
+ TCG_REG_V0,
+ TCG_REG_V1,
+ TCG_REG_V2,
+ TCG_REG_V3,
+ TCG_REG_V4,
+ TCG_REG_V5,
+ TCG_REG_V6,
+ TCG_REG_V7,
+ TCG_REG_V16,
+ TCG_REG_V17,
+ TCG_REG_V18,
+ TCG_REG_V19,
+ TCG_REG_V20,
+ TCG_REG_V21,
+ TCG_REG_V22,
+ TCG_REG_V23,
+ TCG_REG_V24,
+ TCG_REG_V25,
+ TCG_REG_V26,
+ TCG_REG_V27,
+ TCG_REG_V28,
+ TCG_REG_V29,
+ TCG_REG_V30,
+ TCG_REG_V31,
};
static const int tcg_target_call_iarg_regs[] = {
@@ -377,7 +410,7 @@ static void * const qemu_st_helpers[16] = {
#endif
static const tcg_insn_unit *tb_ret_addr;
-uint64_t s390_facilities[1];
+uint64_t s390_facilities[3];
static bool patch_reloc(tcg_insn_unit *src_rw, int type,
intptr_t value, intptr_t addend)
@@ -2293,6 +2326,42 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
}
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, TCGReg src)
+{
+ g_assert_not_reached();
+}
+
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, TCGReg base, intptr_t offset)
+{
+ g_assert_not_reached();
+}
+
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, int64_t val)
+{
+ g_assert_not_reached();
+}
+
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg *args, const int *const_args)
+{
+ g_assert_not_reached();
+}
+
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+ return 0;
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg a0, ...)
+{
+ g_assert_not_reached();
+}
+
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
switch (op) {
@@ -2433,11 +2502,34 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
? C_O2_I4(r, r, 0, 1, rA, r)
: C_O2_I4(r, r, 0, 1, r, r));
+ case INDEX_op_st_vec:
+ return C_O0_I2(v, r);
+ case INDEX_op_ld_vec:
+ case INDEX_op_dupm_vec:
+ return C_O1_I1(v, r);
+ case INDEX_op_dup_vec:
+ return C_O1_I1(v, vr);
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_cmp_vec:
+ return C_O1_I2(v, v, v);
+
default:
g_assert_not_reached();
}
}
+/*
+ * Mainline glibc added HWCAP_S390_VX before it was kernel abi.
+ * Some distros have fixed this up locally, others have not.
+ */
+#ifndef HWCAP_S390_VXRS
+#define HWCAP_S390_VXRS 2048
+#endif
+
static void query_s390_facilities(void)
{
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
@@ -2452,6 +2544,16 @@ static void query_s390_facilities(void)
asm volatile(".word 0xb2b0,0x1000"
: "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
}
+
+ /*
+ * Use of vector registers requires os support beyond the facility bit.
+ * If the kernel does not advertise support, disable the facility bits.
+ * There is nothing else we currently care about in the 3rd word, so
+ * disable VECTOR with one store.
+ */
+ if (1 || !(hwcap & HWCAP_S390_VXRS)) {
+ s390_facilities[2] = 0;
+ }
}
static void tcg_target_init(TCGContext *s)
@@ -2460,6 +2562,10 @@ static void tcg_target_init(TCGContext *s)
tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
+ if (HAVE_FACILITY(VECTOR)) {
+ tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
+ tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
+ }
tcg_target_call_clobber_regs = 0;
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
@@ -2474,6 +2580,31 @@ static void tcg_target_init(TCGContext *s)
/* The return register can be considered call-clobbered. */
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
+
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
/* XXX many insns can't be used with R0, so we better avoid it for now */
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v5 06/16] tcg/s390x: Implement tcg_out_ld/st for vector types
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
` (4 preceding siblings ...)
2021-09-15 21:31 ` [PATCH v5 05/16] tcg/s390x: Add host vector framework Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
2021-09-16 8:09 ` David Hildenbrand
2021-09-15 21:31 ` [PATCH v5 07/16] tcg/s390x: Implement tcg_out_mov " Richard Henderson
` (9 subsequent siblings)
15 siblings, 1 reply; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.c.inc | 132 +++++++++++++++++++++++++++++++++----
1 file changed, 120 insertions(+), 12 deletions(-)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index c438751834..ea04aefe98 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -265,6 +265,12 @@ typedef enum S390Opcode {
RX_STC = 0x42,
RX_STH = 0x40,
+ VRX_VL = 0xe706,
+ VRX_VLLEZ = 0xe704,
+ VRX_VST = 0xe70e,
+ VRX_VSTEF = 0xe70b,
+ VRX_VSTEG = 0xe70a,
+
NOP = 0x0707,
} S390Opcode;
@@ -412,6 +418,16 @@ static void * const qemu_st_helpers[16] = {
static const tcg_insn_unit *tb_ret_addr;
uint64_t s390_facilities[3];
+static inline bool is_general_reg(TCGReg r)
+{
+ return r <= TCG_REG_R15;
+}
+
+static inline bool is_vector_reg(TCGReg r)
+{
+ return r >= TCG_REG_V0 && r <= TCG_REG_V31;
+}
+
static bool patch_reloc(tcg_insn_unit *src_rw, int type,
intptr_t value, intptr_t addend)
{
@@ -529,6 +545,31 @@ static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
#define tcg_out_insn_RX tcg_out_insn_RS
#define tcg_out_insn_RXY tcg_out_insn_RSY
+static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
+{
+ /*
+ * Shift bit 4 of each regno to its corresponding bit of RXB.
+ * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
+ * is the left-shift of the 4th operand.
+ */
+ return ((v1 & 0x10) << (4 + 3))
+ | ((v2 & 0x10) << (4 + 2))
+ | ((v3 & 0x10) << (4 + 1))
+ | ((v4 & 0x10) << (4 + 0));
+}
+
+static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
+ TCGReg b2, TCGReg x2, intptr_t d2, int m3)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
+ tcg_debug_assert(is_general_reg(x2));
+ tcg_debug_assert(is_general_reg(b2));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
+ tcg_out16(s, (b2 << 12) | d2);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
+}
+
/* Emit an opcode with "type-checking" of the format. */
#define tcg_out_insn(S, FMT, OP, ...) \
glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
@@ -705,25 +746,92 @@ static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
}
}
+static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
+ TCGReg data, TCGReg base, TCGReg index,
+ tcg_target_long ofs, int m3)
+{
+ if (ofs < 0 || ofs >= 0x1000) {
+ if (ofs >= -0x80000 && ofs < 0x80000) {
+ tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
+ base = TCG_TMP0;
+ index = TCG_REG_NONE;
+ ofs = 0;
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
+ if (index != TCG_REG_NONE) {
+ tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
+ }
+ index = TCG_TMP0;
+ ofs = 0;
+ }
+ }
+ tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
+}
/* load data without address translation or endianness conversion */
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
- TCGReg base, intptr_t ofs)
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
+ TCGReg base, intptr_t ofs)
{
- if (type == TCG_TYPE_I32) {
- tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
- } else {
- tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (likely(is_general_reg(data))) {
+ tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
+ break;
+ }
+ tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
+ break;
+
+ case TCG_TYPE_I64:
+ if (likely(is_general_reg(data))) {
+ tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
+ break;
+ }
+ /* fallthru */
+
+ case TCG_TYPE_V64:
+ tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
+ break;
+
+ case TCG_TYPE_V128:
+ /* Hint quadword aligned. */
+ tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
+ break;
+
+ default:
+ g_assert_not_reached();
}
}
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
- TCGReg base, intptr_t ofs)
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
+ TCGReg base, intptr_t ofs)
{
- if (type == TCG_TYPE_I32) {
- tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
- } else {
- tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (likely(is_general_reg(data))) {
+ tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
+ } else {
+ tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
+ }
+ break;
+
+ case TCG_TYPE_I64:
+ if (likely(is_general_reg(data))) {
+ tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
+ break;
+ }
+ /* fallthru */
+
+ case TCG_TYPE_V64:
+ tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
+ break;
+
+ case TCG_TYPE_V128:
+ /* Hint quadword aligned. */
+ tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
+ break;
+
+ default:
+ g_assert_not_reached();
}
}
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v5 07/16] tcg/s390x: Implement tcg_out_mov for vector types
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
` (5 preceding siblings ...)
2021-09-15 21:31 ` [PATCH v5 06/16] tcg/s390x: Implement tcg_out_ld/st for vector types Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
2021-09-15 21:31 ` [PATCH v5 08/16] tcg/s390x: Implement tcg_out_dup*_vec Richard Henderson
` (8 subsequent siblings)
15 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.c.inc | 72 +++++++++++++++++++++++++++++++++++---
1 file changed, 68 insertions(+), 4 deletions(-)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index ea04aefe98..76061bfd80 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -265,6 +265,11 @@ typedef enum S390Opcode {
RX_STC = 0x42,
RX_STH = 0x40,
+ VRRa_VLR = 0xe756,
+
+ VRSb_VLVG = 0xe722,
+ VRSc_VLGV = 0xe721,
+
VRX_VL = 0xe706,
VRX_VLLEZ = 0xe704,
VRX_VST = 0xe70e,
@@ -558,6 +563,39 @@ static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
| ((v4 & 0x10) << (4 + 0));
}
+static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
+ TCGReg v1, TCGReg v2, int m3)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(is_vector_reg(v2));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
+ tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
+}
+
+static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
+ intptr_t d2, TCGReg b2, TCGReg r3, int m4)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
+ tcg_debug_assert(is_general_reg(b2));
+ tcg_debug_assert(is_general_reg(r3));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
+ tcg_out16(s, b2 << 12 | d2);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
+}
+
+static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
+ intptr_t d2, TCGReg b2, TCGReg v3, int m4)
+{
+ tcg_debug_assert(is_general_reg(r1));
+ tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
+ tcg_debug_assert(is_general_reg(b2));
+ tcg_debug_assert(is_vector_reg(v3));
+ tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
+ tcg_out16(s, b2 << 12 | d2);
+ tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
+}
+
static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
TCGReg b2, TCGReg x2, intptr_t d2, int m3)
{
@@ -591,12 +629,38 @@ static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
- if (src != dst) {
- if (type == TCG_TYPE_I32) {
+ if (src == dst) {
+ return true;
+ }
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (likely(is_general_reg(dst) && is_general_reg(src))) {
tcg_out_insn(s, RR, LR, dst, src);
- } else {
- tcg_out_insn(s, RRE, LGR, dst, src);
+ break;
}
+ /* fallthru */
+
+ case TCG_TYPE_I64:
+ if (likely(is_general_reg(dst))) {
+ if (likely(is_general_reg(src))) {
+ tcg_out_insn(s, RRE, LGR, dst, src);
+ } else {
+ tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
+ }
+ break;
+ } else if (is_general_reg(src)) {
+ tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
+ break;
+ }
+ /* fallthru */
+
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ tcg_out_insn(s, VRRa, VLR, dst, src, 0);
+ break;
+
+ default:
+ g_assert_not_reached();
}
return true;
}
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v5 08/16] tcg/s390x: Implement tcg_out_dup*_vec
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
` (6 preceding siblings ...)
2021-09-15 21:31 ` [PATCH v5 07/16] tcg/s390x: Implement tcg_out_mov " Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
2021-09-15 21:31 ` [PATCH v5 09/16] tcg/s390x: Implement minimal vector operations Richard Henderson
` (7 subsequent siblings)
15 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.c.inc | 122 ++++++++++++++++++++++++++++++++++++-
1 file changed, 119 insertions(+), 3 deletions(-)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 76061bfd80..b9de4dc821 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -265,13 +265,20 @@ typedef enum S390Opcode {
RX_STC = 0x42,
RX_STH = 0x40,
+ VRIa_VGBM = 0xe744,
+ VRIa_VREPI = 0xe745,
+ VRIb_VGM = 0xe746,
+ VRIc_VREP = 0xe74d,
+
VRRa_VLR = 0xe756,
+ VRRf_VLVGP = 0xe762,
VRSb_VLVG = 0xe722,
VRSc_VLGV = 0xe721,
VRX_VL = 0xe706,
VRX_VLLEZ = 0xe704,
+ VRX_VLREP = 0xe705,
VRX_VST = 0xe70e,
VRX_VSTEF = 0xe70b,
VRX_VSTEG = 0xe70a,
@@ -563,6 +570,34 @@ static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
| ((v4 & 0x10) << (4 + 0));
}
+static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
+ TCGReg v1, uint16_t i2, int m3)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
+ tcg_out16(s, i2);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
+}
+
+static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
+ TCGReg v1, uint8_t i2, uint8_t i3, int m4)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
+ tcg_out16(s, (i2 << 8) | (i3 & 0xff));
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
+}
+
+static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
+ TCGReg v1, uint16_t i2, TCGReg v3, int m4)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(is_vector_reg(v3));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
+ tcg_out16(s, i2);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
+}
+
static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
TCGReg v1, TCGReg v2, int m3)
{
@@ -572,6 +607,17 @@ static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
}
+static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
+ TCGReg v1, TCGReg r2, TCGReg r3)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(is_general_reg(r2));
+ tcg_debug_assert(is_general_reg(r3));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
+ tcg_out16(s, r3 << 12);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
+}
+
static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
intptr_t d2, TCGReg b2, TCGReg r3, int m4)
{
@@ -2501,19 +2547,91 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg src)
{
- g_assert_not_reached();
+ if (is_general_reg(src)) {
+ /* Replicate general register into two MO_64. */
+ tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
+ if (vece == MO_64) {
+ return true;
+ }
+ /* The value now lives in dst; VREP below requires a vector source. */
+ src = dst;
+ }
+
+ /*
+ * Recall that the "standard" integer, within a vector, is the
+ * rightmost element of the leftmost doubleword, a-la VLLEZ.
+ */
+ tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
+ return true;
}
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg base, intptr_t offset)
{
- g_assert_not_reached();
+ tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
+ return true;
}
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, int64_t val)
{
- g_assert_not_reached();
+ int i, mask, msb, lsb;
+
+ /* Look for int16_t elements. */
+ if (vece <= MO_16 ||
+ (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
+ tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
+ return;
+ }
+
+ /* Look for bit masks; VGM takes I2 = first (msb) and I3 = last (lsb) bit. */
+ if (vece == MO_32) {
+ if (risbg_mask((int32_t)val)) {
+ /* Handle wraparound by swapping msb and lsb. */
+ if ((val & 0x80000001u) == 0x80000001u) {
+ msb = 32 - ctz32(~val);
+ lsb = clz32(~val) - 1;
+ } else {
+ msb = clz32(val);
+ lsb = 31 - ctz32(val);
+ }
+ tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
+ return;
+ }
+ } else {
+ if (risbg_mask(val)) {
+ /* Handle wraparound by swapping msb and lsb. */
+ if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
+ /* Both end bits are set, so locate the run of ones via ~val. */
+ msb = 64 - ctz64(~val);
+ lsb = clz64(~val) - 1;
+ } else {
+ msb = clz64(val);
+ lsb = 63 - ctz64(val);
+ }
+ tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
+ return;
+ }
+ }
+
+ /* Look for all bytes 0x00 or 0xff. */
+ for (i = mask = 0; i < 8; i++) {
+ uint8_t byte = val >> (i * 8);
+ if (byte == 0xff) {
+ mask |= 1 << i;
+ } else if (byte != 0) {
+ break;
+ }
+ }
+ if (i == 8) {
+ tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
+ return;
+ }
+
+ /* Otherwise, stuff it in the constant pool. */
+ tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
+ new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
+ tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
}
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v5 09/16] tcg/s390x: Implement minimal vector operations
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
` (7 preceding siblings ...)
2021-09-15 21:31 ` [PATCH v5 08/16] tcg/s390x: Implement tcg_out_dup*_vec Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
2021-09-15 21:31 ` [PATCH v5 10/16] tcg/s390x: Implement andc, orc, abs, neg, not " Richard Henderson
` (6 subsequent siblings)
15 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
Implementing add, sub, and, or, xor as the minimal set.
This allows us to actually enable vectors in query_s390_facilities.
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.c.inc | 154 ++++++++++++++++++++++++++++++++++++-
1 file changed, 150 insertions(+), 4 deletions(-)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index b9de4dc821..28729b6ffa 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -271,6 +271,14 @@ typedef enum S390Opcode {
VRIc_VREP = 0xe74d,
VRRa_VLR = 0xe756,
+ VRRc_VA = 0xe7f3,
+ VRRc_VCEQ = 0xe7f8, /* we leave the m5 cs field 0 */
+ VRRc_VCH = 0xe7fb, /* " */
+ VRRc_VCHL = 0xe7f9, /* " */
+ VRRc_VN = 0xe768,
+ VRRc_VO = 0xe76a,
+ VRRc_VS = 0xe7f7,
+ VRRc_VX = 0xe76d,
VRRf_VLVGP = 0xe762,
VRSb_VLVG = 0xe722,
@@ -607,6 +615,17 @@ static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
}
+static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
+ TCGReg v1, TCGReg v2, TCGReg v3, int m4)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(is_vector_reg(v2));
+ tcg_debug_assert(is_vector_reg(v3));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
+ tcg_out16(s, v3 << 12);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
+}
+
static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
TCGReg v1, TCGReg r2, TCGReg r3)
{
@@ -2636,18 +2655,145 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
const TCGArg *args, const int *const_args)
{
- g_assert_not_reached();
+ TCGType type = vecl + TCG_TYPE_V64;
+ TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
+
+ switch (opc) {
+ case INDEX_op_ld_vec:
+ tcg_out_ld(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_st_vec:
+ tcg_out_st(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_dupm_vec:
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+ break;
+
+ case INDEX_op_add_vec:
+ tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
+ break;
+ case INDEX_op_sub_vec:
+ tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
+ break;
+ case INDEX_op_and_vec:
+ tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
+ break;
+ case INDEX_op_or_vec:
+ tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
+ break;
+ case INDEX_op_xor_vec:
+ tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
+ break;
+
+ case INDEX_op_cmp_vec:
+ switch ((TCGCond)args[3]) {
+ case TCG_COND_EQ:
+ tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
+ break;
+ case TCG_COND_GT:
+ tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
+ break;
+ case TCG_COND_GTU:
+ tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ break;
+
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
+ default:
+ g_assert_not_reached();
+ }
}
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
- return 0;
+ switch (opc) {
+ case INDEX_op_add_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_xor_vec:
+ return 1;
+ case INDEX_op_cmp_vec:
+ return -1;
+ default:
+ return 0;
+ }
+}
+
+static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+{
+ bool need_swap = false, need_inv = false;
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_GT:
+ case TCG_COND_GTU:
+ break;
+ case TCG_COND_NE:
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ need_inv = true;
+ break;
+ case TCG_COND_LT:
+ case TCG_COND_LTU:
+ need_swap = true;
+ break;
+ case TCG_COND_GE:
+ case TCG_COND_GEU:
+ need_swap = need_inv = true;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (need_inv) {
+ cond = tcg_invert_cond(cond);
+ }
+ if (need_swap) {
+ TCGv_vec t1;
+ t1 = v1, v1 = v2, v2 = t1;
+ cond = tcg_swap_cond(cond);
+ }
+
+ vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
+
+ return need_inv;
+}
+
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+{
+ if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
+ tcg_gen_not_vec(vece, v0, v0);
+ }
}
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
- g_assert_not_reached();
+ va_list va;
+ TCGv_vec v0, v1, v2;
+
+ va_start(va, a0);
+ v0 = temp_tcgv_vec(arg_temp(a0));
+ v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+
+ switch (opc) {
+ case INDEX_op_cmp_vec:
+ expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ va_end(va);
}
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
@@ -2839,7 +2985,7 @@ static void query_s390_facilities(void)
* There is nothing else we currently care about in the 3rd word, so
* disable VECTOR with one store.
*/
- if (1 || !(hwcap & HWCAP_S390_VXRS)) {
+ if (!(hwcap & HWCAP_S390_VXRS)) {
s390_facilities[2] = 0;
}
}
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v5 10/16] tcg/s390x: Implement andc, orc, abs, neg, not vector operations
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
` (8 preceding siblings ...)
2021-09-15 21:31 ` [PATCH v5 09/16] tcg/s390x: Implement minimal vector operations Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
2021-09-16 8:12 ` David Hildenbrand
2021-09-15 21:31 ` [PATCH v5 11/16] tcg/s390x: Implement TCG_TARGET_HAS_mul_vec Richard Henderson
` (5 subsequent siblings)
15 siblings, 1 reply; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
These logical and arithmetic operations are optional but trivial.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target-con-set.h | 1 +
tcg/s390x/tcg-target.h | 11 ++++++-----
tcg/s390x/tcg-target.c.inc | 32 ++++++++++++++++++++++++++++++++
3 files changed, 39 insertions(+), 5 deletions(-)
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index ce9432cfe3..cb953896d5 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -17,6 +17,7 @@ C_O0_I2(v, r)
C_O1_I1(r, L)
C_O1_I1(r, r)
C_O1_I1(v, r)
+C_O1_I1(v, v)
C_O1_I1(v, vr)
C_O1_I2(r, 0, ri)
C_O1_I2(r, 0, rI)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 5a03c5f2f4..a42074e451 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -64,6 +64,7 @@ typedef enum TCGReg {
#define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND
#define FACILITY_LOAD_ON_COND2 53
#define FACILITY_VECTOR 129
+#define FACILITY_VECTOR_ENH1 135
extern uint64_t s390_facilities[3];
@@ -142,11 +143,11 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
#define TCG_TARGET_HAS_v256 0
-#define TCG_TARGET_HAS_andc_vec 0
-#define TCG_TARGET_HAS_orc_vec 0
-#define TCG_TARGET_HAS_not_vec 0
-#define TCG_TARGET_HAS_neg_vec 0
-#define TCG_TARGET_HAS_abs_vec 0
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec HAVE_FACILITY(VECTOR_ENH1)
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 1
#define TCG_TARGET_HAS_roti_vec 0
#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 0
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 28729b6ffa..fa4a142818 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -270,13 +270,18 @@ typedef enum S390Opcode {
VRIb_VGM = 0xe746,
VRIc_VREP = 0xe74d,
+ VRRa_VLC = 0xe7de,
+ VRRa_VLP = 0xe7df,
VRRa_VLR = 0xe756,
VRRc_VA = 0xe7f3,
VRRc_VCEQ = 0xe7f8, /* we leave the m5 cs field 0 */
VRRc_VCH = 0xe7fb, /* " */
VRRc_VCHL = 0xe7f9, /* " */
VRRc_VN = 0xe768,
+ VRRc_VNC = 0xe769,
+ VRRc_VNO = 0xe76b,
VRRc_VO = 0xe76a,
+ VRRc_VOC = 0xe76f,
VRRc_VS = 0xe7f7,
VRRc_VX = 0xe76d,
VRRf_VLVGP = 0xe762,
@@ -2669,6 +2674,16 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
break;
+ case INDEX_op_abs_vec:
+ tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
+ break;
+ case INDEX_op_neg_vec:
+ tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
+ break;
+ case INDEX_op_not_vec:
+ tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
+ break;
+
case INDEX_op_add_vec:
tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
break;
@@ -2678,9 +2693,15 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_and_vec:
tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
break;
+ case INDEX_op_andc_vec:
+ tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
+ break;
case INDEX_op_or_vec:
tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
break;
+ case INDEX_op_orc_vec:
+ tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
+ break;
case INDEX_op_xor_vec:
tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
break;
@@ -2711,9 +2732,14 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
switch (opc) {
+ case INDEX_op_abs_vec:
case INDEX_op_add_vec:
case INDEX_op_and_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_not_vec:
case INDEX_op_or_vec:
+ case INDEX_op_orc_vec:
case INDEX_op_sub_vec:
case INDEX_op_xor_vec:
return 1;
@@ -2943,10 +2969,16 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
return C_O1_I1(v, r);
case INDEX_op_dup_vec:
return C_O1_I1(v, vr);
+ case INDEX_op_abs_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_not_vec:
+ return C_O1_I1(v, v);
case INDEX_op_add_vec:
case INDEX_op_sub_vec:
case INDEX_op_and_vec:
+ case INDEX_op_andc_vec:
case INDEX_op_or_vec:
+ case INDEX_op_orc_vec:
case INDEX_op_xor_vec:
case INDEX_op_cmp_vec:
return C_O1_I2(v, v, v);
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v5 10/16] tcg/s390x: Implement andc, orc, abs, neg, not vector operations
2021-09-15 21:31 ` [PATCH v5 10/16] tcg/s390x: Implement andc, orc, abs, neg, not " Richard Henderson
@ 2021-09-16 8:12 ` David Hildenbrand
0 siblings, 0 replies; 23+ messages in thread
From: David Hildenbrand @ 2021-09-16 8:12 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 15.09.21 23:31, Richard Henderson wrote:
> These logical and arithmetic operations are optional but trivial.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/s390x/tcg-target-con-set.h | 1 +
> tcg/s390x/tcg-target.h | 11 ++++++-----
> tcg/s390x/tcg-target.c.inc | 32 ++++++++++++++++++++++++++++++++
> 3 files changed, 39 insertions(+), 5 deletions(-)
>
Reviewed-by: David Hildenbrand <david@redhat.com>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v5 11/16] tcg/s390x: Implement TCG_TARGET_HAS_mul_vec
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
` (9 preceding siblings ...)
2021-09-15 21:31 ` [PATCH v5 10/16] tcg/s390x: Implement andc, orc, abs, neg, not " Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
2021-09-15 21:31 ` [PATCH v5 12/16] tcg/s390x: Implement vector shift operations Richard Henderson
` (4 subsequent siblings)
15 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.h | 2 +-
tcg/s390x/tcg-target.c.inc | 7 +++++++
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index a42074e451..1c581a2f60 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -154,7 +154,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_shi_vec 0
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 0
-#define TCG_TARGET_HAS_mul_vec 0
+#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_sat_vec 0
#define TCG_TARGET_HAS_minmax_vec 0
#define TCG_TARGET_HAS_bitsel_vec 0
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index fa4a142818..aa75d779be 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -277,6 +277,7 @@ typedef enum S390Opcode {
VRRc_VCEQ = 0xe7f8, /* we leave the m5 cs field 0 */
VRRc_VCH = 0xe7fb, /* " */
VRRc_VCHL = 0xe7f9, /* " */
+ VRRc_VML = 0xe7a2,
VRRc_VN = 0xe768,
VRRc_VNC = 0xe769,
VRRc_VNO = 0xe76b,
@@ -2696,6 +2697,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_andc_vec:
tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
break;
+ case INDEX_op_mul_vec:
+ tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
+ break;
case INDEX_op_or_vec:
tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
break;
@@ -2745,6 +2749,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
return 1;
case INDEX_op_cmp_vec:
return -1;
+ case INDEX_op_mul_vec:
+ return vece < MO_64;
default:
return 0;
}
@@ -2981,6 +2987,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_orc_vec:
case INDEX_op_xor_vec:
case INDEX_op_cmp_vec:
+ case INDEX_op_mul_vec:
return C_O1_I2(v, v, v);
default:
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v5 12/16] tcg/s390x: Implement vector shift operations
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
` (10 preceding siblings ...)
2021-09-15 21:31 ` [PATCH v5 11/16] tcg/s390x: Implement TCG_TARGET_HAS_mul_vec Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
2021-09-15 21:31 ` [PATCH v5 13/16] tcg/s390x: Implement TCG_TARGET_HAS_minmax_vec Richard Henderson
` (3 subsequent siblings)
15 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target-con-set.h | 1 +
tcg/s390x/tcg-target.h | 12 ++---
tcg/s390x/tcg-target.c.inc | 93 +++++++++++++++++++++++++++++++++-
3 files changed, 99 insertions(+), 7 deletions(-)
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index cb953896d5..49b98f33b9 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -24,6 +24,7 @@ C_O1_I2(r, 0, rI)
C_O1_I2(r, 0, rJ)
C_O1_I2(r, r, ri)
C_O1_I2(r, rZ, r)
+C_O1_I2(v, v, r)
C_O1_I2(v, v, v)
C_O1_I4(r, r, ri, r, 0)
C_O1_I4(r, r, ri, rI, 0)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 1c581a2f60..d7d204b782 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -148,12 +148,12 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec 1
#define TCG_TARGET_HAS_abs_vec 1
-#define TCG_TARGET_HAS_roti_vec 0
-#define TCG_TARGET_HAS_rots_vec 0
-#define TCG_TARGET_HAS_rotv_vec 0
-#define TCG_TARGET_HAS_shi_vec 0
-#define TCG_TARGET_HAS_shs_vec 0
-#define TCG_TARGET_HAS_shv_vec 0
+#define TCG_TARGET_HAS_roti_vec 1
+#define TCG_TARGET_HAS_rots_vec 1
+#define TCG_TARGET_HAS_rotv_vec 1
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 1
+#define TCG_TARGET_HAS_shv_vec 1
#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_sat_vec 0
#define TCG_TARGET_HAS_minmax_vec 0
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index aa75d779be..a56733e09a 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -277,6 +277,10 @@ typedef enum S390Opcode {
VRRc_VCEQ = 0xe7f8, /* we leave the m5 cs field 0 */
VRRc_VCH = 0xe7fb, /* " */
VRRc_VCHL = 0xe7f9, /* " */
+ VRRc_VERLLV = 0xe773,
+ VRRc_VESLV = 0xe770,
+ VRRc_VESRAV = 0xe77a,
+ VRRc_VESRLV = 0xe778,
VRRc_VML = 0xe7a2,
VRRc_VN = 0xe768,
VRRc_VNC = 0xe769,
@@ -287,6 +291,10 @@ typedef enum S390Opcode {
VRRc_VX = 0xe76d,
VRRf_VLVGP = 0xe762,
+ VRSa_VERLL = 0xe733,
+ VRSa_VESL = 0xe730,
+ VRSa_VESRA = 0xe73a,
+ VRSa_VESRL = 0xe738,
VRSb_VLVG = 0xe722,
VRSc_VLGV = 0xe721,
@@ -643,6 +651,18 @@ static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
}
+static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
+ intptr_t d2, TCGReg b2, TCGReg v3, int m4)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
+ tcg_debug_assert(is_general_reg(b2));
+ tcg_debug_assert(is_vector_reg(v3));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
+ tcg_out16(s, b2 << 12 | d2);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
+}
+
static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
intptr_t d2, TCGReg b2, TCGReg r3, int m4)
{
@@ -2710,6 +2730,43 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
break;
+ case INDEX_op_shli_vec:
+ tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
+ break;
+ case INDEX_op_shri_vec:
+ tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
+ break;
+ case INDEX_op_sari_vec:
+ tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
+ break;
+ case INDEX_op_rotli_vec:
+ tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
+ break;
+ case INDEX_op_shls_vec:
+ tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
+ break;
+ case INDEX_op_shrs_vec:
+ tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
+ break;
+ case INDEX_op_sars_vec:
+ tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
+ break;
+ case INDEX_op_rotls_vec:
+ tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
+ break;
+ case INDEX_op_shlv_vec:
+ tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
+ break;
+ case INDEX_op_shrv_vec:
+ tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
+ break;
+ case INDEX_op_sarv_vec:
+ tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
+ break;
+ case INDEX_op_rotlv_vec:
+ tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
+ break;
+
case INDEX_op_cmp_vec:
switch ((TCGCond)args[3]) {
case TCG_COND_EQ:
@@ -2744,10 +2801,23 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_not_vec:
case INDEX_op_or_vec:
case INDEX_op_orc_vec:
+ case INDEX_op_rotli_vec:
+ case INDEX_op_rotls_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_sars_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shls_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_shrs_vec:
+ case INDEX_op_shrv_vec:
case INDEX_op_sub_vec:
case INDEX_op_xor_vec:
return 1;
case INDEX_op_cmp_vec:
+ case INDEX_op_rotrv_vec:
return -1;
case INDEX_op_mul_vec:
return vece < MO_64;
@@ -2810,7 +2880,7 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
va_list va;
- TCGv_vec v0, v1, v2;
+ TCGv_vec v0, v1, v2, t0;
va_start(va, a0);
v0 = temp_tcgv_vec(arg_temp(a0));
@@ -2822,6 +2892,13 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
break;
+ case INDEX_op_rotrv_vec:
+ t0 = tcg_temp_new_vec(type);
+ tcg_gen_neg_vec(vece, t0, v2);
+ tcg_gen_rotlv_vec(vece, v0, v1, t0);
+ tcg_temp_free_vec(t0);
+ break;
+
default:
g_assert_not_reached();
}
@@ -2978,6 +3055,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_abs_vec:
case INDEX_op_neg_vec:
case INDEX_op_not_vec:
+ case INDEX_op_rotli_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
return C_O1_I1(v, v);
case INDEX_op_add_vec:
case INDEX_op_sub_vec:
@@ -2988,7 +3069,17 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_xor_vec:
case INDEX_op_cmp_vec:
case INDEX_op_mul_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
return C_O1_I2(v, v, v);
+ case INDEX_op_rotls_vec:
+ case INDEX_op_shls_vec:
+ case INDEX_op_shrs_vec:
+ case INDEX_op_sars_vec:
+ return C_O1_I2(v, v, r);
default:
g_assert_not_reached();
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v5 13/16] tcg/s390x: Implement TCG_TARGET_HAS_minmax_vec
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
` (11 preceding siblings ...)
2021-09-15 21:31 ` [PATCH v5 12/16] tcg/s390x: Implement vector shift operations Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
2021-09-15 21:31 ` [PATCH v5 14/16] tcg/s390x: Implement TCG_TARGET_HAS_sat_vec Richard Henderson
` (2 subsequent siblings)
15 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.h | 2 +-
tcg/s390x/tcg-target.c.inc | 25 +++++++++++++++++++++++++
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index d7d204b782..a79f4f187a 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -156,7 +156,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_shv_vec 1
#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_sat_vec 0
-#define TCG_TARGET_HAS_minmax_vec 0
+#define TCG_TARGET_HAS_minmax_vec 1
#define TCG_TARGET_HAS_bitsel_vec 0
#define TCG_TARGET_HAS_cmpsel_vec 0
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index a56733e09a..c3bd22b116 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -282,6 +282,10 @@ typedef enum S390Opcode {
VRRc_VESRAV = 0xe77a,
VRRc_VESRLV = 0xe778,
VRRc_VML = 0xe7a2,
+ VRRc_VMN = 0xe7fe,
+ VRRc_VMNL = 0xe7fc,
+ VRRc_VMX = 0xe7ff,
+ VRRc_VMXL = 0xe7fd,
VRRc_VN = 0xe768,
VRRc_VNC = 0xe769,
VRRc_VNO = 0xe76b,
@@ -2767,6 +2771,19 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
break;
+ case INDEX_op_smin_vec:
+ tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
+ break;
+ case INDEX_op_smax_vec:
+ tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
+ break;
+ case INDEX_op_umin_vec:
+ tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
+ break;
+ case INDEX_op_umax_vec:
+ tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
+ break;
+
case INDEX_op_cmp_vec:
switch ((TCGCond)args[3]) {
case TCG_COND_EQ:
@@ -2813,7 +2830,11 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_shri_vec:
case INDEX_op_shrs_vec:
case INDEX_op_shrv_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
case INDEX_op_sub_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
case INDEX_op_xor_vec:
return 1;
case INDEX_op_cmp_vec:
@@ -3074,6 +3095,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
return C_O1_I2(v, v, v);
case INDEX_op_rotls_vec:
case INDEX_op_shls_vec:
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v5 14/16] tcg/s390x: Implement TCG_TARGET_HAS_sat_vec
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
` (12 preceding siblings ...)
2021-09-15 21:31 ` [PATCH v5 13/16] tcg/s390x: Implement TCG_TARGET_HAS_minmax_vec Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
2021-09-15 21:31 ` [PATCH v5 15/16] tcg/s390x: Implement TCG_TARGET_HAS_bitsel_vec Richard Henderson
2021-09-15 21:31 ` [PATCH v5 16/16] tcg/s390x: Implement TCG_TARGET_HAS_cmpsel_vec Richard Henderson
15 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
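The unsigned saturations are handled via generic code
using min/max. The signed saturations are expanded, for
element sizes smaller than 64 bits, by sign-extending unpack
to double-sized elements, doing the arithmetic at that width,
and then packing back with saturation.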
Since all operations are done via expansion, do not
actually set TCG_TARGET_HAS_sat_vec.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.opc.h | 3 ++
tcg/s390x/tcg-target.c.inc | 63 ++++++++++++++++++++++++++++++++++++++
2 files changed, 66 insertions(+)
diff --git a/tcg/s390x/tcg-target.opc.h b/tcg/s390x/tcg-target.opc.h
index 67afc82a93..0eb2350fb3 100644
--- a/tcg/s390x/tcg-target.opc.h
+++ b/tcg/s390x/tcg-target.opc.h
@@ -10,3 +10,6 @@
* emitted by tcg_expand_vec_op. For those familiar with GCC internals,
* consider these to be UNSPEC with names.
*/
+DEF(s390_vuph_vec, 1, 1, 0, IMPLVEC)
+DEF(s390_vupl_vec, 1, 1, 0, IMPLVEC)
+DEF(s390_vpks_vec, 1, 2, 0, IMPLVEC)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index c3bd22b116..4a56532011 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -291,7 +291,10 @@ typedef enum S390Opcode {
VRRc_VNO = 0xe76b,
VRRc_VO = 0xe76a,
VRRc_VOC = 0xe76f,
+ VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */
VRRc_VS = 0xe7f7,
+ VRRa_VUPH = 0xe7d7,
+ VRRa_VUPL = 0xe7d6,
VRRc_VX = 0xe76d,
VRRf_VLVGP = 0xe762,
@@ -2800,6 +2803,16 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
}
break;
+ case INDEX_op_s390_vuph_vec:
+ tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
+ break;
+ case INDEX_op_s390_vupl_vec:
+ tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
+ break;
+ case INDEX_op_s390_vpks_vec:
+ tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
+ break;
+
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
default:
@@ -2842,6 +2855,9 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
return -1;
case INDEX_op_mul_vec:
return vece < MO_64;
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ return vece < MO_64 ? -1 : 0;
default:
return 0;
}
@@ -2897,6 +2913,43 @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
}
}
+static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
+{
+ TCGv_vec h1 = tcg_temp_new_vec(type);
+ TCGv_vec h2 = tcg_temp_new_vec(type);
+ TCGv_vec l1 = tcg_temp_new_vec(type);
+ TCGv_vec l2 = tcg_temp_new_vec(type);
+
+ tcg_debug_assert (vece < MO_64);
+
+ /* Unpack with sign-extension. */
+ vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
+ tcgv_vec_arg(h1), tcgv_vec_arg(v1));
+ vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
+ tcgv_vec_arg(h2), tcgv_vec_arg(v2));
+
+ vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
+ tcgv_vec_arg(l1), tcgv_vec_arg(v1));
+ vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
+ tcgv_vec_arg(l2), tcgv_vec_arg(v2));
+
+ /* Arithmetic on a wider element size. */
+ vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
+ tcgv_vec_arg(h1), tcgv_vec_arg(h2));
+ vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
+ tcgv_vec_arg(l1), tcgv_vec_arg(l2));
+
+ /* Pack with saturation. */
+ vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
+ tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
+
+ tcg_temp_free_vec(h1);
+ tcg_temp_free_vec(h2);
+ tcg_temp_free_vec(l1);
+ tcg_temp_free_vec(l2);
+}
+
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
@@ -2920,6 +2973,13 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
tcg_temp_free_vec(t0);
break;
+ case INDEX_op_ssadd_vec:
+ expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
+ break;
+ case INDEX_op_sssub_vec:
+ expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
+ break;
+
default:
g_assert_not_reached();
}
@@ -3080,6 +3140,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_sari_vec:
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
+ case INDEX_op_s390_vuph_vec:
+ case INDEX_op_s390_vupl_vec:
return C_O1_I1(v, v);
case INDEX_op_add_vec:
case INDEX_op_sub_vec:
@@ -3099,6 +3161,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_smin_vec:
case INDEX_op_umax_vec:
case INDEX_op_umin_vec:
+ case INDEX_op_s390_vpks_vec:
return C_O1_I2(v, v, v);
case INDEX_op_rotls_vec:
case INDEX_op_shls_vec:
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v5 15/16] tcg/s390x: Implement TCG_TARGET_HAS_bitsel_vec
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
` (13 preceding siblings ...)
2021-09-15 21:31 ` [PATCH v5 14/16] tcg/s390x: Implement TCG_TARGET_HAS_sat_vec Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
2021-09-15 21:31 ` [PATCH v5 16/16] tcg/s390x: Implement TCG_TARGET_HAS_cmpsel_vec Richard Henderson
15 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target-con-set.h | 1 +
tcg/s390x/tcg-target.h | 2 +-
tcg/s390x/tcg-target.c.inc | 20 ++++++++++++++++++++
3 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index 49b98f33b9..426dd92e51 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -26,6 +26,7 @@ C_O1_I2(r, r, ri)
C_O1_I2(r, rZ, r)
C_O1_I2(v, v, r)
C_O1_I2(v, v, v)
+C_O1_I3(v, v, v, v)
C_O1_I4(r, r, ri, r, 0)
C_O1_I4(r, r, ri, rI, 0)
C_O2_I2(b, a, 0, r)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index a79f4f187a..527ada0f63 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -157,7 +157,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_sat_vec 0
#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 0
+#define TCG_TARGET_HAS_bitsel_vec 1
#define TCG_TARGET_HAS_cmpsel_vec 0
/* used for function call generation */
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 4a56532011..5530c974a6 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -296,6 +296,7 @@ typedef enum S390Opcode {
VRRa_VUPH = 0xe7d7,
VRRa_VUPL = 0xe7d6,
VRRc_VX = 0xe76d,
+ VRRe_VSEL = 0xe78d,
VRRf_VLVGP = 0xe762,
VRSa_VERLL = 0xe733,
@@ -647,6 +648,18 @@ static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
}
+static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
+ TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(is_vector_reg(v2));
+ tcg_debug_assert(is_vector_reg(v3));
+ tcg_debug_assert(is_vector_reg(v4));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
+ tcg_out16(s, v3 << 12);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
+}
+
static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
TCGReg v1, TCGReg r2, TCGReg r3)
{
@@ -2787,6 +2800,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
break;
+ case INDEX_op_bitsel_vec:
+ tcg_out_insn(s, VRRe, VSEL, a0, a1, a2, args[3]);
+ break;
+
case INDEX_op_cmp_vec:
switch ((TCGCond)args[3]) {
case TCG_COND_EQ:
@@ -2827,6 +2844,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_add_vec:
case INDEX_op_and_vec:
case INDEX_op_andc_vec:
+ case INDEX_op_bitsel_vec:
case INDEX_op_neg_vec:
case INDEX_op_not_vec:
case INDEX_op_or_vec:
@@ -3168,6 +3186,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_shrs_vec:
case INDEX_op_sars_vec:
return C_O1_I2(v, v, r);
+ case INDEX_op_bitsel_vec:
+ return C_O1_I3(v, v, v, v);
default:
g_assert_not_reached();
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v5 16/16] tcg/s390x: Implement TCG_TARGET_HAS_cmpsel_vec
2021-09-15 21:30 [PATCH v5 00/16] tcg/s390x: host vector support Richard Henderson
` (14 preceding siblings ...)
2021-09-15 21:31 ` [PATCH v5 15/16] tcg/s390x: Implement TCG_TARGET_HAS_bitsel_vec Richard Henderson
@ 2021-09-15 21:31 ` Richard Henderson
15 siblings, 0 replies; 23+ messages in thread
From: Richard Henderson @ 2021-09-15 21:31 UTC (permalink / raw)
To: qemu-devel; +Cc: david
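This is implemented via expansion into a compare followed by bitsel
(swapping the select operands when the compare had to be inverted);
don't actually set TCG_TARGET_HAS_cmpsel_vec.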
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.c.inc | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 5530c974a6..aef24d0795 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -2869,6 +2869,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_xor_vec:
return 1;
case INDEX_op_cmp_vec:
+ case INDEX_op_cmpsel_vec:
case INDEX_op_rotrv_vec:
return -1;
case INDEX_op_mul_vec:
@@ -2931,6 +2932,21 @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
}
}
+static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec c1, TCGv_vec c2,
+ TCGv_vec v3, TCGv_vec v4, TCGCond cond)
+{
+ TCGv_vec t = tcg_temp_new_vec(type);
+
+ if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
+ /* Invert the sense of the compare by swapping arguments. */
+ tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
+ } else {
+ tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
+ }
+ tcg_temp_free_vec(t);
+}
+
static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
{
@@ -2972,7 +2988,7 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
va_list va;
- TCGv_vec v0, v1, v2, t0;
+ TCGv_vec v0, v1, v2, v3, v4, t0;
va_start(va, a0);
v0 = temp_tcgv_vec(arg_temp(a0));
@@ -2984,6 +3000,12 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
break;
+ case INDEX_op_cmpsel_vec:
+ v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
+ break;
+
case INDEX_op_rotrv_vec:
t0 = tcg_temp_new_vec(type);
tcg_gen_neg_vec(vece, t0, v2);
--
2.25.1
^ permalink raw reply related [flat|nested] 23+ messages in thread