qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/5] tcg: Add 32-bit vector operations
@ 2021-06-24 10:50 LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 1/5] tcg: Add tcg_gen_vec_add{sub}16_i32 LIU Zhiwei
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: LIU Zhiwei @ 2021-06-24 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson, LIU Zhiwei

This patch set is split out from the RISC-V Packed extension series, which
needs some i32 vector operations, according to Richard Henderson's suggestion.

The original implementation is on
https://www.mail-archive.com/qemu-devel@nongnu.org/msg814538.html.

LIU Zhiwei (5):
  tcg: Add tcg_gen_vec_add{sub}16_i32
  tcg: Add tcg_gen_vec_add{sub}8_i32
  tcg: Add tcg_gen_vec_shl{shr}{sar}16i_i32
  tcg: Add tcg_gen_vec_shl{shr}{sar}8i_i32
  tcg: Implement tcg_gen_vec_add{sub}32_tl

 include/tcg/tcg-op-gvec.h |  43 ++++++++++++++
 tcg/tcg-op-gvec.c         | 122 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 165 insertions(+)

-- 
2.17.1



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/5] tcg: Add tcg_gen_vec_add{sub}16_i32
  2021-06-24 10:50 [PATCH 0/5] tcg: Add 32-bit vector operations LIU Zhiwei
@ 2021-06-24 10:50 ` LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 2/5] tcg: Add tcg_gen_vec_add{sub}8_i32 LIU Zhiwei
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: LIU Zhiwei @ 2021-06-24 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson, LIU Zhiwei

Implement tcg_gen_vec_add{sub}16_tl by adding corresponding i32 OP.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 include/tcg/tcg-op-gvec.h | 13 +++++++++++++
 tcg/tcg-op-gvec.c         | 28 ++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index c69a7de984..9b67822f54 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -401,4 +401,17 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
 void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
 void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
 
+/* 32-bit vector operations. */
+void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+
+void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+
+#if TARGET_LONG_BITS == 64
+#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
+#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
+#else
+#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
+#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
+#endif
+
 #endif
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 498a959839..a8898ba7bf 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -1742,6 +1742,20 @@ void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
     gen_addv_mask(d, a, b, m);
 }
 
+void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+
+    tcg_gen_andi_i32(t1, a, ~0xffff);
+    tcg_gen_add_i32(t2, a, b);
+    tcg_gen_add_i32(t1, t1, b);
+    tcg_gen_deposit_i32(d, t1, t2, 0, 16);
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
+}
+
 void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 {
     TCGv_i64 t1 = tcg_temp_new_i64();
@@ -1892,6 +1906,20 @@ void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
     gen_subv_mask(d, a, b, m);
 }
 
+void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+
+    tcg_gen_andi_i32(t1, b, ~0xffff);
+    tcg_gen_sub_i32(t2, a, b);
+    tcg_gen_sub_i32(t1, a, t1);
+    tcg_gen_deposit_i32(d, t1, t2, 0, 16);
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
+}
+
 void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 {
     TCGv_i64 t1 = tcg_temp_new_i64();
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/5] tcg: Add tcg_gen_vec_add{sub}8_i32
  2021-06-24 10:50 [PATCH 0/5] tcg: Add 32-bit vector operations LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 1/5] tcg: Add tcg_gen_vec_add{sub}16_i32 LIU Zhiwei
@ 2021-06-24 10:50 ` LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 3/5] tcg: Add tcg_gen_vec_shl{shr}{sar}16i_i32 LIU Zhiwei
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: LIU Zhiwei @ 2021-06-24 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson, LIU Zhiwei

Implement tcg_gen_vec_add{sub}8_tl by adding corresponding i32 OP.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 include/tcg/tcg-op-gvec.h |  6 ++++++
 tcg/tcg-op-gvec.c         | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index 9b67822f54..2d5ad6ce12 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -402,14 +402,20 @@ void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
 void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
 
 /* 32-bit vector operations. */
+void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 
+void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 
 #if TARGET_LONG_BITS == 64
+#define tcg_gen_vec_add8_tl  tcg_gen_vec_add8_i64
+#define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i64
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
 #else
+#define tcg_gen_vec_add8_tl  tcg_gen_vec_add8_i32
+#define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i32
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
 #endif
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index a8898ba7bf..78b86194a7 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -1736,6 +1736,25 @@ void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
     gen_addv_mask(d, a, b, m);
 }
 
+void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80));
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+    TCGv_i32 t3 = tcg_temp_new_i32();
+
+    tcg_gen_andc_i32(t1, a, m);
+    tcg_gen_andc_i32(t2, b, m);
+    tcg_gen_xor_i32(t3, a, b);
+    tcg_gen_add_i32(d, t1, t2);
+    tcg_gen_and_i32(t3, t3, m);
+    tcg_gen_xor_i32(d, d, t3);
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
+    tcg_temp_free_i32(t3);
+}
+
 void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 {
     TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
@@ -1900,6 +1919,25 @@ void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
     gen_subv_mask(d, a, b, m);
 }
 
+void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80));
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+    TCGv_i32 t3 = tcg_temp_new_i32();
+
+    tcg_gen_or_i32(t1, a, m);
+    tcg_gen_andc_i32(t2, b, m);
+    tcg_gen_eqv_i32(t3, a, b);
+    tcg_gen_sub_i32(d, t1, t2);
+    tcg_gen_and_i32(t3, t3, m);
+    tcg_gen_xor_i32(d, d, t3);
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
+    tcg_temp_free_i32(t3);
+}
+
 void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 {
     TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/5] tcg: Add tcg_gen_vec_shl{shr}{sar}16i_i32
  2021-06-24 10:50 [PATCH 0/5] tcg: Add 32-bit vector operations LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 1/5] tcg: Add tcg_gen_vec_add{sub}16_i32 LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 2/5] tcg: Add tcg_gen_vec_add{sub}8_i32 LIU Zhiwei
@ 2021-06-24 10:50 ` LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 4/5] tcg: Add tcg_gen_vec_shl{shr}{sar}8i_i32 LIU Zhiwei
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: LIU Zhiwei @ 2021-06-24 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson, LIU Zhiwei

Implement tcg_gen_vec_shl{shr}{sar}16i_tl by adding corresponding i32 OP.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 include/tcg/tcg-op-gvec.h | 10 ++++++++++
 tcg/tcg-op-gvec.c         | 28 ++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index 2d5ad6ce12..e3c9f45926 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -408,16 +408,26 @@ void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 
+void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+
 #if TARGET_LONG_BITS == 64
 #define tcg_gen_vec_add8_tl  tcg_gen_vec_add8_i64
 #define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i64
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
+#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i64
+#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i64
+#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i64
 #else
 #define tcg_gen_vec_add8_tl  tcg_gen_vec_add8_i32
 #define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i32
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
+#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i32
+#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i32
+#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i32
 #endif
 
 #endif
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 78b86194a7..c2ce05e9ee 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -2678,6 +2678,13 @@ void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
     tcg_gen_andi_i64(d, d, mask);
 }
 
+void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+    uint32_t mask = dup_const(MO_16, 0xffff << c);
+    tcg_gen_shli_i32(d, a, c);
+    tcg_gen_andi_i32(d, d, mask);
+}
+
 void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
 {
@@ -2729,6 +2736,13 @@ void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
     tcg_gen_andi_i64(d, d, mask);
 }
 
+void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+    uint32_t mask = dup_const(MO_16, 0xffff >> c);
+    tcg_gen_shri_i32(d, a, c);
+    tcg_gen_andi_i32(d, d, mask);
+}
+
 void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
 {
@@ -2794,6 +2808,20 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
     tcg_temp_free_i64(s);
 }
 
+void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+    uint32_t s_mask = dup_const(MO_16, 0x8000 >> c);
+    uint32_t c_mask = dup_const(MO_16, 0xffff >> c);
+    TCGv_i32 s = tcg_temp_new_i32();
+
+    tcg_gen_shri_i32(d, a, c);
+    tcg_gen_andi_i32(s, d, s_mask);  /* isolate (shifted) sign bit */
+    tcg_gen_andi_i32(d, d, c_mask);  /* clear out bits above sign  */
+    tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */
+    tcg_gen_or_i32(d, d, s);         /* include sign extension */
+    tcg_temp_free_i32(s);
+}
+
 void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
 {
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/5] tcg: Add tcg_gen_vec_shl{shr}{sar}8i_i32
  2021-06-24 10:50 [PATCH 0/5] tcg: Add 32-bit vector operations LIU Zhiwei
                   ` (2 preceding siblings ...)
  2021-06-24 10:50 ` [PATCH 3/5] tcg: Add tcg_gen_vec_shl{shr}{sar}16i_i32 LIU Zhiwei
@ 2021-06-24 10:50 ` LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 5/5] tcg: Implement tcg_gen_vec_add{sub}32_tl LIU Zhiwei
  2021-06-26  3:53 ` [PATCH 0/5] tcg: Add 32-bit vector operations Richard Henderson
  5 siblings, 0 replies; 7+ messages in thread
From: LIU Zhiwei @ 2021-06-24 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson, LIU Zhiwei

Implement tcg_gen_vec_shl{shr}{sar}8i_tl by adding corresponding i32 OP.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 include/tcg/tcg-op-gvec.h | 10 ++++++++++
 tcg/tcg-op-gvec.c         | 28 ++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index e3c9f45926..e3b274502c 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -408,8 +408,11 @@ void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 
+void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
 void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
 void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
 void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
 
 #if TARGET_LONG_BITS == 64
@@ -417,14 +420,21 @@ void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
 #define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i64
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
+#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i64
+#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i64
+#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i64
 #define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i64
 #define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i64
 #define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i64
+
 #else
 #define tcg_gen_vec_add8_tl  tcg_gen_vec_add8_i32
 #define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i32
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
+#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i32
+#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i32
+#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i32
 #define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i32
 #define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i32
 #define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i32
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index c2ce05e9ee..fabad7cc00 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -2678,6 +2678,13 @@ void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
     tcg_gen_andi_i64(d, d, mask);
 }
 
+void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+    uint32_t mask = dup_const(MO_8, 0xff << c);
+    tcg_gen_shli_i32(d, a, c);
+    tcg_gen_andi_i32(d, d, mask);
+}
+
 void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
 {
     uint32_t mask = dup_const(MO_16, 0xffff << c);
@@ -2736,6 +2743,13 @@ void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
     tcg_gen_andi_i64(d, d, mask);
 }
 
+void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+    uint32_t mask = dup_const(MO_8, 0xff >> c);
+    tcg_gen_shri_i32(d, a, c);
+    tcg_gen_andi_i32(d, d, mask);
+}
+
 void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
 {
     uint32_t mask = dup_const(MO_16, 0xffff >> c);
@@ -2808,6 +2822,20 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
     tcg_temp_free_i64(s);
 }
 
+void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+    uint32_t s_mask = dup_const(MO_8, 0x80 >> c);
+    uint32_t c_mask = dup_const(MO_8, 0xff >> c);
+    TCGv_i32 s = tcg_temp_new_i32();
+
+    tcg_gen_shri_i32(d, a, c);
+    tcg_gen_andi_i32(s, d, s_mask);  /* isolate (shifted) sign bit */
+    tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */
+    tcg_gen_andi_i32(d, d, c_mask);  /* clear out bits above sign  */
+    tcg_gen_or_i32(d, d, s);         /* include sign extension */
+    tcg_temp_free_i32(s);
+}
+
 void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
 {
     uint32_t s_mask = dup_const(MO_16, 0x8000 >> c);
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 5/5] tcg: Implement tcg_gen_vec_add{sub}32_tl
  2021-06-24 10:50 [PATCH 0/5] tcg: Add 32-bit vector operations LIU Zhiwei
                   ` (3 preceding siblings ...)
  2021-06-24 10:50 ` [PATCH 4/5] tcg: Add tcg_gen_vec_shl{shr}{sar}8i_i32 LIU Zhiwei
@ 2021-06-24 10:50 ` LIU Zhiwei
  2021-06-26  3:53 ` [PATCH 0/5] tcg: Add 32-bit vector operations Richard Henderson
  5 siblings, 0 replies; 7+ messages in thread
From: LIU Zhiwei @ 2021-06-24 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson, LIU Zhiwei

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 include/tcg/tcg-op-gvec.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index e3b274502c..da55fed870 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -420,6 +420,8 @@ void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
 #define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i64
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
+#define tcg_gen_vec_add32_tl tcg_gen_vec_add32_i64
+#define tcg_gen_vec_sub32_tl tcg_gen_vec_sub32_i64
 #define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i64
 #define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i64
 #define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i64
@@ -432,6 +434,8 @@ void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
 #define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i32
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
+#define tcg_gen_vec_add32_tl tcg_gen_add_i32
+#define tcg_gen_vec_sub32_tl tcg_gen_sub_i32
 #define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i32
 #define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i32
 #define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i32
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 0/5] tcg: Add 32-bit vector operations
  2021-06-24 10:50 [PATCH 0/5] tcg: Add 32-bit vector operations LIU Zhiwei
                   ` (4 preceding siblings ...)
  2021-06-24 10:50 ` [PATCH 5/5] tcg: Implement tcg_gen_vec_add{sub}32_tl LIU Zhiwei
@ 2021-06-26  3:53 ` Richard Henderson
  5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2021-06-26  3:53 UTC (permalink / raw)
  To: LIU Zhiwei, qemu-devel

On 6/24/21 3:50 AM, LIU Zhiwei wrote:
> This patch set is split out from the RISC-V Packed extension series, which
> needs some i32 vector operations, according to Richard Henderson's suggestion.
> 
> The original implementation is on
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg814538.html.
> 
> LIU Zhiwei (5):
>    tcg: Add tcg_gen_vec_add{sub}16_i32
>    tcg: Add tcg_gen_vec_add{sub}8_i32
>    tcg: Add tcg_gen_vec_shl{shr}{sar}16i_i32
>    tcg: Add tcg_gen_vec_shl{shr}{sar}8i_i32
>    tcg: Implement tcg_gen_vec_add{sub}32_tl
> 
>   include/tcg/tcg-op-gvec.h |  43 ++++++++++++++
>   tcg/tcg-op-gvec.c         | 122 ++++++++++++++++++++++++++++++++++++++
>   2 files changed, 165 insertions(+)
> 

Queued to tcg-next, thanks.

r~


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-06-26  3:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-24 10:50 [PATCH 0/5] tcg: Add 32-bit vector operations LIU Zhiwei
2021-06-24 10:50 ` [PATCH 1/5] tcg: Add tcg_gen_vec_add{sub}16_i32 LIU Zhiwei
2021-06-24 10:50 ` [PATCH 2/5] tcg: Add tcg_gen_vec_add{sub}8_i32 LIU Zhiwei
2021-06-24 10:50 ` [PATCH 3/5] tcg: Add tcg_gen_vec_shl{shr}{sar}16i_i32 LIU Zhiwei
2021-06-24 10:50 ` [PATCH 4/5] tcg: Add tcg_gen_vec_shl{shr}{sar}8i_i32 LIU Zhiwei
2021-06-24 10:50 ` [PATCH 5/5] tcg: Implement tcg_gen_vec_add{sub}32_tl LIU Zhiwei
2021-06-26  3:53 ` [PATCH 0/5] tcg: Add 32-bit vector operations Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).