All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/5] tcg: Add 32-bit vector operations
@ 2021-06-24 10:50 LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 1/5] tcg: Add tcg_gen_vec_add{sub}16_i32 LIU Zhiwei
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: LIU Zhiwei @ 2021-06-24 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson, LIU Zhiwei

This patch set is split from RISC-V Packed extension where needs
some i32 vector operations, accorind to Richard Henderson's suggestion.

The original implementation is on
https://www.mail-archive.com/qemu-devel@nongnu.org/msg814538.html.

LIU Zhiwei (5):
  tcg: Add tcg_gen_vec_add{sub}16_i32
  tcg: Add tcg_gen_vec_add{sub}8_i32
  tcg: Add tcg_gen_vec_shl{shr}{sar}16i_i32
  tcg: Add tcg_gen_vec_shl{shr}{sar}8i_i32
  tcg: Implement tcg_gen_vec_add{sub}32_tl

 include/tcg/tcg-op-gvec.h |  43 ++++++++++++++
 tcg/tcg-op-gvec.c         | 122 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 165 insertions(+)

-- 
2.17.1



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/5] tcg: Add tcg_gen_vec_add{sub}16_i32
  2021-06-24 10:50 [PATCH 0/5] tcg: Add 32-bit vector operations LIU Zhiwei
@ 2021-06-24 10:50 ` LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 2/5] tcg: Add tcg_gen_vec_add{sub}8_i32 LIU Zhiwei
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: LIU Zhiwei @ 2021-06-24 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson, LIU Zhiwei

Implement tcg_gen_vec_add{sub}16_tl by adding corresponding i32 OP.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 include/tcg/tcg-op-gvec.h | 13 +++++++++++++
 tcg/tcg-op-gvec.c         | 28 ++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index c69a7de984..9b67822f54 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -401,4 +401,17 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
 void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
 void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
 
+/* 32-bit vector operations. */
+void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+
+void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+
+#if TARGET_LONG_BITS == 64
+#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
+#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
+#else
+#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
+#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
+#endif
+
 #endif
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 498a959839..a8898ba7bf 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -1742,6 +1742,20 @@ void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
     gen_addv_mask(d, a, b, m);
 }
 
+void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+
+    tcg_gen_andi_i32(t1, a, ~0xffff);
+    tcg_gen_add_i32(t2, a, b);
+    tcg_gen_add_i32(t1, t1, b);
+    tcg_gen_deposit_i32(d, t1, t2, 0, 16);
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
+}
+
 void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 {
     TCGv_i64 t1 = tcg_temp_new_i64();
@@ -1892,6 +1906,20 @@ void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
     gen_subv_mask(d, a, b, m);
 }
 
+void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+
+    tcg_gen_andi_i32(t1, b, ~0xffff);
+    tcg_gen_sub_i32(t2, a, b);
+    tcg_gen_sub_i32(t1, a, t1);
+    tcg_gen_deposit_i32(d, t1, t2, 0, 16);
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
+}
+
 void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 {
     TCGv_i64 t1 = tcg_temp_new_i64();
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/5] tcg: Add tcg_gen_vec_add{sub}8_i32
  2021-06-24 10:50 [PATCH 0/5] tcg: Add 32-bit vector operations LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 1/5] tcg: Add tcg_gen_vec_add{sub}16_i32 LIU Zhiwei
@ 2021-06-24 10:50 ` LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 3/5] tcg: Add tcg_gen_vec_shl{shr}{sar}16i_i32 LIU Zhiwei
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: LIU Zhiwei @ 2021-06-24 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson, LIU Zhiwei

Implement tcg_gen_vec_add{sub}8_tl by adding corresponging i32 OP.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 include/tcg/tcg-op-gvec.h |  6 ++++++
 tcg/tcg-op-gvec.c         | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index 9b67822f54..2d5ad6ce12 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -402,14 +402,20 @@ void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
 void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
 
 /* 32-bit vector operations. */
+void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 
+void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 
 #if TARGET_LONG_BITS == 64
+#define tcg_gen_vec_add8_tl  tcg_gen_vec_add8_i64
+#define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i64
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
 #else
+#define tcg_gen_vec_add8_tl  tcg_gen_vec_add8_i32
+#define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i32
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
 #endif
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index a8898ba7bf..78b86194a7 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -1736,6 +1736,25 @@ void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
     gen_addv_mask(d, a, b, m);
 }
 
+void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80));
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+    TCGv_i32 t3 = tcg_temp_new_i32();
+
+    tcg_gen_andc_i32(t1, a, m);
+    tcg_gen_andc_i32(t2, b, m);
+    tcg_gen_xor_i32(t3, a, b);
+    tcg_gen_add_i32(d, t1, t2);
+    tcg_gen_and_i32(t3, t3, m);
+    tcg_gen_xor_i32(d, d, t3);
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
+    tcg_temp_free_i32(t3);
+}
+
 void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 {
     TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
@@ -1900,6 +1919,25 @@ void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
     gen_subv_mask(d, a, b, m);
 }
 
+void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80));
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+    TCGv_i32 t3 = tcg_temp_new_i32();
+
+    tcg_gen_or_i32(t1, a, m);
+    tcg_gen_andc_i32(t2, b, m);
+    tcg_gen_eqv_i32(t3, a, b);
+    tcg_gen_sub_i32(d, t1, t2);
+    tcg_gen_and_i32(t3, t3, m);
+    tcg_gen_xor_i32(d, d, t3);
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
+    tcg_temp_free_i32(t3);
+}
+
 void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 {
     TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/5] tcg: Add tcg_gen_vec_shl{shr}{sar}16i_i32
  2021-06-24 10:50 [PATCH 0/5] tcg: Add 32-bit vector operations LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 1/5] tcg: Add tcg_gen_vec_add{sub}16_i32 LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 2/5] tcg: Add tcg_gen_vec_add{sub}8_i32 LIU Zhiwei
@ 2021-06-24 10:50 ` LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 4/5] tcg: Add tcg_gen_vec_shl{shr}{sar}8i_i32 LIU Zhiwei
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: LIU Zhiwei @ 2021-06-24 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson, LIU Zhiwei

Implement tcg_gen_vec_shl{shr}{sar}16i_tl by adding corresponging i32 OP.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 include/tcg/tcg-op-gvec.h | 10 ++++++++++
 tcg/tcg-op-gvec.c         | 28 ++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index 2d5ad6ce12..e3c9f45926 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -408,16 +408,26 @@ void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 
+void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+
 #if TARGET_LONG_BITS == 64
 #define tcg_gen_vec_add8_tl  tcg_gen_vec_add8_i64
 #define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i64
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
+#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i64
+#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i64
+#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i64
 #else
 #define tcg_gen_vec_add8_tl  tcg_gen_vec_add8_i32
 #define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i32
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
+#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i32
+#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i32
+#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i32
 #endif
 
 #endif
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 78b86194a7..c2ce05e9ee 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -2678,6 +2678,13 @@ void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
     tcg_gen_andi_i64(d, d, mask);
 }
 
+void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+    uint32_t mask = dup_const(MO_16, 0xffff << c);
+    tcg_gen_shli_i32(d, a, c);
+    tcg_gen_andi_i32(d, d, mask);
+}
+
 void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
 {
@@ -2729,6 +2736,13 @@ void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
     tcg_gen_andi_i64(d, d, mask);
 }
 
+void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+    uint32_t mask = dup_const(MO_16, 0xffff >> c);
+    tcg_gen_shri_i32(d, a, c);
+    tcg_gen_andi_i32(d, d, mask);
+}
+
 void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
 {
@@ -2794,6 +2808,20 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
     tcg_temp_free_i64(s);
 }
 
+void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+    uint32_t s_mask = dup_const(MO_16, 0x8000 >> c);
+    uint32_t c_mask = dup_const(MO_16, 0xffff >> c);
+    TCGv_i32 s = tcg_temp_new_i32();
+
+    tcg_gen_shri_i32(d, a, c);
+    tcg_gen_andi_i32(s, d, s_mask);  /* isolate (shifted) sign bit */
+    tcg_gen_andi_i32(d, d, c_mask);  /* clear out bits above sign  */
+    tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */
+    tcg_gen_or_i32(d, d, s);         /* include sign extension */
+    tcg_temp_free_i32(s);
+}
+
 void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
 {
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/5] tcg: Add tcg_gen_vec_shl{shr}{sar}8i_i32
  2021-06-24 10:50 [PATCH 0/5] tcg: Add 32-bit vector operations LIU Zhiwei
                   ` (2 preceding siblings ...)
  2021-06-24 10:50 ` [PATCH 3/5] tcg: Add tcg_gen_vec_shl{shr}{sar}16i_i32 LIU Zhiwei
@ 2021-06-24 10:50 ` LIU Zhiwei
  2021-06-24 10:50 ` [PATCH 5/5] tcg: Implement tcg_gen_vec_add{sub}32_tl LIU Zhiwei
  2021-06-26  3:53 ` [PATCH 0/5] tcg: Add 32-bit vector operations Richard Henderson
  5 siblings, 0 replies; 7+ messages in thread
From: LIU Zhiwei @ 2021-06-24 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson, LIU Zhiwei

Implement tcg_gen_vec_shl{shr}{sar}8i_tl by adding corresponging i32 OP.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 include/tcg/tcg-op-gvec.h | 10 ++++++++++
 tcg/tcg-op-gvec.c         | 28 ++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index e3c9f45926..e3b274502c 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -408,8 +408,11 @@ void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
 
+void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
 void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
 void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
 void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
 
 #if TARGET_LONG_BITS == 64
@@ -417,14 +420,21 @@ void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
 #define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i64
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
+#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i64
+#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i64
+#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i64
 #define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i64
 #define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i64
 #define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i64
+
 #else
 #define tcg_gen_vec_add8_tl  tcg_gen_vec_add8_i32
 #define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i32
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
+#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i32
+#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i32
+#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i32
 #define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i32
 #define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i32
 #define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i32
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index c2ce05e9ee..fabad7cc00 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -2678,6 +2678,13 @@ void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
     tcg_gen_andi_i64(d, d, mask);
 }
 
+void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+    uint32_t mask = dup_const(MO_8, 0xff << c);
+    tcg_gen_shli_i32(d, a, c);
+    tcg_gen_andi_i32(d, d, mask);
+}
+
 void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
 {
     uint32_t mask = dup_const(MO_16, 0xffff << c);
@@ -2736,6 +2743,13 @@ void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
     tcg_gen_andi_i64(d, d, mask);
 }
 
+void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+    uint32_t mask = dup_const(MO_8, 0xff >> c);
+    tcg_gen_shri_i32(d, a, c);
+    tcg_gen_andi_i32(d, d, mask);
+}
+
 void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
 {
     uint32_t mask = dup_const(MO_16, 0xffff >> c);
@@ -2808,6 +2822,20 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
     tcg_temp_free_i64(s);
 }
 
+void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+    uint32_t s_mask = dup_const(MO_8, 0x80 >> c);
+    uint32_t c_mask = dup_const(MO_8, 0xff >> c);
+    TCGv_i32 s = tcg_temp_new_i32();
+
+    tcg_gen_shri_i32(d, a, c);
+    tcg_gen_andi_i32(s, d, s_mask);  /* isolate (shifted) sign bit */
+    tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */
+    tcg_gen_andi_i32(d, d, c_mask);  /* clear out bits above sign  */
+    tcg_gen_or_i32(d, d, s);         /* include sign extension */
+    tcg_temp_free_i32(s);
+}
+
 void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
 {
     uint32_t s_mask = dup_const(MO_16, 0x8000 >> c);
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 5/5] tcg: Implement tcg_gen_vec_add{sub}32_tl
  2021-06-24 10:50 [PATCH 0/5] tcg: Add 32-bit vector operations LIU Zhiwei
                   ` (3 preceding siblings ...)
  2021-06-24 10:50 ` [PATCH 4/5] tcg: Add tcg_gen_vec_shl{shr}{sar}8i_i32 LIU Zhiwei
@ 2021-06-24 10:50 ` LIU Zhiwei
  2021-06-26  3:53 ` [PATCH 0/5] tcg: Add 32-bit vector operations Richard Henderson
  5 siblings, 0 replies; 7+ messages in thread
From: LIU Zhiwei @ 2021-06-24 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson, LIU Zhiwei

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 include/tcg/tcg-op-gvec.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index e3b274502c..da55fed870 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -420,6 +420,8 @@ void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
 #define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i64
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
+#define tcg_gen_vec_add32_tl tcg_gen_vec_add32_i64
+#define tcg_gen_vec_sub32_tl tcg_gen_vec_sub32_i64
 #define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i64
 #define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i64
 #define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i64
@@ -432,6 +434,8 @@ void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
 #define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i32
 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
+#define tcg_gen_vec_add32_tl tcg_gen_add_i32
+#define tcg_gen_vec_sub32_tl tcg_gen_sub_i32
 #define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i32
 #define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i32
 #define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i32
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 0/5] tcg: Add 32-bit vector operations
  2021-06-24 10:50 [PATCH 0/5] tcg: Add 32-bit vector operations LIU Zhiwei
                   ` (4 preceding siblings ...)
  2021-06-24 10:50 ` [PATCH 5/5] tcg: Implement tcg_gen_vec_add{sub}32_tl LIU Zhiwei
@ 2021-06-26  3:53 ` Richard Henderson
  5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2021-06-26  3:53 UTC (permalink / raw)
  To: LIU Zhiwei, qemu-devel

On 6/24/21 3:50 AM, LIU Zhiwei wrote:
> This patch set is split from RISC-V Packed extension where needs
> some i32 vector operations, accorind to Richard Henderson's suggestion.
> 
> The original implementation is on
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg814538.html.
> 
> LIU Zhiwei (5):
>    tcg: Add tcg_gen_vec_add{sub}16_i32
>    tcg: Add tcg_gen_vec_add{sub}8_i32
>    tcg: Add tcg_gen_vec_shl{shr}{sar}16i_i32
>    tcg: Add tcg_gen_vec_shl{shr}{sar}8i_i32
>    tcg: Implement tcg_gen_vec_add{sub}32_tl
> 
>   include/tcg/tcg-op-gvec.h |  43 ++++++++++++++
>   tcg/tcg-op-gvec.c         | 122 ++++++++++++++++++++++++++++++++++++++
>   2 files changed, 165 insertions(+)
> 

Queued to tcg-next, thanks.

r~


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-06-26  3:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-24 10:50 [PATCH 0/5] tcg: Add 32-bit vector operations LIU Zhiwei
2021-06-24 10:50 ` [PATCH 1/5] tcg: Add tcg_gen_vec_add{sub}16_i32 LIU Zhiwei
2021-06-24 10:50 ` [PATCH 2/5] tcg: Add tcg_gen_vec_add{sub}8_i32 LIU Zhiwei
2021-06-24 10:50 ` [PATCH 3/5] tcg: Add tcg_gen_vec_shl{shr}{sar}16i_i32 LIU Zhiwei
2021-06-24 10:50 ` [PATCH 4/5] tcg: Add tcg_gen_vec_shl{shr}{sar}8i_i32 LIU Zhiwei
2021-06-24 10:50 ` [PATCH 5/5] tcg: Implement tcg_gen_vec_add{sub}32_tl LIU Zhiwei
2021-06-26  3:53 ` [PATCH 0/5] tcg: Add 32-bit vector operations Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.