All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/6] POWER9 TCG enablements - part4
@ 2016-08-07 17:36 Nikunj A Dadhania
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 1/6] target-ppc: add xxspltib instruction Nikunj A Dadhania
                   ` (6 more replies)
  0 siblings, 7 replies; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-07 17:36 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, benh

This series contains 10 new instructions for POWER9 ISA3.0.

Patches:
    01:  xxspltib: VSX Vector Splat Immediate Byte
    02:  darn: Deliver A Random Number
    03:  lxsibzx - Load VSX Scalar as Integer Byte & Zero Indexed
         lxsihzx - Load VSX Scalar as Integer Halfword & Zero Indexed
    04:  stxsibx - Store VSX Scalar as Integer Byte Indexed
         stxsihx - Store VSX Scalar as Integer Halfword Indexed
    05:  lxvb16x: Load VSX Vector Byte*16
         lxvh8x:  Load VSX Vector Halfword*8
    06:  stxvb16x: Store VSX Vector Byte*16
         stxvh8x:  Store VSX Vector Halfword*8

Nikunj A Dadhania (5):
  target-ppc: add xxspltib instruction
  target-ppc: add lxsi[bw]zx instruction
  target-ppc: add stxsi[bh]x instruction
  target-ppc: add lxvb16x and lxvh8x
  target-ppc: add stxvb16x and stxvh8x

Ravi Bangoria (1):
  target-ppc: Implement darn instruction

 target-ppc/helper.h                 |  5 +++
 target-ppc/int_helper.c             | 14 ++++++++
 target-ppc/mem_helper.c             | 65 +++++++++++++++++++++++++++++++++++++
 target-ppc/translate.c              | 61 ++++++++++++++++++++++------------
 target-ppc/translate/vsx-impl.inc.c | 64 ++++++++++++++++++++++++++++++++++++
 target-ppc/translate/vsx-ops.inc.c  | 13 ++++++++
 6 files changed, 202 insertions(+), 20 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 1/6] target-ppc: add xxspltib instruction
  2016-08-07 17:36 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablements - part4 Nikunj A Dadhania
@ 2016-08-07 17:36 ` Nikunj A Dadhania
  2016-08-08  4:43   ` Richard Henderson
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 2/6] target-ppc: Implement darn instruction Nikunj A Dadhania
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-07 17:36 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, benh

xxspltib: VSX Vector Splat Immediate Byte

Copy the immediate byte in each byte of target VSR

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/translate.c              |  2 ++
 target-ppc/translate/vsx-impl.inc.c | 20 ++++++++++++++++++++
 target-ppc/translate/vsx-ops.inc.c  |  5 +++++
 3 files changed, 27 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 0a5a3e2..2a87d1a 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -589,6 +589,8 @@ EXTRACT_HELPER(DM, 8, 2);
 EXTRACT_HELPER(UIM, 16, 2);
 EXTRACT_HELPER(SHW, 8, 2);
 EXTRACT_HELPER(SP, 19, 2);
+EXTRACT_HELPER(IMM8, 11, 8);
+
 /*****************************************************************************/
 /* PowerPC instructions table                                                */
 
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index 9f77b06..6e789cb 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -647,6 +647,26 @@ static void gen_xxspltw(DisasContext *ctx)
     tcg_temp_free_i64(b2);
 }
 
+#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
+
+static void gen_xxspltib(DisasContext *ctx)
+{
+    unsigned char uim8 = IMM8(ctx->opcode);
+    if (xS(ctx->opcode) < 32) {
+        if (unlikely(!ctx->altivec_enabled)) {
+            gen_exception(ctx, POWERPC_EXCP_VPU);
+            return;
+        }
+    } else {
+        if (unlikely(!ctx->vsx_enabled)) {
+            gen_exception(ctx, POWERPC_EXCP_VSXU);
+            return;
+        }
+    }
+    tcg_gen_movi_i64(cpu_vsrh(xT(ctx->opcode)), pattern(uim8));
+    tcg_gen_movi_i64(cpu_vsrl(xT(ctx->opcode)), pattern(uim8));
+}
+
 static void gen_xxsldwi(DisasContext *ctx)
 {
     TCGv_i64 xth, xtl;
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 8b9da65..62a6251 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -20,6 +20,10 @@ GEN_HANDLER_E(mfvsrd, 0x1F, 0x13, 0x01, 0x0000F800, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(mtvsrd, 0x1F, 0x13, 0x05, 0x0000F800, PPC_NONE, PPC2_VSX207),
 #endif
 
+#define GEN_XX1FORM(name, opc2, opc3, fl2)                              \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2)
+
 #define GEN_XX2FORM(name, opc2, opc3, fl2)                           \
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2)
@@ -222,6 +226,7 @@ VSX_LOGICAL(xxlorc, 0x8, 0x15, PPC2_VSX207),
 GEN_XX3FORM(xxmrghw, 0x08, 0x02, PPC2_VSX),
 GEN_XX3FORM(xxmrglw, 0x08, 0x06, PPC2_VSX),
 GEN_XX2FORM(xxspltw, 0x08, 0x0A, PPC2_VSX),
+GEN_XX1FORM(xxspltib, 0x08, 0x0B, PPC2_ISA300),
 GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00),
 
 #define GEN_XXSEL_ROW(opc3) \
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-07 17:36 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablements - part4 Nikunj A Dadhania
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 1/6] target-ppc: add xxspltib instruction Nikunj A Dadhania
@ 2016-08-07 17:36 ` Nikunj A Dadhania
  2016-08-07 21:33   ` Benjamin Herrenschmidt
  2016-08-08  5:01   ` [Qemu-devel] " Richard Henderson
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 3/6] target-ppc: add lxsi[bw]zx instruction Nikunj A Dadhania
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-07 17:36 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, benh, Ravi Bangoria

From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>

darn: Deliver A Random Number

For both CRN and RRN, returning 64-bit random number.

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/helper.h     |  1 +
 target-ppc/int_helper.c | 14 ++++++++++++++
 target-ppc/translate.c  | 11 +++++++++++
 3 files changed, 26 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 8eada2f..257bfca 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -50,6 +50,7 @@ DEF_HELPER_FLAGS_1(cnttzd, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(popcntd, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_3(srad, tl, env, tl, tl)
+DEF_HELPER_1(darn, tl, i32)
 #endif
 
 DEF_HELPER_FLAGS_1(cntlsw32, TCG_CALL_NO_RWG_SE, i32, i32)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 552b2e0..2b9fe13 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -182,6 +182,20 @@ target_ulong helper_cnttzd(target_ulong t)
 {
     return ctz64(t);
 }
+
+target_ulong helper_darn(uint32_t l)
+{
+    target_ulong r = UINT64_MAX;
+
+    if (l <= 2) {
+        do {
+            r = random() * random();
+            r &= l ? UINT64_MAX : UINT32_MAX;
+        } while (r == UINT64_MAX);
+    }
+
+    return r;
+}
 #endif
 
 #if defined(TARGET_PPC64)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 2a87d1a..6a79fc1 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -526,6 +526,8 @@ EXTRACT_HELPER(FPW, 16, 1);
 
 /* addpcis */
 EXTRACT_HELPER_DXFORM(DX, 10, 6, 6, 5, 16, 1, 1, 0, 0)
+/* darn */
+EXTRACT_HELPER(L, 16, 2);
 
 /***                            Jump target decoding                       ***/
 /* Immediate address */
@@ -1893,6 +1895,14 @@ static void gen_cnttzd(DisasContext *ctx)
         gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
     }
 }
+
+/* darn */
+static void gen_darn(DisasContext *ctx)
+{
+    TCGv_i32 l = tcg_const_i32(L(ctx->opcode));
+    gen_helper_darn(cpu_gpr[rD(ctx->opcode)], l);
+    tcg_temp_free_i32(l);
+}
 #endif
 
 /***                             Integer rotate                            ***/
@@ -6238,6 +6248,7 @@ GEN_HANDLER_E(prtyw, 0x1F, 0x1A, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA205),
 GEN_HANDLER(popcntd, 0x1F, 0x1A, 0x0F, 0x0000F801, PPC_POPCNTWD),
 GEN_HANDLER(cntlzd, 0x1F, 0x1A, 0x01, 0x00000000, PPC_64B),
 GEN_HANDLER_E(cnttzd, 0x1F, 0x1A, 0x11, 0x00000000, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(darn, 0x1F, 0x13, 0x17, 0x001CF801, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(prtyd, 0x1F, 0x1A, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA205),
 GEN_HANDLER_E(bpermd, 0x1F, 0x1C, 0x07, 0x00000001, PPC_NONE, PPC2_PERM_ISA206),
 #endif
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 3/6] target-ppc: add lxsi[bw]zx instruction
  2016-08-07 17:36 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablements - part4 Nikunj A Dadhania
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 1/6] target-ppc: add xxspltib instruction Nikunj A Dadhania
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 2/6] target-ppc: Implement darn instruction Nikunj A Dadhania
@ 2016-08-07 17:36 ` Nikunj A Dadhania
  2016-08-08  5:08   ` Richard Henderson
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 4/6] target-ppc: add stxsi[bh]x instruction Nikunj A Dadhania
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-07 17:36 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, benh

lxsibzx - Load VSX Scalar as Integer Byte & Zero Indexed
lxsihzx - Load VSX Scalar as Integer Halfword & Zero Indexed

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/translate.c              | 29 +++++++++++++++--------------
 target-ppc/translate/vsx-impl.inc.c |  2 ++
 target-ppc/translate/vsx-ops.inc.c  |  2 ++
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 6a79fc1..4bdf5ba 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -2495,28 +2495,29 @@ static inline void gen_qemu_ld32u(DisasContext *ctx, TCGv arg1, TCGv arg2)
     tcg_gen_qemu_ld_tl(arg1, arg2, ctx->mem_idx, op);
 }
 
-static void gen_qemu_ld32u_i64(DisasContext *ctx, TCGv_i64 val, TCGv addr)
-{
-    TCGv tmp = tcg_temp_new();
-    gen_qemu_ld32u(ctx, tmp, addr);
-    tcg_gen_extu_tl_i64(val, tmp);
-    tcg_temp_free(tmp);
-}
-
 static inline void gen_qemu_ld32s(DisasContext *ctx, TCGv arg1, TCGv arg2)
 {
     TCGMemOp op = MO_SL | ctx->default_tcg_memop_mask;
     tcg_gen_qemu_ld_tl(arg1, arg2, ctx->mem_idx, op);
 }
 
-static void gen_qemu_ld32s_i64(DisasContext *ctx, TCGv_i64 val, TCGv addr)
-{
-    TCGv tmp = tcg_temp_new();
-    gen_qemu_ld32s(ctx, tmp, addr);
-    tcg_gen_ext_tl_i64(val, tmp);
-    tcg_temp_free(tmp);
+
+#define GEN_QEMU_LOAD_64(ldop, ext)                                     \
+static void glue(gen_qemu_, glue(ldop, _i64))(DisasContext *ctx,        \
+                                               TCGv_i64 val,            \
+                                               TCGv addr)               \
+{                                                                       \
+    TCGv tmp = tcg_temp_new();                                          \
+    gen_qemu_##ldop(ctx, tmp, addr);                                    \
+    tcg_gen_##ext##_tl_i64(val, tmp);                                   \
+    tcg_temp_free(tmp);                                                 \
 }
 
+GEN_QEMU_LOAD_64(ld8u,  extu)
+GEN_QEMU_LOAD_64(ld16u, extu)
+GEN_QEMU_LOAD_64(ld32u, extu)
+GEN_QEMU_LOAD_64(ld32s, ext)
+
 static inline void gen_qemu_ld64(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2)
 {
     TCGMemOp op = MO_Q | ctx->default_tcg_memop_mask;
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index 6e789cb..f438a50 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -36,6 +36,8 @@ static void gen_##name(DisasContext *ctx)                     \
 
 VSX_LOAD_SCALAR(lxsdx, ld64)
 VSX_LOAD_SCALAR(lxsiwax, ld32s_i64)
+VSX_LOAD_SCALAR(lxsibzx, ld8u_i64)
+VSX_LOAD_SCALAR(lxsihzx, ld16u_i64)
 VSX_LOAD_SCALAR(lxsiwzx, ld32u_i64)
 VSX_LOAD_SCALAR(lxsspx, ld32fs)
 
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 62a6251..4cd742c 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -1,6 +1,8 @@
 GEN_HANDLER_E(lxsdx, 0x1F, 0x0C, 0x12, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(lxsiwax, 0x1F, 0x0C, 0x02, 0, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(lxsiwzx, 0x1F, 0x0C, 0x00, 0, PPC_NONE, PPC2_VSX207),
+GEN_HANDLER_E(lxsibzx, 0x1F, 0x0D, 0x18, 0, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(lxsihzx, 0x1F, 0x0D, 0x19, 0, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(lxsspx, 0x1F, 0x0C, 0x10, 0, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(lxvdsx, 0x1F, 0x0C, 0x0A, 0, PPC_NONE, PPC2_VSX),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 4/6] target-ppc: add stxsi[bh]x instruction
  2016-08-07 17:36 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablements - part4 Nikunj A Dadhania
                   ` (2 preceding siblings ...)
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 3/6] target-ppc: add lxsi[bw]zx instruction Nikunj A Dadhania
@ 2016-08-07 17:36 ` Nikunj A Dadhania
  2016-08-08  5:08   ` Richard Henderson
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 5/6] target-ppc: add lxvb16x and lxvh8x Nikunj A Dadhania
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-07 17:36 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, benh

stxsibx - Store VSX Scalar as Integer Byte Indexed
stxsihx - Store VSX Scalar as Integer Halfword Indexed

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/translate.c              | 19 +++++++++++++------
 target-ppc/translate/vsx-impl.inc.c |  2 ++
 target-ppc/translate/vsx-ops.inc.c  |  2 ++
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 4bdf5ba..cb5bbeb 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -2541,14 +2541,21 @@ static inline void gen_qemu_st32(DisasContext *ctx, TCGv arg1, TCGv arg2)
     tcg_gen_qemu_st_tl(arg1, arg2, ctx->mem_idx, op);
 }
 
-static void gen_qemu_st32_i64(DisasContext *ctx, TCGv_i64 val, TCGv addr)
-{
-    TCGv tmp = tcg_temp_new();
-    tcg_gen_trunc_i64_tl(tmp, val);
-    gen_qemu_st32(ctx, tmp, addr);
-    tcg_temp_free(tmp);
+#define GEN_QEMU_STORE_64(stop)                                         \
+static void gen_qemu_##stop##_i64(DisasContext *ctx,                    \
+                                  TCGv_i64 val,                         \
+                                  TCGv addr)                            \
+{                                                                       \
+    TCGv tmp = tcg_temp_new();                                          \
+    tcg_gen_trunc_i64_tl(tmp, val);                                     \
+    gen_qemu_##stop(ctx, tmp, addr);                                    \
+    tcg_temp_free(tmp);                                                 \
 }
 
+GEN_QEMU_STORE_64(st8)
+GEN_QEMU_STORE_64(st16)
+GEN_QEMU_STORE_64(st32)
+
 static inline void gen_qemu_st64(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2)
 {
     TCGMemOp op = MO_Q | ctx->default_tcg_memop_mask;
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index f438a50..fb29e6d 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -118,6 +118,8 @@ static void gen_##name(DisasContext *ctx)                     \
 }
 
 VSX_STORE_SCALAR(stxsdx, st64)
+VSX_STORE_SCALAR(stxsibx, st8_i64)
+VSX_STORE_SCALAR(stxsihx, st16_i64)
 VSX_STORE_SCALAR(stxsiwx, st32_i64)
 VSX_STORE_SCALAR(stxsspx, st32fs)
 
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 4cd742c..414b73b 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -9,6 +9,8 @@ GEN_HANDLER_E(lxvdsx, 0x1F, 0x0C, 0x0A, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(lxvw4x, 0x1F, 0x0C, 0x18, 0, PPC_NONE, PPC2_VSX),
 
 GEN_HANDLER_E(stxsdx, 0x1F, 0xC, 0x16, 0, PPC_NONE, PPC2_VSX),
+GEN_HANDLER_E(stxsibx, 0x1F, 0xD, 0x1C, 0, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(stxsihx, 0x1F, 0xD, 0x1D, 0, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(stxsiwx, 0x1F, 0xC, 0x04, 0, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(stxsspx, 0x1F, 0xC, 0x14, 0, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(stxvd2x, 0x1F, 0xC, 0x1E, 0, PPC_NONE, PPC2_VSX),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 5/6] target-ppc: add lxvb16x and lxvh8x
  2016-08-07 17:36 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablements - part4 Nikunj A Dadhania
                   ` (3 preceding siblings ...)
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 4/6] target-ppc: add stxsi[bh]x instruction Nikunj A Dadhania
@ 2016-08-07 17:36 ` Nikunj A Dadhania
  2016-08-08  5:27   ` Richard Henderson
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 6/6] target-ppc: add stxvb16x and stxvh8x Nikunj A Dadhania
  2016-08-09  3:30 ` [Qemu-devel] [PATCH 0/6] POWER9 TCG enablements - part4 David Gibson
  6 siblings, 1 reply; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-07 17:36 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, benh

lxvb16x: Load VSX Vector Byte*16
lxvh8x:  Load VSX Vector Halfword*8

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/helper.h                 |  2 ++
 target-ppc/mem_helper.c             | 33 +++++++++++++++++++++++++++++++++
 target-ppc/translate/vsx-impl.inc.c | 20 ++++++++++++++++++++
 target-ppc/translate/vsx-ops.inc.c  |  2 ++
 4 files changed, 57 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 257bfca..2fe93ae 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -287,6 +287,8 @@ DEF_HELPER_2(mtvscr, void, env, avr)
 DEF_HELPER_3(lvebx, void, env, avr, tl)
 DEF_HELPER_3(lvehx, void, env, avr, tl)
 DEF_HELPER_3(lvewx, void, env, avr, tl)
+DEF_HELPER_2(lxvb16x, i64, env, tl)
+DEF_HELPER_2(lxvh8x, i64, env, tl)
 DEF_HELPER_3(stvebx, void, env, avr, tl)
 DEF_HELPER_3(stvehx, void, env, avr, tl)
 DEF_HELPER_3(stvewx, void, env, avr, tl)
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index bf6c44a..cf32f73 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -326,6 +326,39 @@ LVE(lvewx, cpu_ldl_data_ra, bswap32, u32)
 #undef I
 #undef LVE
 
+#define LXV(name, access, swap, type, elems)                     \
+uint64_t helper_##name(CPUPPCState *env,                         \
+                       target_ulong addr)                        \
+{                                                                \
+    type r[elems] = {0};                                         \
+    int i, index, bound, step;                                   \
+    if (msr_le) {                                                \
+        index = elems - 1;                                       \
+        bound = -1;                                              \
+        step = -1;                                               \
+    } else  {                                                    \
+        index = 0;                                               \
+        bound = elems;                                           \
+        step = 1;                                                \
+    }                                                            \
+                                                                 \
+    for (i = index; i != bound; i += step) {                     \
+        if (needs_byteswap(env)) {                               \
+            r[i] = swap(access(env, addr, GETPC()));             \
+        } else {                                                 \
+            r[i] =  access(env, addr, GETPC());                  \
+        }                                                        \
+        addr = addr_add(env, addr, sizeof(type));                \
+    }                                                            \
+    return  *((uint64_t *)r);                                    \
+}
+
+#define I(x) (x)
+LXV(lxvb16x, cpu_ldub_data_ra, I, uint8_t, 8)
+LXV(lxvh8x, cpu_lduw_data_ra, bswap16, uint16_t, 4)
+#undef I
+#undef LXV
+
 #define STVE(name, access, swap, element)                               \
     void helper_##name(CPUPPCState *env, ppc_avr_t *r,                  \
                        target_ulong addr)                               \
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index fb29e6d..0c3a0dd 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -102,6 +102,26 @@ static void gen_lxvw4x(DisasContext *ctx)
     tcg_temp_free_i64(tmp);
 }
 
+#define VSX_LOAD_VECTOR(name)                                 \
+static void gen_##name(DisasContext *ctx)                     \
+{                                                               \
+    TCGv EA;                                                    \
+    if (unlikely(!ctx->vsx_enabled)) {                          \
+        gen_exception(ctx, POWERPC_EXCP_VSXU);                  \
+        return;                                                 \
+    }                                                           \
+    gen_set_access_type(ctx, ACCESS_INT);                       \
+    EA = tcg_temp_new();                                        \
+    gen_addr_reg_index(ctx, EA);                                \
+    gen_helper_##name(cpu_vsrh(xT(ctx->opcode)), cpu_env, EA);  \
+    tcg_gen_addi_tl(EA, EA, 8);                                 \
+    gen_helper_##name(cpu_vsrl(xT(ctx->opcode)), cpu_env, EA);  \
+    tcg_temp_free(EA);                                          \
+}
+
+VSX_LOAD_VECTOR(lxvb16x)
+VSX_LOAD_VECTOR(lxvh8x)
+
 #define VSX_STORE_SCALAR(name, operation)                     \
 static void gen_##name(DisasContext *ctx)                     \
 {                                                             \
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 414b73b..598b349 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -7,6 +7,8 @@ GEN_HANDLER_E(lxsspx, 0x1F, 0x0C, 0x10, 0, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(lxvdsx, 0x1F, 0x0C, 0x0A, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(lxvw4x, 0x1F, 0x0C, 0x18, 0, PPC_NONE, PPC2_VSX),
+GEN_HANDLER_E(lxvh8x, 0x1F, 0x0C, 0x19, 0, PPC_NONE,  PPC2_ISA300),
+GEN_HANDLER_E(lxvb16x, 0x1F, 0x0C, 0x1B, 0, PPC_NONE, PPC2_ISA300),
 
 GEN_HANDLER_E(stxsdx, 0x1F, 0xC, 0x16, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(stxsibx, 0x1F, 0xD, 0x1C, 0, PPC_NONE, PPC2_ISA300),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 6/6] target-ppc: add stxvb16x and stxvh8x
  2016-08-07 17:36 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablements - part4 Nikunj A Dadhania
                   ` (4 preceding siblings ...)
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 5/6] target-ppc: add lxvb16x and lxvh8x Nikunj A Dadhania
@ 2016-08-07 17:36 ` Nikunj A Dadhania
  2016-08-08  5:27   ` Richard Henderson
  2016-08-09  3:30 ` [Qemu-devel] [PATCH 0/6] POWER9 TCG enablements - part4 David Gibson
  6 siblings, 1 reply; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-07 17:36 UTC (permalink / raw)
  To: qemu-ppc, david, rth; +Cc: qemu-devel, nikunj, benh

stxvb16x: Store VSX Vector Byte*16
stxvh8x:  Store VSX Vector Halfword*8

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/helper.h                 |  2 ++
 target-ppc/mem_helper.c             | 32 ++++++++++++++++++++++++++++++++
 target-ppc/translate/vsx-impl.inc.c | 20 ++++++++++++++++++++
 target-ppc/translate/vsx-ops.inc.c  |  2 ++
 4 files changed, 56 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 2fe93ae..4e73836 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -292,6 +292,8 @@ DEF_HELPER_2(lxvh8x, i64, env, tl)
 DEF_HELPER_3(stvebx, void, env, avr, tl)
 DEF_HELPER_3(stvehx, void, env, avr, tl)
 DEF_HELPER_3(stvewx, void, env, avr, tl)
+DEF_HELPER_3(stxvb16x, void, env, i64, tl)
+DEF_HELPER_3(stxvh8x, void, env, i64, tl)
 DEF_HELPER_4(vsumsws, void, env, avr, avr, avr)
 DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr)
 DEF_HELPER_4(vsum4sbs, void, env, avr, avr, avr)
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index cf32f73..a7a920c 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -359,6 +359,38 @@ LXV(lxvh8x, cpu_lduw_data_ra, bswap16, uint16_t, 4)
 #undef I
 #undef LXV
 
+#define STXV(name, access, swap, type, elems)                      \
+void helper_##name(CPUPPCState *env, uint64_t vsr,                 \
+                   target_ulong addr)                              \
+{                                                                  \
+    type *r;                                                       \
+    int i, index, bound, step;                                     \
+    if (msr_le) {                                                  \
+        index = elems - 1;                                         \
+        bound = -1;                                                \
+        step = -1;                                                 \
+    } else  {                                                      \
+        index = 0;                                                 \
+        bound = elems;                                             \
+        step = 1;                                                  \
+    }                                                              \
+    r = (type *) &vsr;                                             \
+    for (i = index; i != bound; i += step) {                       \
+        if (needs_byteswap(env)) {                                 \
+            access(env, addr, swap(r[i]), GETPC());                \
+        } else {                                                   \
+            access(env, addr, r[i], GETPC());                      \
+        }                                                          \
+        addr = addr_add(env, addr, sizeof(type));                  \
+    }                                                              \
+}
+
+#define I(x) (x)
+STXV(stxvb16x, cpu_stb_data_ra, I, uint8_t, 8)
+STXV(stxvh8x, cpu_stw_data_ra, bswap16, uint16_t, 4)
+#undef I
+#undef STXV
+
 #define STVE(name, access, swap, element)                               \
     void helper_##name(CPUPPCState *env, ppc_avr_t *r,                  \
                        target_ulong addr)                               \
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index 0c3a0dd..f82ee59 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -122,6 +122,26 @@ static void gen_##name(DisasContext *ctx)                     \
 VSX_LOAD_VECTOR(lxvb16x)
 VSX_LOAD_VECTOR(lxvh8x)
 
+#define VSX_STORE_VECTOR(name)                                \
+static void gen_##name(DisasContext *ctx)                     \
+{                                                               \
+    TCGv EA;                                                    \
+    if (unlikely(!ctx->vsx_enabled)) {                          \
+        gen_exception(ctx, POWERPC_EXCP_VSXU);                  \
+        return;                                                 \
+    }                                                           \
+    gen_set_access_type(ctx, ACCESS_INT);                       \
+    EA = tcg_temp_new();                                        \
+    gen_addr_reg_index(ctx, EA);                                \
+    gen_helper_##name(cpu_env, cpu_vsrh(xT(ctx->opcode)), EA);  \
+    tcg_gen_addi_tl(EA, EA, 8);                                 \
+    gen_helper_##name(cpu_env, cpu_vsrl(xT(ctx->opcode)), EA);  \
+    tcg_temp_free(EA);                                          \
+}
+
+VSX_STORE_VECTOR(stxvb16x)
+VSX_STORE_VECTOR(stxvh8x)
+
 #define VSX_STORE_SCALAR(name, operation)                     \
 static void gen_##name(DisasContext *ctx)                     \
 {                                                             \
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 598b349..f5afa0f 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -17,6 +17,8 @@ GEN_HANDLER_E(stxsiwx, 0x1F, 0xC, 0x04, 0, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(stxsspx, 0x1F, 0xC, 0x14, 0, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(stxvd2x, 0x1F, 0xC, 0x1E, 0, PPC_NONE, PPC2_VSX),
 GEN_HANDLER_E(stxvw4x, 0x1F, 0xC, 0x1C, 0, PPC_NONE, PPC2_VSX),
+GEN_HANDLER_E(stxvh8x, 0x1F, 0x0C, 0x1D, 0, PPC_NONE,  PPC2_ISA300),
+GEN_HANDLER_E(stxvb16x, 0x1F, 0x0C, 0x1F, 0, PPC_NONE, PPC2_ISA300),
 
 GEN_HANDLER_E(mfvsrwz, 0x1F, 0x13, 0x03, 0x0000F800, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(mtvsrwa, 0x1F, 0x13, 0x06, 0x0000F800, PPC_NONE, PPC2_VSX207),
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 2/6] target-ppc: Implement darn instruction Nikunj A Dadhania
@ 2016-08-07 21:33   ` Benjamin Herrenschmidt
  2016-08-08  1:52     ` Nikunj A Dadhania
  2016-08-09  3:28     ` David Gibson
  2016-08-08  5:01   ` [Qemu-devel] " Richard Henderson
  1 sibling, 2 replies; 42+ messages in thread
From: Benjamin Herrenschmidt @ 2016-08-07 21:33 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david, rth; +Cc: qemu-devel, Ravi Bangoria

On Sun, 2016-08-07 at 23:06 +0530, Nikunj A Dadhania wrote:
> +target_ulong helper_darn(uint32_t l)
> +{
> +    target_ulong r = UINT64_MAX;
> +
> +    if (l <= 2) {
> +        do {
> +            r = random() * random();
> +            r &= l ? UINT64_MAX : UINT32_MAX;
> +        } while (r == UINT64_MAX);
> +    }
> +
> +    return r;
> +}
>  #endif

Isn't this a bit week ? Look at the implementation of H_RANDOM...

Cheers,
Ben.



^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-07 21:33   ` Benjamin Herrenschmidt
@ 2016-08-08  1:52     ` Nikunj A Dadhania
  2016-08-08  3:22       ` Benjamin Herrenschmidt
  2016-08-09  3:28     ` David Gibson
  1 sibling, 1 reply; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-08  1:52 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc, david, rth; +Cc: qemu-devel, Ravi Bangoria

Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> On Sun, 2016-08-07 at 23:06 +0530, Nikunj A Dadhania wrote:
>> +target_ulong helper_darn(uint32_t l)
>> +{
>> +    target_ulong r = UINT64_MAX;
>> +
>> +    if (l <= 2) {
>> +        do {
>> +            r = random() * random();
>> +            r &= l ? UINT64_MAX : UINT32_MAX;
>> +        } while (r == UINT64_MAX);
>> +    }
>> +
>> +    return r;
>> +}
>>  #endif
>
> Isn't this a bit week ? Look at the implementation of H_RANDOM...

Sure, will have a look.

Regards,
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-08  1:52     ` Nikunj A Dadhania
@ 2016-08-08  3:22       ` Benjamin Herrenschmidt
  2016-08-08  3:32         ` Nikunj A Dadhania
  0 siblings, 1 reply; 42+ messages in thread
From: Benjamin Herrenschmidt @ 2016-08-08  3:22 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david, rth; +Cc: qemu-devel, Ravi Bangoria

On Mon, 2016-08-08 at 07:22 +0530, Nikunj A Dadhania wrote:
> 
> > Isn't this a bit week ? Look at the implementation of H_RANDOM...
> 
> Sure, will have a look.

And if course I meant "weak" :-) That's what happens when I reply
before breakfast !

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-08  3:22       ` Benjamin Herrenschmidt
@ 2016-08-08  3:32         ` Nikunj A Dadhania
  0 siblings, 0 replies; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-08  3:32 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, qemu-ppc, david, rth; +Cc: qemu-devel, Ravi Bangoria

Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> On Mon, 2016-08-08 at 07:22 +0530, Nikunj A Dadhania wrote:
>> 
>> > Isn't this a bit week ? Look at the implementation of H_RANDOM...
>> 
>> Sure, will have a look.
>
> And if course I meant "weak" :-) That's what happens when I reply
> before breakfast !

I guessed that :-)

Regards
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 1/6] target-ppc: add xxspltib instruction
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 1/6] target-ppc: add xxspltib instruction Nikunj A Dadhania
@ 2016-08-08  4:43   ` Richard Henderson
  2016-08-08  6:19     ` Nikunj A Dadhania
  0 siblings, 1 reply; 42+ messages in thread
From: Richard Henderson @ 2016-08-08  4:43 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, benh

On 08/07/2016 11:06 PM, Nikunj A Dadhania wrote:
> +#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
> +
> +static void gen_xxspltib(DisasContext *ctx)
> +{
> +    unsigned char uim8 = IMM8(ctx->opcode);
> +    if (xS(ctx->opcode) < 32) {
> +        if (unlikely(!ctx->altivec_enabled)) {
> +            gen_exception(ctx, POWERPC_EXCP_VPU);
> +            return;
> +        }
> +    } else {
> +        if (unlikely(!ctx->vsx_enabled)) {
> +            gen_exception(ctx, POWERPC_EXCP_VSXU);
> +            return;
> +        }
> +    }
> +    tcg_gen_movi_i64(cpu_vsrh(xT(ctx->opcode)), pattern(uim8));
> +    tcg_gen_movi_i64(cpu_vsrl(xT(ctx->opcode)), pattern(uim8));
> +}
> +

Is this protected by TARGET_PPC64?  Otherwise the combination of movi_i64 and 
target_ulong looks odd.


r~

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 2/6] target-ppc: Implement darn instruction Nikunj A Dadhania
  2016-08-07 21:33   ` Benjamin Herrenschmidt
@ 2016-08-08  5:01   ` Richard Henderson
  2016-08-08  6:20     ` Nikunj A Dadhania
  1 sibling, 1 reply; 42+ messages in thread
From: Richard Henderson @ 2016-08-08  5:01 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, benh, Ravi Bangoria

On 08/07/2016 11:06 PM, Nikunj A Dadhania wrote:
> +target_ulong helper_darn(uint32_t l)
> +{
> +    target_ulong r = UINT64_MAX;
> +
> +    if (l <= 2) {
> +        do {
> +            r = random() * random();
> +            r &= l ? UINT64_MAX : UINT32_MAX;
> +        } while (r == UINT64_MAX);
> +    }
> +
> +    return r;
> +}

Without considering the rng, I think you should split this into darn32 and 
darn64 based on L in translate.c.  You can diagnose l==2 in translate.c as well.


r~

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 3/6] target-ppc: add lxsi[bw]zx instruction
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 3/6] target-ppc: add lxsi[bw]zx instruction Nikunj A Dadhania
@ 2016-08-08  5:08   ` Richard Henderson
  2016-08-08  6:21     ` Nikunj A Dadhania
  0 siblings, 1 reply; 42+ messages in thread
From: Richard Henderson @ 2016-08-08  5:08 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, benh

On 08/07/2016 11:06 PM, Nikunj A Dadhania wrote:
> +#define GEN_QEMU_LOAD_64(ldop, ext)                                     \
> +static void glue(gen_qemu_, glue(ldop, _i64))(DisasContext *ctx,        \
> +                                               TCGv_i64 val,            \
> +                                               TCGv addr)               \
> +{                                                                       \
> +    TCGv tmp = tcg_temp_new();                                          \
> +    gen_qemu_##ldop(ctx, tmp, addr);                                    \
> +    tcg_gen_##ext##_tl_i64(val, tmp);                                   \
> +    tcg_temp_free(tmp);                                                 \
>  }
>
> +GEN_QEMU_LOAD_64(ld8u,  extu)
> +GEN_QEMU_LOAD_64(ld16u, extu)
> +GEN_QEMU_LOAD_64(ld32u, extu)
> +GEN_QEMU_LOAD_64(ld32s, ext)

This is a good opportunity to clean up a bit of the ppc translator and convert 
to the newer tcg_gen_qemu_ld_i64 function.  This will eliminate the need for 
the extension that you're performing here.


r~

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 4/6] target-ppc: add stxsi[bh]x instruction
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 4/6] target-ppc: add stxsi[bh]x instruction Nikunj A Dadhania
@ 2016-08-08  5:08   ` Richard Henderson
  0 siblings, 0 replies; 42+ messages in thread
From: Richard Henderson @ 2016-08-08  5:08 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, benh

On 08/07/2016 11:06 PM, Nikunj A Dadhania wrote:
> +#define GEN_QEMU_STORE_64(stop)                                         \
> +static void gen_qemu_##stop##_i64(DisasContext *ctx,                    \
> +                                  TCGv_i64 val,                         \
> +                                  TCGv addr)                            \
> +{                                                                       \
> +    TCGv tmp = tcg_temp_new();                                          \
> +    tcg_gen_trunc_i64_tl(tmp, val);                                     \
> +    gen_qemu_##stop(ctx, tmp, addr);                                    \
> +    tcg_temp_free(tmp);                                                 \
>  }

Similarly, use tcg_gen_qemu_st_i64.


r~

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 5/6] target-ppc: add lxvb16x and lxvh8x
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 5/6] target-ppc: add lxvb16x and lxvh8x Nikunj A Dadhania
@ 2016-08-08  5:27   ` Richard Henderson
  2016-08-08  6:35     ` Richard Henderson
  0 siblings, 1 reply; 42+ messages in thread
From: Richard Henderson @ 2016-08-08  5:27 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, benh

On 08/07/2016 11:06 PM, Nikunj A Dadhania wrote:
> +#define LXV(name, access, swap, type, elems)                     \
> +uint64_t helper_##name(CPUPPCState *env,                         \
> +                       target_ulong addr)                        \
> +{                                                                \
> +    type r[elems] = {0};                                         \
> +    int i, index, bound, step;                                   \
> +    if (msr_le) {                                                \
> +        index = elems - 1;                                       \
> +        bound = -1;                                              \
> +        step = -1;                                               \
> +    } else  {                                                    \
> +        index = 0;                                               \
> +        bound = elems;                                           \
> +        step = 1;                                                \
> +    }                                                            \
> +                                                                 \
> +    for (i = index; i != bound; i += step) {                     \
> +        if (needs_byteswap(env)) {                               \
> +            r[i] = swap(access(env, addr, GETPC()));             \
> +        } else {                                                 \
> +            r[i] =  access(env, addr, GETPC());                  \
> +        }                                                        \
> +        addr = addr_add(env, addr, sizeof(type));                \
> +    }                                                            \
> +    return  *((uint64_t *)r);                                    \
> +}

This looks more complicated than necessary.

(1) In big-endian mode, surely this simplifies to two 64-bit big-endian loads.

(2) In little-endian mode, the overhead of accessing memory surely dominates, 
and therefore we should perform two 64-bit loads and manipulate the data after.

AFAICS, this is easiest done by requesting two 64-bit *big-endian* loads, and 
then swapping bytes.  E.g.

uint64_t helper_bswap16x4(uint64_t x)
{
     uint64_t m = 0x00ff00ff00ff00ffull;
     return ((x & m) << 8) | ((x >> 8) & m);
}

uint64_t helper_bswap32x2(uint64_t x)
{
     return deposit64(bswap32(x >> 32), 32, 32, bswap32(x));
}


     tcg_gen_qemu_ld_i64(dest, addr, MO_BEQ, s->mem_index);
     if (ctx->le_mode) {
         gen_helper_bswap16x4(dest, dest);
     }


r~

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 6/6] target-ppc: add stxvb16x and stxvh8x
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 6/6] target-ppc: add stxvb16x and stxvh8x Nikunj A Dadhania
@ 2016-08-08  5:27   ` Richard Henderson
  0 siblings, 0 replies; 42+ messages in thread
From: Richard Henderson @ 2016-08-08  5:27 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel, benh

On 08/07/2016 11:06 PM, Nikunj A Dadhania wrote:
> +#define STXV(name, access, swap, type, elems)                      \
> +void helper_##name(CPUPPCState *env, uint64_t vsr,                 \
> +                   target_ulong addr)                              \
> +{                                                                  \
> +    type *r;                                                       \
> +    int i, index, bound, step;                                     \
> +    if (msr_le) {                                                  \
> +        index = elems - 1;                                         \
> +        bound = -1;                                                \
> +        step = -1;                                                 \
> +    } else  {                                                      \
> +        index = 0;                                                 \
> +        bound = elems;                                             \
> +        step = 1;                                                  \
> +    }                                                              \
> +    r = (type *) &vsr;                                             \
> +    for (i = index; i != bound; i += step) {                       \
> +        if (needs_byteswap(env)) {                                 \
> +            access(env, addr, swap(r[i]), GETPC());                \
> +        } else {                                                   \
> +            access(env, addr, r[i], GETPC());                      \
> +        }                                                          \
> +        addr = addr_add(env, addr, sizeof(type));                  \
> +    }                                                              \
> +}

Similarly, with the same helpers as for load.


r~

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 1/6] target-ppc: add xxspltib instruction
  2016-08-08  4:43   ` Richard Henderson
@ 2016-08-08  6:19     ` Nikunj A Dadhania
  0 siblings, 0 replies; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-08  6:19 UTC (permalink / raw)
  To: Richard Henderson, qemu-ppc, david; +Cc: qemu-devel, benh

Richard Henderson <rth@twiddle.net> writes:

> On 08/07/2016 11:06 PM, Nikunj A Dadhania wrote:
>> +#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
>> +
>> +static void gen_xxspltib(DisasContext *ctx)
>> +{
>> +    unsigned char uim8 = IMM8(ctx->opcode);
>> +    if (xS(ctx->opcode) < 32) {
>> +        if (unlikely(!ctx->altivec_enabled)) {
>> +            gen_exception(ctx, POWERPC_EXCP_VPU);
>> +            return;
>> +        }
>> +    } else {
>> +        if (unlikely(!ctx->vsx_enabled)) {
>> +            gen_exception(ctx, POWERPC_EXCP_VSXU);
>> +            return;
>> +        }
>> +    }
>> +    tcg_gen_movi_i64(cpu_vsrh(xT(ctx->opcode)), pattern(uim8));
>> +    tcg_gen_movi_i64(cpu_vsrl(xT(ctx->opcode)), pattern(uim8));
>> +}
>> +
>
> Is this protected by TARGET_PPC64?  Otherwise the combination of movi_i64 and 
> target_ulong looks odd.

No it is not, will add that.

Regards
NIkunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-08  5:01   ` [Qemu-devel] " Richard Henderson
@ 2016-08-08  6:20     ` Nikunj A Dadhania
  0 siblings, 0 replies; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-08  6:20 UTC (permalink / raw)
  To: Richard Henderson, qemu-ppc, david; +Cc: qemu-devel, benh, Ravi Bangoria

Richard Henderson <rth@twiddle.net> writes:

> On 08/07/2016 11:06 PM, Nikunj A Dadhania wrote:
>> +target_ulong helper_darn(uint32_t l)
>> +{
>> +    target_ulong r = UINT64_MAX;
>> +
>> +    if (l <= 2) {
>> +        do {
>> +            r = random() * random();
>> +            r &= l ? UINT64_MAX : UINT32_MAX;
>> +        } while (r == UINT64_MAX);
>> +    }
>> +
>> +    return r;
>> +}
>
> Without considering the rng, I think you should split this into darn32 and 
> darn64 based on L in translate.c.  You can diagnose l==2 in translate.c as well.

Sure.

Regards
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 3/6] target-ppc: add lxsi[bw]zx instruction
  2016-08-08  5:08   ` Richard Henderson
@ 2016-08-08  6:21     ` Nikunj A Dadhania
  0 siblings, 0 replies; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-08  6:21 UTC (permalink / raw)
  To: Richard Henderson, qemu-ppc, david; +Cc: qemu-devel, benh

Richard Henderson <rth@twiddle.net> writes:

> On 08/07/2016 11:06 PM, Nikunj A Dadhania wrote:
>> +#define GEN_QEMU_LOAD_64(ldop, ext)                                     \
>> +static void glue(gen_qemu_, glue(ldop, _i64))(DisasContext *ctx,        \
>> +                                               TCGv_i64 val,            \
>> +                                               TCGv addr)               \
>> +{                                                                       \
>> +    TCGv tmp = tcg_temp_new();                                          \
>> +    gen_qemu_##ldop(ctx, tmp, addr);                                    \
>> +    tcg_gen_##ext##_tl_i64(val, tmp);                                   \
>> +    tcg_temp_free(tmp);                                                 \
>>  }
>>
>> +GEN_QEMU_LOAD_64(ld8u,  extu)
>> +GEN_QEMU_LOAD_64(ld16u, extu)
>> +GEN_QEMU_LOAD_64(ld32u, extu)
>> +GEN_QEMU_LOAD_64(ld32s, ext)
>
> This is a good opportunity to clean up a bit of the ppc translator and convert 
> to the newer tcg_gen_qemu_ld_i64 function.  This will eliminate the need for 
> the extension that you're performing here.

I will have a look and change.

Regards
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 5/6] target-ppc: add lxvb16x and lxvh8x
  2016-08-08  5:27   ` Richard Henderson
@ 2016-08-08  6:35     ` Richard Henderson
  2016-08-10  9:21       ` Nikunj A Dadhania
  0 siblings, 1 reply; 42+ messages in thread
From: Richard Henderson @ 2016-08-08  6:35 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel

On 08/08/2016 10:57 AM, Richard Henderson wrote:
> On 08/07/2016 11:06 PM, Nikunj A Dadhania wrote:
>> +#define LXV(name, access, swap, type, elems)                     \
>> +uint64_t helper_##name(CPUPPCState *env,                         \
>> +                       target_ulong addr)                        \
>> +{                                                                \
>> +    type r[elems] = {0};                                         \
>> +    int i, index, bound, step;                                   \
>> +    if (msr_le) {                                                \
>> +        index = elems - 1;                                       \
>> +        bound = -1;                                              \
>> +        step = -1;                                               \
>> +    } else  {                                                    \
>> +        index = 0;                                               \
>> +        bound = elems;                                           \
>> +        step = 1;                                                \
>> +    }                                                            \
>> +                                                                 \
>> +    for (i = index; i != bound; i += step) {                     \
>> +        if (needs_byteswap(env)) {                               \
>> +            r[i] = swap(access(env, addr, GETPC()));             \
>> +        } else {                                                 \
>> +            r[i] =  access(env, addr, GETPC());                  \
>> +        }                                                        \
>> +        addr = addr_add(env, addr, sizeof(type));                \
>> +    }                                                            \
>> +    return  *((uint64_t *)r);                                    \
>> +}
>
> This looks more complicated than necessary.
>
> (1) In big-endian mode, surely this simplifies to two 64-bit big-endian loads.
>
> (2) In little-endian mode, the overhead of accessing memory surely dominates,
> and therefore we should perform two 64-bit loads and manipulate the data after.
>
> AFAICS, this is easiest done by requesting two 64-bit *big-endian* loads, and
> then swapping bytes.  E.g.
>
> uint64_t helper_bswap16x4(uint64_t x)
> {
>     uint64_t m = 0x00ff00ff00ff00ffull;
>     return ((x & m) << 8) | ((x >> 8) & m);
> }
>
> uint64_t helper_bswap32x2(uint64_t x)
> {
>     return deposit64(bswap32(x >> 32), 32, 32, bswap32(x));
> }

To correct myself, this big-endian load really only makes sense for lxvh8x.
For lxvw4x, a little-endian load with a word swap is fewer operations.  I.e.

   tcg_gen_qemu_ld_i64(t0, addr, ctx->mem_idx, MO_LEQ);
   tcg_gen_shri_i64(t1, t0, 32);
   tcg_gen_deposit_i64(dest, t1, t0, 32, 32);


r~

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-07 21:33   ` Benjamin Herrenschmidt
  2016-08-08  1:52     ` Nikunj A Dadhania
@ 2016-08-09  3:28     ` David Gibson
  2016-08-09  4:54       ` Nikunj A Dadhania
  1 sibling, 1 reply; 42+ messages in thread
From: David Gibson @ 2016-08-09  3:28 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Nikunj A Dadhania, qemu-ppc, rth, qemu-devel, Ravi Bangoria

[-- Attachment #1: Type: text/plain, Size: 1119 bytes --]

On Mon, Aug 08, 2016 at 07:33:37AM +1000, Benjamin Herrenschmidt wrote:
> On Sun, 2016-08-07 at 23:06 +0530, Nikunj A Dadhania wrote:
> > +target_ulong helper_darn(uint32_t l)
> > +{
> > +    target_ulong r = UINT64_MAX;
> > +
> > +    if (l <= 2) {
> > +        do {
> > +            r = random() * random();
> > +            r &= l ? UINT64_MAX : UINT32_MAX;
> > +        } while (r == UINT64_MAX);
> > +    }
> > +
> > +    return r;
> > +}
> >  #endif
> 
> Isn't this a bit week ? Look at the implementation of H_RANDOM...

Indeed, you should be using the rng backend that H_RANDOM, virtio-rng
and the Intel random number instruction all use.

But, worse than that: even if random() was a good RNG, I'm pretty sure
than although random() * random() will give you a random number with
twice as many bits as random() alone, it won't be uniformly
distributed.  That's probably not what you want.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 0/6] POWER9 TCG enablements - part4
  2016-08-07 17:36 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablements - part4 Nikunj A Dadhania
                   ` (5 preceding siblings ...)
  2016-08-07 17:36 ` [Qemu-devel] [PATCH 6/6] target-ppc: add stxvb16x and stxvh8x Nikunj A Dadhania
@ 2016-08-09  3:30 ` David Gibson
  6 siblings, 0 replies; 42+ messages in thread
From: David Gibson @ 2016-08-09  3:30 UTC (permalink / raw)
  To: Nikunj A Dadhania; +Cc: qemu-ppc, rth, qemu-devel, benh

[-- Attachment #1: Type: text/plain, Size: 1745 bytes --]

On Sun, Aug 07, 2016 at 11:06:49PM +0530, Nikunj A Dadhania wrote:
> This series contains 10 new instructions for POWER9 ISA3.0.
> 
> Patches:
>     01:  xxspltib: VSX Vector Splat Immediate Byte
>     02:  darn: Deliver A Random Number
>     03:  lxsibzx - Load VSX Scalar as Integer Byte & Zero Indexed
>          lxsihzx - Load VSX Scalar as Integer Halfword & Zero Indexed
>     04:  stxsibx - Store VSX Scalar as Integer Byte Indexed
>          stxsihx - Store VSX Scalar as Integer Halfword Indexed
>     05:  lxvb16x: Load VSX Vector Byte*16
>          lxvh8x:  Load VSX Vector Halfword*8
>     06:  stxvb16x: Store VSX Vector Byte*16
>          stxvh8x:  Store VSX Vector Halfword*8
> 
> Nikunj A Dadhania (5):
>   target-ppc: add xxspltib instruction
>   target-ppc: add lxsi[bw]zx instruction
>   target-ppc: add stxsi[bh]x instruction
>   target-ppc: add lxvb16x and lxvh8x
>   target-ppc: add stxvb16x and stxvh8x
> 
> Ravi Bangoria (1):
>   target-ppc: Implement darn instruction
> 
>  target-ppc/helper.h                 |  5 +++
>  target-ppc/int_helper.c             | 14 ++++++++
>  target-ppc/mem_helper.c             | 65 +++++++++++++++++++++++++++++++++++++
>  target-ppc/translate.c              | 61 ++++++++++++++++++++++------------
>  target-ppc/translate/vsx-impl.inc.c | 64 ++++++++++++++++++++++++++++++++++++
>  target-ppc/translate/vsx-ops.inc.c  | 13 ++++++++
>  6 files changed, 202 insertions(+), 20 deletions(-)
> 

As with part3, I'll wait for a respin with comments addressed.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-09  3:28     ` David Gibson
@ 2016-08-09  4:54       ` Nikunj A Dadhania
  2016-08-09  9:17         ` [Qemu-devel] [Qemu-ppc] " Nikunj A Dadhania
  0 siblings, 1 reply; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-09  4:54 UTC (permalink / raw)
  To: David Gibson, Benjamin Herrenschmidt
  Cc: qemu-ppc, rth, qemu-devel, Ravi Bangoria

David Gibson <david@gibson.dropbear.id.au> writes:

> [ Unknown signature status ]
> On Mon, Aug 08, 2016 at 07:33:37AM +1000, Benjamin Herrenschmidt wrote:
>> On Sun, 2016-08-07 at 23:06 +0530, Nikunj A Dadhania wrote:
>> > +target_ulong helper_darn(uint32_t l)
>> > +{
>> > +    target_ulong r = UINT64_MAX;
>> > +
>> > +    if (l <= 2) {
>> > +        do {
>> > +            r = random() * random();
>> > +            r &= l ? UINT64_MAX : UINT32_MAX;
>> > +        } while (r == UINT64_MAX);
>> > +    }
>> > +
>> > +    return r;
>> > +}
>> >  #endif
>> 
>> Isn't this a bit week ? Look at the implementation of H_RANDOM...
>
> Indeed, you should be using the rng backend that H_RANDOM, virtio-rng
> and the Intel random number instruction all use.

I was looking at implementing this, AFAIU, I have to get a new RNG
object in the initialization routine. We would need an instance of this
per machine. So for pseries I can add in ppc_spapr_init(). I am not sure
in case of linux-user where should this be initialized.

One other place was init_proc_POWER9(), but that will be per cpu and
member of CPUPPCState structure. Advantage is it will work for system
emulation and linux-user both and we would not need a lock.

>
> But, worse than that: even if random() was a good RNG, I'm pretty sure
> than although random() * random() will give you a random number with
> twice as many bits as random() alone, it won't be uniformly
> distributed.  That's probably not what you want.

Right, I had seen that issue.

Regards,
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-09  4:54       ` Nikunj A Dadhania
@ 2016-08-09  9:17         ` Nikunj A Dadhania
  2016-08-12  6:29           ` David Gibson
  0 siblings, 1 reply; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-09  9:17 UTC (permalink / raw)
  To: David Gibson, Benjamin Herrenschmidt
  Cc: qemu-ppc, rth, qemu-devel, Ravi Bangoria

Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> writes:

> David Gibson <david@gibson.dropbear.id.au> writes:
>
>> [ Unknown signature status ]
>> On Mon, Aug 08, 2016 at 07:33:37AM +1000, Benjamin Herrenschmidt wrote:
>>> On Sun, 2016-08-07 at 23:06 +0530, Nikunj A Dadhania wrote:
>>> > +target_ulong helper_darn(uint32_t l)
>>> > +{
>>> > +    target_ulong r = UINT64_MAX;
>>> > +
>>> > +    if (l <= 2) {
>>> > +        do {
>>> > +            r = random() * random();
>>> > +            r &= l ? UINT64_MAX : UINT32_MAX;
>>> > +        } while (r == UINT64_MAX);
>>> > +    }
>>> > +
>>> > +    return r;
>>> > +}
>>> >  #endif
>>> 
>>> Isn't this a bit week ? Look at the implementation of H_RANDOM...
>>
>> Indeed, you should be using the rng backend that H_RANDOM, virtio-rng
>> and the Intel random number instruction all use.

Can you point me to the intel instruction, I couldn't get rdrand
implementation.

> I was looking at implementing this, AFAIU, I have to get a new RNG
> object in the initialization routine. We would need an instance of this
> per machine. So for pseries I can add in ppc_spapr_init(). I am not sure
> in case of linux-user where should this be initialized.
>
> One other place was init_proc_POWER9(), but that will be per cpu and
> member of CPUPPCState structure. Advantage is it will work for system
> emulation and linux-user both and we would not need a lock.

More issues here. Random backend is not compiled for linux-user, adding
that wasn't difficult, but then rng_backend_request_entropy() uses
qemu_set_fd_handler() which is a stub for linux-user
(stubs/set-fd-handler.c)

Regards,
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 5/6] target-ppc: add lxvb16x and lxvh8x
  2016-08-08  6:35     ` Richard Henderson
@ 2016-08-10  9:21       ` Nikunj A Dadhania
  2016-08-10 10:12         ` Richard Henderson
  0 siblings, 1 reply; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-10  9:21 UTC (permalink / raw)
  To: Richard Henderson, qemu-ppc, david; +Cc: qemu-devel

Richard Henderson <rth@twiddle.net> writes:

> On 08/08/2016 10:57 AM, Richard Henderson wrote:
>> On 08/07/2016 11:06 PM, Nikunj A Dadhania wrote:
>>> +#define LXV(name, access, swap, type, elems)                     \
>>> +uint64_t helper_##name(CPUPPCState *env,                         \
>>> +                       target_ulong addr)                        \
>>> +{                                                                \
>>> +    type r[elems] = {0};                                         \
>>> +    int i, index, bound, step;                                   \
>>> +    if (msr_le) {                                                \
>>> +        index = elems - 1;                                       \
>>> +        bound = -1;                                              \
>>> +        step = -1;                                               \
>>> +    } else  {                                                    \
>>> +        index = 0;                                               \
>>> +        bound = elems;                                           \
>>> +        step = 1;                                                \
>>> +    }                                                            \
>>> +                                                                 \
>>> +    for (i = index; i != bound; i += step) {                     \
>>> +        if (needs_byteswap(env)) {                               \
>>> +            r[i] = swap(access(env, addr, GETPC()));             \
>>> +        } else {                                                 \
>>> +            r[i] =  access(env, addr, GETPC());                  \
>>> +        }                                                        \
>>> +        addr = addr_add(env, addr, sizeof(type));                \
>>> +    }                                                            \
>>> +    return  *((uint64_t *)r);                                    \
>>> +}
>>
>> This looks more complicated than necessary.
>>
>> (1) In big-endian mode, surely this simplifies to two 64-bit big-endian loads.
>>
>> (2) In little-endian mode, the overhead of accessing memory surely dominates,
>> and therefore we should perform two 64-bit loads and manipulate the data after.
>>
>> AFAICS, this is easiest done by requesting two 64-bit *big-endian* loads, and
>> then swapping bytes.  E.g.
>>
>> uint64_t helper_bswap16x4(uint64_t x)
>> {
>>     uint64_t m = 0x00ff00ff00ff00ffull;
>>     return ((x & m) << 8) | ((x >> 8) & m);
>> }
>>
>> uint64_t helper_bswap32x2(uint64_t x)
>> {
>>     return deposit64(bswap32(x >> 32), 32, 32, bswap32(x));
>> }
>
> To correct myself, this big-endian load really only makes sense for lxvh8x.
> For lxvw4x, a little-endian load with a word swap is fewer operations.  I.e.
>
>    tcg_gen_qemu_ld_i64(t0, addr, ctx->mem_idx, MO_LEQ);
>    tcg_gen_shri_i64(t1, t0, 32);
>    tcg_gen_deposit_i64(dest, t1, t0, 32, 32);


I have following for lxvw4x:

    if (ctx->le_mode) {
        t0 = tcg_temp_new_i64();
        t1 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, EA, ctx->mem_idx, MO_LEQ);
        tcg_gen_shri_i64(t1, t0, 32);
        tcg_gen_deposit_i64(xth, t1, t0, 32, 32);
        tcg_gen_addi_tl(EA, EA, 8);
        tcg_gen_qemu_ld_i64(t0, EA, ctx->mem_idx, MO_LEQ);
        tcg_gen_shri_i64(t1, t0, 32);
        tcg_gen_deposit_i64(xtl, t1, t0, 32, 32);
        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
    } else {
        tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ);
        tcg_gen_addi_tl(EA, EA, 8);
        tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
    }

Here is the test code:

__vector uint32_t vrt32;
uint32_t rb32[4] = {0x00010203, 0x20212223, 0x30313233, 0x40414243};
asm("lxvw4x %x0, 0, %1 \n\t" \
     : "=ws"(vrt32) : "r"(&rb32));
printf("VRT32 = "); vec_put_u32(vrt32);

Result On LE:
VRT32 = 40414243 30313233 20212223 00010203

Result On BE:
VRT32 = 03020100 23222120 33323130 43424140

I had have expected the the BE result to be
VRT32 = 00010203 20212223 30313233 40414243

But seems there is something going under the hoods. Am I missing
something here?

I can fix the BE case using following but not sure if that will be
correct !

        tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_Q);
        gen_helper_bswap32x2(xth, xth);
        tcg_gen_addi_tl(EA, EA, 8);
        tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_Q);
        gen_helper_bswap32x2(xtl, xtl);

uint64_t helper_bswap32x2(uint64_t x)
{
    return deposit64((x >> 32), 32, 32, (x));
}

And then I get the expected result:
VRT32 = 00010203 20212223 30313233 40414243

Regards,
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 5/6] target-ppc: add lxvb16x and lxvh8x
  2016-08-10  9:21       ` Nikunj A Dadhania
@ 2016-08-10 10:12         ` Richard Henderson
  2016-08-10 10:33           ` Nikunj A Dadhania
  2016-08-10 15:21           ` Nikunj A Dadhania
  0 siblings, 2 replies; 42+ messages in thread
From: Richard Henderson @ 2016-08-10 10:12 UTC (permalink / raw)
  To: Nikunj A Dadhania, qemu-ppc, david; +Cc: qemu-devel

On 08/10/2016 02:51 PM, Nikunj A Dadhania wrote:
> I can fix the BE case using following but not sure if that will be
> correct !
>
>         tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_Q);
>         gen_helper_bswap32x2(xth, xth);
>         tcg_gen_addi_tl(EA, EA, 8);
>         tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_Q);
>         gen_helper_bswap32x2(xtl, xtl);

This cannot be correct, because it assumes a host-dependent byte ordering.  You 
should be able to see different results depending on a BE or LE host.


> __vector uint32_t vrt32;
> uint32_t rb32[4] = {0x00010203, 0x20212223, 0x30313233, 0x40414243};
> asm("lxvw4x %x0, 0, %1 \n\t" \
>      : "=ws"(vrt32) : "r"(&rb32));
> printf("VRT32 = "); vec_put_u32(vrt32);
>
> Result On LE:
> VRT32 = 40414243 30313233 20212223 00010203
>
> Result On BE:
> VRT32 = 03020100 23222120 33323130 43424140

Did you really recompile the test program for BE and LE, or did you just use a 
different command-line switch to qemu with the same executable image?  I can't 
see how you could possibly get these results for BE.


r~

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 5/6] target-ppc: add lxvb16x and lxvh8x
  2016-08-10 10:12         ` Richard Henderson
@ 2016-08-10 10:33           ` Nikunj A Dadhania
  2016-08-10 15:21           ` Nikunj A Dadhania
  1 sibling, 0 replies; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-10 10:33 UTC (permalink / raw)
  To: Richard Henderson, qemu-ppc, david; +Cc: qemu-devel

Richard Henderson <rth@twiddle.net> writes:

> On 08/10/2016 02:51 PM, Nikunj A Dadhania wrote:
>> I can fix the BE case using following but not sure if that will be
>> correct !
>>
>>         tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_Q);
>>         gen_helper_bswap32x2(xth, xth);
>>         tcg_gen_addi_tl(EA, EA, 8);
>>         tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_Q);
>>         gen_helper_bswap32x2(xtl, xtl);
>
> This cannot be correct, because it assumes a host-dependent byte ordering.  You 
> should be able to see different results depending on a BE or LE host.
>
>
>> __vector uint32_t vrt32;
>> uint32_t rb32[4] = {0x00010203, 0x20212223, 0x30313233, 0x40414243};
>> asm("lxvw4x %x0, 0, %1 \n\t" \
>>      : "=ws"(vrt32) : "r"(&rb32));
>> printf("VRT32 = "); vec_put_u32(vrt32);
>>
>> Result On LE:
>> VRT32 = 40414243 30313233 20212223 00010203
>>
>> Result On BE:
>> VRT32 = 03020100 23222120 33323130 43424140
>
> Did you really recompile the test program for BE and LE, or did you just use a 
> different command-line switch to qemu with the same executable image?

I have two different images one LE and other BE and run it like this;

./ppc64le-linux-user/qemu-ppc64le  -cpu POWER9 test-tcg/LElxvx
./ppc64-linux-user/qemu-ppc64  -cpu POWER9 test-tcg/BElxvx

$ file LElxvx 
LElxvx: ELF 64-bit LSB executable, 64-bit PowerPC or cisco 7500, version 1 (GNU/Linux), statically linked, for GNU/Linux 2.6.32, not stripped
$ file BElxvx 
BElxvx: ELF 64-bit MSB executable, 64-bit PowerPC or cisco 7500, version 1 (GNU/Linux), statically linked, for GNU/Linux 4.4.3, not stripped

> I can't see how you could possibly get these results for BE.

I have been confused since yesterday, so thought of asking you.

Regards
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 5/6] target-ppc: add lxvb16x and lxvh8x
  2016-08-10 10:12         ` Richard Henderson
  2016-08-10 10:33           ` Nikunj A Dadhania
@ 2016-08-10 15:21           ` Nikunj A Dadhania
  1 sibling, 0 replies; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-10 15:21 UTC (permalink / raw)
  To: Richard Henderson, qemu-ppc, david; +Cc: qemu-devel

Richard Henderson <rth@twiddle.net> writes:

> On 08/10/2016 02:51 PM, Nikunj A Dadhania wrote:
>> I can fix the BE case using following but not sure if that will be
>> correct !
>>
>>         tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_Q);
>>         gen_helper_bswap32x2(xth, xth);
>>         tcg_gen_addi_tl(EA, EA, 8);
>>         tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_Q);
>>         gen_helper_bswap32x2(xtl, xtl);
>
> This cannot be correct, because it assumes a host-dependent byte ordering.  You 
> should be able to see different results depending on a BE or LE host.

Forgot to mention, this was just for the BE part. Because MO_BEQ wasnt
giving result as expected.

if (ctx->le_mode) {
    /* Same as before */
} else {
    tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_Q);
    gen_helper_bswap32x2(xth, xth);
    tcg_gen_addi_tl(EA, EA, 8);
    tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_Q);
    gen_helper_bswap32x2(xtl, xtl);
}

Regards
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-09  9:17         ` [Qemu-devel] [Qemu-ppc] " Nikunj A Dadhania
@ 2016-08-12  6:29           ` David Gibson
  2016-08-12  6:43             ` Nikunj A Dadhania
  0 siblings, 1 reply; 42+ messages in thread
From: David Gibson @ 2016-08-12  6:29 UTC (permalink / raw)
  To: Nikunj A Dadhania
  Cc: Benjamin Herrenschmidt, qemu-ppc, rth, qemu-devel, Ravi Bangoria

[-- Attachment #1: Type: text/plain, Size: 2263 bytes --]

On Tue, Aug 09, 2016 at 02:47:46PM +0530, Nikunj A Dadhania wrote:
> Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> writes:
> 
> > David Gibson <david@gibson.dropbear.id.au> writes:
> >
> >> [ Unknown signature status ]
> >> On Mon, Aug 08, 2016 at 07:33:37AM +1000, Benjamin Herrenschmidt wrote:
> >>> On Sun, 2016-08-07 at 23:06 +0530, Nikunj A Dadhania wrote:
> >>> > +target_ulong helper_darn(uint32_t l)
> >>> > +{
> >>> > +    target_ulong r = UINT64_MAX;
> >>> > +
> >>> > +    if (l <= 2) {
> >>> > +        do {
> >>> > +            r = random() * random();
> >>> > +            r &= l ? UINT64_MAX : UINT32_MAX;
> >>> > +        } while (r == UINT64_MAX);
> >>> > +    }
> >>> > +
> >>> > +    return r;
> >>> > +}
> >>> >  #endif
> >>> 
> >>> Isn't this a bit week ? Look at the implementation of H_RANDOM...
> >>
> >> Indeed, you should be using the rng backend that H_RANDOM, virtio-rng
> >> and the Intel random number instruction all use.
> 
> Can you point me to the intel instruction, I couldn't get rdrand
> implementation.

Ah.. turns out no.  I'd assumed it was there and used the same backend
as virtio-rng and H_RANDOM, but I hadn't actually looked at the code,
and now that I'm trying I can't see it either.

> 
> > I was looking at implementing this, AFAIU, I have to get a new RNG
> > object in the initialization routine. We would need an instance of this
> > per machine. So for pseries I can add in ppc_spapr_init(). I am not sure
> > in case of linux-user where should this be initialized.
> >
> > One other place was init_proc_POWER9(), but that will be per cpu and
> > member of CPUPPCState structure. Advantage is it will work for system
> > emulation and linux-user both and we would not need a lock.
> 
> More issues here. Random backend is not compiled for linux-user, adding
> that wasn't difficult, but then rng_backend_request_entropy() uses
> qemu_set_fd_handler() which is a stub for linux-user
> (stubs/set-fd-handler.c)7


Ah.. yeah, not sure how we'll need to handle that.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-12  6:29           ` David Gibson
@ 2016-08-12  6:43             ` Nikunj A Dadhania
  2016-08-12  6:56               ` David Gibson
  2016-08-12  7:29               ` Thomas Huth
  0 siblings, 2 replies; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-12  6:43 UTC (permalink / raw)
  To: David Gibson
  Cc: Benjamin Herrenschmidt, qemu-ppc, rth, qemu-devel, Ravi Bangoria

David Gibson <david@gibson.dropbear.id.au> writes:

> [ Unknown signature status ]
> On Tue, Aug 09, 2016 at 02:47:46PM +0530, Nikunj A Dadhania wrote:
>> Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> writes:
>> 
>> > David Gibson <david@gibson.dropbear.id.au> writes:
>> >
>> >> [ Unknown signature status ]
>> >> On Mon, Aug 08, 2016 at 07:33:37AM +1000, Benjamin Herrenschmidt wrote:
>> >>> On Sun, 2016-08-07 at 23:06 +0530, Nikunj A Dadhania wrote:
>> >>> > +target_ulong helper_darn(uint32_t l)
>> >>> > +{
>> >>> > +    target_ulong r = UINT64_MAX;
>> >>> > +
>> >>> > +    if (l <= 2) {
>> >>> > +        do {
>> >>> > +            r = random() * random();
>> >>> > +            r &= l ? UINT64_MAX : UINT32_MAX;
>> >>> > +        } while (r == UINT64_MAX);
>> >>> > +    }
>> >>> > +
>> >>> > +    return r;
>> >>> > +}
>> >>> >  #endif
>> >>> 
>> >>> Isn't this a bit week ? Look at the implementation of H_RANDOM...
>> >>
>> >> Indeed, you should be using the rng backend that H_RANDOM, virtio-rng
>> >> and the Intel random number instruction all use.
>> 
>> Can you point me to the intel instruction, I couldn't get rdrand
>> implementation.
>
> Ah.. turns out no.  I'd assumed it was there and used the same backend
> as virtio-rng and H_RANDOM, but I hadn't actually looked at the code,
> and now that I'm trying I can't see it either.
>
>> 
>> > I was looking at implementing this, AFAIU, I have to get a new RNG
>> > object in the initialization routine. We would need an instance of this
>> > per machine. So for pseries I can add in ppc_spapr_init(). I am not sure
>> > in case of linux-user where should this be initialized.
>> >
>> > One other place was init_proc_POWER9(), but that will be per cpu and
>> > member of CPUPPCState structure. Advantage is it will work for system
>> > emulation and linux-user both and we would not need a lock.
>> 
>> More issues here. Random backend is not compiled for linux-user, adding
>> that wasn't difficult, but then rng_backend_request_entropy() uses
>> qemu_set_fd_handler() which is a stub for linux-user
>> (stubs/set-fd-handler.c)7
>
>
> Ah.. yeah, not sure how we'll need to handle that.

I have sent updated patch, reading from /dev/random. Not sure if that is
allowed in tcg. Works fine though.

Regards
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-12  6:43             ` Nikunj A Dadhania
@ 2016-08-12  6:56               ` David Gibson
  2016-08-12  7:13                 ` Nikunj A Dadhania
  2016-08-12  7:29               ` Thomas Huth
  1 sibling, 1 reply; 42+ messages in thread
From: David Gibson @ 2016-08-12  6:56 UTC (permalink / raw)
  To: Nikunj A Dadhania
  Cc: Benjamin Herrenschmidt, qemu-ppc, rth, qemu-devel, Ravi Bangoria

[-- Attachment #1: Type: text/plain, Size: 2945 bytes --]

On Fri, Aug 12, 2016 at 12:13:49PM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > [ Unknown signature status ]
> > On Tue, Aug 09, 2016 at 02:47:46PM +0530, Nikunj A Dadhania wrote:
> >> Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> writes:
> >> 
> >> > David Gibson <david@gibson.dropbear.id.au> writes:
> >> >
> >> >> [ Unknown signature status ]
> >> >> On Mon, Aug 08, 2016 at 07:33:37AM +1000, Benjamin Herrenschmidt wrote:
> >> >>> On Sun, 2016-08-07 at 23:06 +0530, Nikunj A Dadhania wrote:
> >> >>> > +target_ulong helper_darn(uint32_t l)
> >> >>> > +{
> >> >>> > +    target_ulong r = UINT64_MAX;
> >> >>> > +
> >> >>> > +    if (l <= 2) {
> >> >>> > +        do {
> >> >>> > +            r = random() * random();
> >> >>> > +            r &= l ? UINT64_MAX : UINT32_MAX;
> >> >>> > +        } while (r == UINT64_MAX);
> >> >>> > +    }
> >> >>> > +
> >> >>> > +    return r;
> >> >>> > +}
> >> >>> >  #endif
> >> >>> 
> >> >>> Isn't this a bit week ? Look at the implementation of H_RANDOM...
> >> >>
> >> >> Indeed, you should be using the rng backend that H_RANDOM, virtio-rng
> >> >> and the Intel random number instruction all use.
> >> 
> >> Can you point me to the intel instruction, I couldn't get rdrand
> >> implementation.
> >
> > Ah.. turns out no.  I'd assumed it was there and used the same backend
> > as virtio-rng and H_RANDOM, but I hadn't actually looked at the code,
> > and now that I'm trying I can't see it either.
> >
> >> 
> >> > I was looking at implementing this, AFAIU, I have to get a new RNG
> >> > object in the initialization routine. We would need an instance of this
> >> > per machine. So for pseries I can add in ppc_spapr_init(). I am not sure
> >> > in case of linux-user where should this be initialized.
> >> >
> >> > One other place was init_proc_POWER9(), but that will be per cpu and
> >> > member of CPUPPCState structure. Advantage is it will work for system
> >> > emulation and linux-user both and we would not need a lock.
> >> 
> >> More issues here. Random backend is not compiled for linux-user, adding
> >> that wasn't difficult, but then rng_backend_request_entropy() uses
> >> qemu_set_fd_handler() which is a stub for linux-user
> >> (stubs/set-fd-handler.c)7
> >
> >
> > Ah.. yeah, not sure how we'll need to handle that.
> 
> I have sent updated patch, reading from /dev/random. Not sure if that is
> allowed in tcg. Works fine though.

Hrm.   From a helper, there's nothing inherently wrong with reading
from /dev/random, AFAIK.  However, it may not work on non-Linux hosts,
and doesn't let you connect it to urandom instead which probably make
it unsuitable to be merged.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-12  6:56               ` David Gibson
@ 2016-08-12  7:13                 ` Nikunj A Dadhania
  0 siblings, 0 replies; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-12  7:13 UTC (permalink / raw)
  To: David Gibson
  Cc: Benjamin Herrenschmidt, qemu-ppc, rth, qemu-devel, Ravi Bangoria

David Gibson <david@gibson.dropbear.id.au> writes:

> [ Unknown signature status ]
> On Fri, Aug 12, 2016 at 12:13:49PM +0530, Nikunj A Dadhania wrote:
>> David Gibson <david@gibson.dropbear.id.au> writes:
>> 
>> > [ Unknown signature status ]
>> > On Tue, Aug 09, 2016 at 02:47:46PM +0530, Nikunj A Dadhania wrote:
>> >> Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> writes:
>> >> 
>> >> > David Gibson <david@gibson.dropbear.id.au> writes:
>> >> >
>> >> >> [ Unknown signature status ]
>> >> >> On Mon, Aug 08, 2016 at 07:33:37AM +1000, Benjamin Herrenschmidt wrote:
>> >> >>> On Sun, 2016-08-07 at 23:06 +0530, Nikunj A Dadhania wrote:
>> >> >>> > +target_ulong helper_darn(uint32_t l)
>> >> >>> > +{
>> >> >>> > +    target_ulong r = UINT64_MAX;
>> >> >>> > +
>> >> >>> > +    if (l <= 2) {
>> >> >>> > +        do {
>> >> >>> > +            r = random() * random();
>> >> >>> > +            r &= l ? UINT64_MAX : UINT32_MAX;
>> >> >>> > +        } while (r == UINT64_MAX);
>> >> >>> > +    }
>> >> >>> > +
>> >> >>> > +    return r;
>> >> >>> > +}
>> >> >>> >  #endif
>> >> >>> 
>> >> >>> Isn't this a bit week ? Look at the implementation of H_RANDOM...
>> >> >>
>> >> >> Indeed, you should be using the rng backend that H_RANDOM, virtio-rng
>> >> >> and the Intel random number instruction all use.
>> >> 
>> >> Can you point me to the intel instruction, I couldn't get rdrand
>> >> implementation.
>> >
>> > Ah.. turns out no.  I'd assumed it was there and used the same backend
>> > as virtio-rng and H_RANDOM, but I hadn't actually looked at the code,
>> > and now that I'm trying I can't see it either.
>> >
>> >> 
>> >> > I was looking at implementing this, AFAIU, I have to get a new RNG
>> >> > object in the initialization routine. We would need an instance of this
>> >> > per machine. So for pseries I can add in ppc_spapr_init(). I am not sure
>> >> > in case of linux-user where should this be initialized.
>> >> >
>> >> > One other place was init_proc_POWER9(), but that will be per cpu and
>> >> > member of CPUPPCState structure. Advantage is it will work for system
>> >> > emulation and linux-user both and we would not need a lock.
>> >> 
>> >> More issues here. Random backend is not compiled for linux-user, adding
>> >> that wasn't difficult, but then rng_backend_request_entropy() uses
>> >> qemu_set_fd_handler() which is a stub for linux-user
>> >> (stubs/set-fd-handler.c)7
>> >
>> >
>> > Ah.. yeah, not sure how we'll need to handle that.
>> 
>> I have sent updated patch, reading from /dev/random. Not sure if that is
>> allowed in tcg. Works fine though.
>
> Hrm.   From a helper, there's nothing inherently wrong with reading
> from /dev/random, AFAIK.  However, it may not work on non-Linux hosts,
> and doesn't let you connect it to urandom instead which probably make
> it unsuitable to be merged.

Maybe two implementation, one weaker one using random() for non-linux
hosts, other using /dev/random ?

Regards
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-12  6:43             ` Nikunj A Dadhania
  2016-08-12  6:56               ` David Gibson
@ 2016-08-12  7:29               ` Thomas Huth
  2016-08-12  7:39                 ` Nikunj A Dadhania
  1 sibling, 1 reply; 42+ messages in thread
From: Thomas Huth @ 2016-08-12  7:29 UTC (permalink / raw)
  To: Nikunj A Dadhania, David Gibson; +Cc: Ravi Bangoria, qemu-ppc, qemu-devel, rth

On 12.08.2016 08:43, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
>> [ Unknown signature status ]
>> On Tue, Aug 09, 2016 at 02:47:46PM +0530, Nikunj A Dadhania wrote:
>>> Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> writes:
>>>
>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>
>>>>> [ Unknown signature status ]
>>>>> On Mon, Aug 08, 2016 at 07:33:37AM +1000, Benjamin Herrenschmidt wrote:
>>>>>> On Sun, 2016-08-07 at 23:06 +0530, Nikunj A Dadhania wrote:
>>>>>>> +target_ulong helper_darn(uint32_t l)
>>>>>>> +{
>>>>>>> +    target_ulong r = UINT64_MAX;
>>>>>>> +
>>>>>>> +    if (l <= 2) {
>>>>>>> +        do {
>>>>>>> +            r = random() * random();
>>>>>>> +            r &= l ? UINT64_MAX : UINT32_MAX;
>>>>>>> +        } while (r == UINT64_MAX);
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return r;
>>>>>>> +}
>>>>>>>  #endif
>>>>>>
>>>>>> Isn't this a bit week ? Look at the implementation of H_RANDOM...
>>>>>
>>>>> Indeed, you should be using the rng backend that H_RANDOM, virtio-rng
>>>>> and the Intel random number instruction all use.
>>>
>>> Can you point me to the intel instruction, I couldn't get rdrand
>>> implementation.
>>
>> Ah.. turns out no.  I'd assumed it was there and used the same backend
>> as virtio-rng and H_RANDOM, but I hadn't actually looked at the code,
>> and now that I'm trying I can't see it either.
>>
>>>
>>>> I was looking at implementing this, AFAIU, I have to get a new RNG
>>>> object in the initialization routine. We would need an instance of this
>>>> per machine. So for pseries I can add in ppc_spapr_init(). I am not sure
>>>> in case of linux-user where should this be initialized.
>>>>
>>>> One other place was init_proc_POWER9(), but that will be per cpu and
>>>> member of CPUPPCState structure. Advantage is it will work for system
>>>> emulation and linux-user both and we would not need a lock.
>>>
>>> More issues here. Random backend is not compiled for linux-user, adding
>>> that wasn't difficult, but then rng_backend_request_entropy() uses
>>> qemu_set_fd_handler() which is a stub for linux-user
>>> (stubs/set-fd-handler.c)7
>>
>>
>> Ah.. yeah, not sure how we'll need to handle that.
> 
> I have sent updated patch, reading from /dev/random. Not sure if that is
> allowed in tcg. Works fine though.

You can not rely on /dev/random for this job, since it might block. So
your guest would stop executing when there is not enough random data
available in the host, and I think that's quite a bad behavior...

 Thomas

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-12  7:29               ` Thomas Huth
@ 2016-08-12  7:39                 ` Nikunj A Dadhania
  2016-08-12  7:55                   ` Thomas Huth
  0 siblings, 1 reply; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-12  7:39 UTC (permalink / raw)
  To: Thomas Huth, David Gibson; +Cc: Ravi Bangoria, qemu-ppc, qemu-devel, rth

Thomas Huth <thuth@redhat.com> writes:

> On 12.08.2016 08:43, Nikunj A Dadhania wrote:
>> David Gibson <david@gibson.dropbear.id.au> writes:
>> 
>>> [ Unknown signature status ]
>>> On Tue, Aug 09, 2016 at 02:47:46PM +0530, Nikunj A Dadhania wrote:
>>>> Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> writes:
>>>>
>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>
>>>>>> [ Unknown signature status ]
>>>>>> On Mon, Aug 08, 2016 at 07:33:37AM +1000, Benjamin Herrenschmidt wrote:
>>>>>>> On Sun, 2016-08-07 at 23:06 +0530, Nikunj A Dadhania wrote:
>>>>>>>> +target_ulong helper_darn(uint32_t l)
>>>>>>>> +{
>>>>>>>> +    target_ulong r = UINT64_MAX;
>>>>>>>> +
>>>>>>>> +    if (l <= 2) {
>>>>>>>> +        do {
>>>>>>>> +            r = random() * random();
>>>>>>>> +            r &= l ? UINT64_MAX : UINT32_MAX;
>>>>>>>> +        } while (r == UINT64_MAX);
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    return r;
>>>>>>>> +}
>>>>>>>>  #endif
>>>>>>>
>>>>>>> Isn't this a bit week ? Look at the implementation of H_RANDOM...
>>>>>>
>>>>>> Indeed, you should be using the rng backend that H_RANDOM, virtio-rng
>>>>>> and the Intel random number instruction all use.
>>>>
>>>> Can you point me to the intel instruction, I couldn't get rdrand
>>>> implementation.
>>>
>>> Ah.. turns out no.  I'd assumed it was there and used the same backend
>>> as virtio-rng and H_RANDOM, but I hadn't actually looked at the code,
>>> and now that I'm trying I can't see it either.
>>>
>>>>
>>>>> I was looking at implementing this, AFAIU, I have to get a new RNG
>>>>> object in the initialization routine. We would need an instance of this
>>>>> per machine. So for pseries I can add in ppc_spapr_init(). I am not sure
>>>>> in case of linux-user where should this be initialized.
>>>>>
>>>>> One other place was init_proc_POWER9(), but that will be per cpu and
>>>>> member of CPUPPCState structure. Advantage is it will work for system
>>>>> emulation and linux-user both and we would not need a lock.
>>>>
>>>> More issues here. Random backend is not compiled for linux-user, adding
>>>> that wasn't difficult, but then rng_backend_request_entropy() uses
>>>> qemu_set_fd_handler() which is a stub for linux-user
>>>> (stubs/set-fd-handler.c)7
>>>
>>>
>>> Ah.. yeah, not sure how we'll need to handle that.
>> 
>> I have sent updated patch, reading from /dev/random. Not sure if that is
>> allowed in tcg. Works fine though.
>
> You can not rely on /dev/random for this job, since it might block. So
> your guest would stop executing when there is not enough random data
> available in the host, and I think that's quite a bad behavior...

Hmm.. rng-random does use this, but it might have way to time out probably:

backends/rng-random.c:    s->filename = g_strdup("/dev/random");

Regards
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-12  7:39                 ` Nikunj A Dadhania
@ 2016-08-12  7:55                   ` Thomas Huth
  2016-08-12  8:41                     ` Nikunj A Dadhania
  0 siblings, 1 reply; 42+ messages in thread
From: Thomas Huth @ 2016-08-12  7:55 UTC (permalink / raw)
  To: Nikunj A Dadhania, David Gibson; +Cc: Ravi Bangoria, qemu-ppc, qemu-devel, rth

On 12.08.2016 09:39, Nikunj A Dadhania wrote:
> Thomas Huth <thuth@redhat.com> writes:
> 
>> On 12.08.2016 08:43, Nikunj A Dadhania wrote:
>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>
>>>> [ Unknown signature status ]
>>>> On Tue, Aug 09, 2016 at 02:47:46PM +0530, Nikunj A Dadhania wrote:
>>>>> Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> writes:
>>>>>
>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>>
>>>>>>> [ Unknown signature status ]
>>>>>>> On Mon, Aug 08, 2016 at 07:33:37AM +1000, Benjamin Herrenschmidt wrote:
>>>>>>>> On Sun, 2016-08-07 at 23:06 +0530, Nikunj A Dadhania wrote:
>>>>>>>>> +target_ulong helper_darn(uint32_t l)
>>>>>>>>> +{
>>>>>>>>> +    target_ulong r = UINT64_MAX;
>>>>>>>>> +
>>>>>>>>> +    if (l <= 2) {
>>>>>>>>> +        do {
>>>>>>>>> +            r = random() * random();
>>>>>>>>> +            r &= l ? UINT64_MAX : UINT32_MAX;
>>>>>>>>> +        } while (r == UINT64_MAX);
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    return r;
>>>>>>>>> +}
>>>>>>>>>  #endif
>>>>>>>>
>>>>>>>> Isn't this a bit week ? Look at the implementation of H_RANDOM...
>>>>>>>
>>>>>>> Indeed, you should be using the rng backend that H_RANDOM, virtio-rng
>>>>>>> and the Intel random number instruction all use.
>>>>>
>>>>> Can you point me to the intel instruction, I couldn't get rdrand
>>>>> implementation.
>>>>
>>>> Ah.. turns out no.  I'd assumed it was there and used the same backend
>>>> as virtio-rng and H_RANDOM, but I hadn't actually looked at the code,
>>>> and now that I'm trying I can't see it either.
>>>>
>>>>>
>>>>>> I was looking at implementing this, AFAIU, I have to get a new RNG
>>>>>> object in the initialization routine. We would need an instance of this
>>>>>> per machine. So for pseries I can add in ppc_spapr_init(). I am not sure
>>>>>> in case of linux-user where should this be initialized.
>>>>>>
>>>>>> One other place was init_proc_POWER9(), but that will be per cpu and
>>>>>> member of CPUPPCState structure. Advantage is it will work for system
>>>>>> emulation and linux-user both and we would not need a lock.
>>>>>
>>>>> More issues here. Random backend is not compiled for linux-user, adding
>>>>> that wasn't difficult, but then rng_backend_request_entropy() uses
>>>>> qemu_set_fd_handler() which is a stub for linux-user
>>>>> (stubs/set-fd-handler.c)7
>>>>
>>>>
>>>> Ah.. yeah, not sure how we'll need to handle that.
>>>
>>> I have sent updated patch, reading from /dev/random. Not sure if that is
>>> allowed in tcg. Works fine though.
>>
>> You can not rely on /dev/random for this job, since it might block. So
>> your guest would stop executing when there is not enough random data
>> available in the host, and I think that's quite a bad behavior...
> 
> Hmm.. rng-random does use this, but it might have way to time out probably:
> 
> backends/rng-random.c:    s->filename = g_strdup("/dev/random");

This is only the default value, which is likely suitable for virtio-rng,
but not for something like this instruction (or the H_RANDOM hypercall).
You can set the filename property to another file when instantiating it,
like:

 qemu-system-xxx ... -object rng-random,filename=/dev/hwrng,id=rng0 ...

That's the whole point these backends - you give the users the
possibility to chose the right source of entropy.

For example, on the POWER8 machine that I have here, /dev/random blocks
after a couple of bytes, you can see that easily by typing something
like "hexdump /dev/random". It only delivers more random data when I run
"rngd -r /dev/hwrng" in the background.

 Thomas

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-12  7:55                   ` Thomas Huth
@ 2016-08-12  8:41                     ` Nikunj A Dadhania
  2016-08-12  9:29                       ` Thomas Huth
  0 siblings, 1 reply; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-12  8:41 UTC (permalink / raw)
  To: Thomas Huth, David Gibson; +Cc: Ravi Bangoria, qemu-ppc, qemu-devel, rth

Thomas Huth <thuth@redhat.com> writes:
>>> You can not rely on /dev/random for this job, since it might block. So
>>> your guest would stop executing when there is not enough random data
>>> available in the host, and I think that's quite a bad behavior...
>> 
>> Hmm.. rng-random does use this, but it might have way to time out probably:
>> 
>> backends/rng-random.c:    s->filename = g_strdup("/dev/random");
>
> This is only the default value, which is likely suitable for
> virtio-rng,

Right, we will need to find maybe an algorithm that can give us
sufficient distribution, or use random() with time-of-day and get some
function.

MIPS does have some thing in cpu_mips_get_random(). Its for 32-bit,
probably can be extended to 64-bit. Mathematically, I am not sure. :-)

> but not for something like this instruction (or the H_RANDOM hypercall).
> You can set the filename property to another file when instantiating it,
> like:
>
>  qemu-system-xxx ... -object rng-random,filename=/dev/hwrng,id=rng0 ...
>
> That's the whole point these backends - you give the users the
> possibility to chose the right source of entropy.
>
> For example, on the POWER8 machine that I have here, /dev/random blocks
> after a couple of bytes, you can see that easily by typing something
> like "hexdump /dev/random". It only delivers more random data when I run
> "rngd -r /dev/hwrng" in the background.


Regards,
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-12  8:41                     ` Nikunj A Dadhania
@ 2016-08-12  9:29                       ` Thomas Huth
  2016-08-12  9:51                         ` Nikunj A Dadhania
  2016-08-15 10:28                         ` David Gibson
  0 siblings, 2 replies; 42+ messages in thread
From: Thomas Huth @ 2016-08-12  9:29 UTC (permalink / raw)
  To: Nikunj A Dadhania, David Gibson
  Cc: Ravi Bangoria, qemu-ppc, qemu-devel, rth, Amit Shah

On 12.08.2016 10:41, Nikunj A Dadhania wrote:
> Thomas Huth <thuth@redhat.com> writes:
>>>> You can not rely on /dev/random for this job, since it might block. So
>>>> your guest would stop executing when there is not enough random data
>>>> available in the host, and I think that's quite a bad behavior...
>>>
>>> Hmm.. rng-random does use this, but it might have way to time out probably:
>>>
>>> backends/rng-random.c:    s->filename = g_strdup("/dev/random");
>>
>> This is only the default value, which is likely suitable for
>> virtio-rng,
> 
> Right, we will need to find maybe an algorithm that can give us
> sufficient distribution, or use random() with time-of-day and get some
> function.

First, hands off rand() and random()! These are certainly not suited for
implementing an opcode that is likely thought to deliver
cryptographically suitable random data!

This topic is really not that easy, I had the same problems also when
thinking about the implementation of H_RANDOM. You need a source that
gives you cryptographically suitable data, you've got to make sure that
your algorithm does not block the guest, and you've got to do it in a
somewhat system-independent manner (i.e. QEMU should still run on
Windows and Mac OS X afterwards). That's why I came to the conclusion
that it's best to use the rng backends for this job when implementing
the H_RANDOM stuff, and let the users decide which source is best suited
on their system.

> MIPS does have some thing in cpu_mips_get_random(). Its for 32-bit,
> probably can be extended to 64-bit. Mathematically, I am not sure. :-)

That's certainly not an algorithm which is suitable for crypto data!

>> but not for something like this instruction (or the H_RANDOM hypercall).
>> You can set the filename property to another file when instantiating it,
>> like:
>>
>>  qemu-system-xxx ... -object rng-random,filename=/dev/hwrng,id=rng0 ...
>>
>> That's the whole point these backends - you give the users the
>> possibility to chose the right source of entropy.

Of course this is getting ugly here, since a CPU instruction is not as
optional as the H_RANDOM hypercall for example.
So I basically see to ways to follow here:

1) Implement it similar to the H_RANDOM hypercall, i.e. make it optional
and let the user decide the right source of entropy with a "-object
rng-random" backend. If such a backend has not been given on the command
line, let the "darn" instruction simply always return 0xFFFFFFFFFFFFFFFF
to signal an error condition - the guest is then forced to use another
way to get random data instead.

2) Try to introduce a generic get_crypto_random_data() function to QEMU
that can be called to get cryptographically suitable random data in any
case. The function then should probe for /dev/random, /dev/hwrng or
other suitable sources on its own when being called for the first time
(might be some work to get this running on Windows and Mac OS X, too).
You then still have to deal with a blocking /dev/random device, though,
so maybe the data with the good entropy should not be returned directly
but rather used to seed a good deterministic RNG instead, like the one
from the glib (have a look at the g_rand_int() function and friends).
Anyway, someone with a good knowledge of crypto stuff should review such
an implementation first before we include that, I think.

 Thomas

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-12  9:29                       ` Thomas Huth
@ 2016-08-12  9:51                         ` Nikunj A Dadhania
  2016-08-12 13:26                           ` Richard Henderson
  2016-08-15 10:28                         ` David Gibson
  1 sibling, 1 reply; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-12  9:51 UTC (permalink / raw)
  To: Thomas Huth, David Gibson
  Cc: Ravi Bangoria, qemu-ppc, qemu-devel, rth, Amit Shah

Thomas Huth <thuth@redhat.com> writes:

> On 12.08.2016 10:41, Nikunj A Dadhania wrote:
>> Thomas Huth <thuth@redhat.com> writes:
>>>>> You can not rely on /dev/random for this job, since it might block. So
>>>>> your guest would stop executing when there is not enough random data
>>>>> available in the host, and I think that's quite a bad behavior...
>>>>
>>>> Hmm.. rng-random does use this, but it might have way to time out probably:
>>>>
>>>> backends/rng-random.c:    s->filename = g_strdup("/dev/random");
>>>
>>> This is only the default value, which is likely suitable for
>>> virtio-rng,
>> 
>> Right, we will need to find maybe an algorithm that can give us
>> sufficient distribution, or use random() with time-of-day and get some
>> function.
>
> First, hands off rand() and random()! These are certainly not suited for
> implementing an opcode that is likely thought to deliver
> cryptographically suitable random data!

Sure :-)


> This topic is really not that easy, I had the same problems also when
> thinking about the implementation of H_RANDOM. You need a source that
> gives you cryptographically suitable data, you've got to make sure that
> your algorithm does not block the guest, and you've got to do it in a
> somewhat system-independent manner (i.e. QEMU should still run on
> Windows and Mac OS X afterwards). That's why I came to the conclusion
> that it's best to use the rng backends for this job when implementing
> the H_RANDOM stuff, and let the users decide which source is best suited
> on their system.

Right, that was the first attempt, as we need to cater to linux-user as
well, that does not have bells and whistles of system. Many
functionality missing, right from the rng-backend isnt part of the
linux-user emulator.

>> MIPS does have some thing in cpu_mips_get_random(). Its for 32-bit,
>> probably can be extended to 64-bit. Mathematically, I am not sure. :-)
>
> That's certainly not an algorithm which is suitable for crypto data!

Thanks for confirming.

>>> but not for something like this instruction (or the H_RANDOM hypercall).
>>> You can set the filename property to another file when instantiating it,
>>> like:
>>>
>>>  qemu-system-xxx ... -object rng-random,filename=/dev/hwrng,id=rng0 ...
>>>
>>> That's the whole point these backends - you give the users the
>>> possibility to chose the right source of entropy.
>
> Of course this is getting ugly here, since a CPU instruction is not as
> optional as the H_RANDOM hypercall for example.
> So I basically see to ways to follow here:
>
> 1) Implement it similar to the H_RANDOM hypercall, i.e. make it optional
> and let the user decide the right source of entropy with a "-object
> rng-random" backend. If such a backend has not been given on the command
> line, let the "darn" instruction simply always return 0xFFFFFFFFFFFFFFFF
> to signal an error condition - the guest is then forced to use another
> way to get random data instead.

We might need to differentiate between linux-user and system maybe to
use the rng backend.

> 2) Try to introduce a generic get_crypto_random_data() function to QEMU
> that can be called to get cryptographically suitable random data in any
> case. The function then should probe for /dev/random, /dev/hwrng or
> other suitable sources on its own when being called for the first time

Yes, I can try that.

> (might be some work to get this running on Windows and Mac OS X, too).

I am not to sure about this. Would need some help.

> You then still have to deal with a blocking /dev/random device, though,
> so maybe the data with the good entropy should not be returned directly
> but rather used to seed a good deterministic RNG instead, like the one
> from the glib (have a look at the g_rand_int() function and friends).
> Anyway, someone with a good knowledge of crypto stuff should review such
> an implementation first before we include that, I think.

I can drop this from my patch series, until we have clarity here.

Regards
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-12  9:51                         ` Nikunj A Dadhania
@ 2016-08-12 13:26                           ` Richard Henderson
  2016-08-12 13:39                             ` Nikunj A Dadhania
  0 siblings, 1 reply; 42+ messages in thread
From: Richard Henderson @ 2016-08-12 13:26 UTC (permalink / raw)
  To: Nikunj A Dadhania, Thomas Huth, David Gibson
  Cc: Ravi Bangoria, qemu-ppc, qemu-devel, Amit Shah

On 08/12/2016 10:51 AM, Nikunj A Dadhania wrote:
> I can drop this from my patch series, until we have clarity here.

Why don't you just drop the internals from helper_darn*, leaving both to always 
return -1.  That way we can keep the translation work that you did.


r~

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-12 13:26                           ` Richard Henderson
@ 2016-08-12 13:39                             ` Nikunj A Dadhania
  0 siblings, 0 replies; 42+ messages in thread
From: Nikunj A Dadhania @ 2016-08-12 13:39 UTC (permalink / raw)
  To: Richard Henderson, Thomas Huth, David Gibson
  Cc: Ravi Bangoria, qemu-ppc, qemu-devel, Amit Shah

Richard Henderson <rth@twiddle.net> writes:

> On 08/12/2016 10:51 AM, Nikunj A Dadhania wrote:
>> I can drop this from my patch series, until we have clarity here.
>
> Why don't you just drop the internals from helper_darn*, leaving both to always 
> return -1.  That way we can keep the translation work that you did.

Sure, will add it with some comments.

Regards,
Nikunj

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/6] target-ppc: Implement darn instruction
  2016-08-12  9:29                       ` Thomas Huth
  2016-08-12  9:51                         ` Nikunj A Dadhania
@ 2016-08-15 10:28                         ` David Gibson
  1 sibling, 0 replies; 42+ messages in thread
From: David Gibson @ 2016-08-15 10:28 UTC (permalink / raw)
  To: Thomas Huth
  Cc: Nikunj A Dadhania, Ravi Bangoria, qemu-ppc, qemu-devel, rth, Amit Shah

[-- Attachment #1: Type: text/plain, Size: 4671 bytes --]

On Fri, Aug 12, 2016 at 11:29:17AM +0200, Thomas Huth wrote:
> On 12.08.2016 10:41, Nikunj A Dadhania wrote:
> > Thomas Huth <thuth@redhat.com> writes:
> >>>> You can not rely on /dev/random for this job, since it might block. So
> >>>> your guest would stop executing when there is not enough random data
> >>>> available in the host, and I think that's quite a bad behavior...
> >>>
> >>> Hmm.. rng-random does use this, but it might have way to time out probably:
> >>>
> >>> backends/rng-random.c:    s->filename = g_strdup("/dev/random");
> >>
> >> This is only the default value, which is likely suitable for
> >> virtio-rng,
> > 
> > Right, we will need to find maybe an algorithm that can give us
> > sufficient distribution, or use random() with time-of-day and get some
> > function.
> 
> First, hands off rand() and random()! These are certainly not suited for
> implementing an opcode that is likely thought to deliver
> cryptographically suitable random data!
> 
> This topic is really not that easy, I had the same problems also when
> thinking about the implementation of H_RANDOM. You need a source that
> gives you cryptographically suitable data, you've got to make sure that
> your algorithm does not block the guest, and you've got to do it in a
> somewhat system-independent manner (i.e. QEMU should still run on
> Windows and Mac OS X afterwards). That's why I came to the conclusion
> that it's best to use the rng backends for this job when implementing
> the H_RANDOM stuff, and let the users decide which source is best suited
> on their system.
> 
> > MIPS does have some thing in cpu_mips_get_random(). Its for 32-bit,
> > probably can be extended to 64-bit. Mathematically, I am not sure. :-)
> 
> That's certainly not an algorithm which is suitable for crypto data!
> 
> >> but not for something like this instruction (or the H_RANDOM hypercall).
> >> You can set the filename property to another file when instantiating it,
> >> like:
> >>
> >>  qemu-system-xxx ... -object rng-random,filename=/dev/hwrng,id=rng0 ...
> >>
> >> That's the whole point these backends - you give the users the
> >> possibility to chose the right source of entropy.
> 
> Of course this is getting ugly here, since a CPU instruction is not as
> optional as the H_RANDOM hypercall for example.
> So I basically see to ways to follow here:
> 
> 1) Implement it similar to the H_RANDOM hypercall, i.e. make it optional
> and let the user decide the right source of entropy with a "-object
> rng-random" backend. If such a backend has not been given on the command
> line, let the "darn" instruction simply always return 0xFFFFFFFFFFFFFFFF
> to signal an error condition - the guest is then forced to use another
> way to get random data instead.
> 
> 2) Try to introduce a generic get_crypto_random_data() function to QEMU
> that can be called to get cryptographically suitable random data in any
> case. The function then should probe for /dev/random, /dev/hwrng or
> other suitable sources on its own when being called for the first time
> (might be some work to get this running on Windows and Mac OS X, too).
> You then still have to deal with a blocking /dev/random device, though,
> so maybe the data with the good entropy should not be returned directly
> but rather used to seed a good deterministic RNG instead, like the one
> from the glib (have a look at the g_rand_int() function and friends).
> Anyway, someone with a good knowledge of crypto stuff should review such
> an implementation first before we include that, I think.

So, a couple of options we have here.  The error condition option for
the instruction helps a fair bit.

Option 1 is to to have helper_darn() wait for a short timeout to get
random data, if it doesn't get something in time, return an error.

To reduce the latency further, you could have a one word buffer with
the next random number.  A background thread would attempt to refill
that from the rng backend.  darn would return the number in the
buffer, or an error if it hasn't been filled yet.  This implementation
could be reasonably done without a helper, I think, which might be an
advantage.

Unless there's some clever solution that x86 have already come up with
for rdrand in linux-user, I think we want to find some way to
construct the rng backend there as well.  Falling back to a different
rng for user only seems very risky to me.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 42+ messages in thread

end of thread, other threads:[~2016-08-15 10:26 UTC | newest]

Thread overview: 42+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-07 17:36 [Qemu-devel] [PATCH 0/6] POWER9 TCG enablements - part4 Nikunj A Dadhania
2016-08-07 17:36 ` [Qemu-devel] [PATCH 1/6] target-ppc: add xxspltib instruction Nikunj A Dadhania
2016-08-08  4:43   ` Richard Henderson
2016-08-08  6:19     ` Nikunj A Dadhania
2016-08-07 17:36 ` [Qemu-devel] [PATCH 2/6] target-ppc: Implement darn instruction Nikunj A Dadhania
2016-08-07 21:33   ` Benjamin Herrenschmidt
2016-08-08  1:52     ` Nikunj A Dadhania
2016-08-08  3:22       ` Benjamin Herrenschmidt
2016-08-08  3:32         ` Nikunj A Dadhania
2016-08-09  3:28     ` David Gibson
2016-08-09  4:54       ` Nikunj A Dadhania
2016-08-09  9:17         ` [Qemu-devel] [Qemu-ppc] " Nikunj A Dadhania
2016-08-12  6:29           ` David Gibson
2016-08-12  6:43             ` Nikunj A Dadhania
2016-08-12  6:56               ` David Gibson
2016-08-12  7:13                 ` Nikunj A Dadhania
2016-08-12  7:29               ` Thomas Huth
2016-08-12  7:39                 ` Nikunj A Dadhania
2016-08-12  7:55                   ` Thomas Huth
2016-08-12  8:41                     ` Nikunj A Dadhania
2016-08-12  9:29                       ` Thomas Huth
2016-08-12  9:51                         ` Nikunj A Dadhania
2016-08-12 13:26                           ` Richard Henderson
2016-08-12 13:39                             ` Nikunj A Dadhania
2016-08-15 10:28                         ` David Gibson
2016-08-08  5:01   ` [Qemu-devel] " Richard Henderson
2016-08-08  6:20     ` Nikunj A Dadhania
2016-08-07 17:36 ` [Qemu-devel] [PATCH 3/6] target-ppc: add lxsi[bw]zx instruction Nikunj A Dadhania
2016-08-08  5:08   ` Richard Henderson
2016-08-08  6:21     ` Nikunj A Dadhania
2016-08-07 17:36 ` [Qemu-devel] [PATCH 4/6] target-ppc: add stxsi[bh]x instruction Nikunj A Dadhania
2016-08-08  5:08   ` Richard Henderson
2016-08-07 17:36 ` [Qemu-devel] [PATCH 5/6] target-ppc: add lxvb16x and lxvh8x Nikunj A Dadhania
2016-08-08  5:27   ` Richard Henderson
2016-08-08  6:35     ` Richard Henderson
2016-08-10  9:21       ` Nikunj A Dadhania
2016-08-10 10:12         ` Richard Henderson
2016-08-10 10:33           ` Nikunj A Dadhania
2016-08-10 15:21           ` Nikunj A Dadhania
2016-08-07 17:36 ` [Qemu-devel] [PATCH 6/6] target-ppc: add stxvb16x and stxvh8x Nikunj A Dadhania
2016-08-08  5:27   ` Richard Henderson
2016-08-09  3:30 ` [Qemu-devel] [PATCH 0/6] POWER9 TCG enablements - part4 David Gibson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.