qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v8 1/2] target/s390x: support SHA-512 extensions
@ 2022-09-21 10:07 Jason A. Donenfeld
  2022-09-21 10:07 ` [PATCH v8 2/2] target/s390x: support PRNO_TRNG instruction Jason A. Donenfeld
                   ` (2 more replies)
  0 siblings, 3 replies; 29+ messages in thread
From: Jason A. Donenfeld @ 2022-09-21 10:07 UTC (permalink / raw)
  To: qemu-s390x, qemu-devel
  Cc: Jason A. Donenfeld, Thomas Huth, David Hildenbrand,
	Christian Borntraeger, Richard Henderson, Cornelia Huck,
	Harald Freudenberger, Holger Dengler

In order to fully support MSA_EXT_5, we have to support the SHA-512
special instructions. So implement those.

The implementation began as something TweetNacl-like, and then was
adjusted to be useful here. It's not very beautiful, but it is quite
short and compact, which is what we're going for.

Cc: Thomas Huth <thuth@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Harald Freudenberger <freude@linux.ibm.com>
Cc: Holger Dengler <dengler@linux.ibm.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 target/s390x/gen-features.c      |   3 +
 target/s390x/tcg/crypto_helper.c | 163 +++++++++++++++++++++++++++++++
 2 files changed, 166 insertions(+)

diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 1558c52626..14a7f2ae90 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -751,6 +751,9 @@ static uint16_t qemu_V7_0[] = {
  */
 static uint16_t qemu_MAX[] = {
     S390_FEAT_VECTOR_ENH2,
+    S390_FEAT_MSA_EXT_5,
+    S390_FEAT_KIMD_SHA_512,
+    S390_FEAT_KLMD_SHA_512,
 };
 
 /****** END FEATURE DEFS ******/
diff --git a/target/s390x/tcg/crypto_helper.c b/target/s390x/tcg/crypto_helper.c
index 138d9e7ad9..02073ec70b 100644
--- a/target/s390x/tcg/crypto_helper.c
+++ b/target/s390x/tcg/crypto_helper.c
@@ -1,10 +1,12 @@
 /*
  *  s390x crypto helpers
  *
+ *  Copyright (C) 2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
  *  Copyright (c) 2017 Red Hat Inc
  *
  *  Authors:
  *   David Hildenbrand <david@redhat.com>
+ *   Jason A. Donenfeld <Jason@zx2c4.com>
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -18,6 +20,159 @@
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 
+static uint64_t R(uint64_t x, int c) { return (x >> c) | (x << (64 - c)); }
+static uint64_t Ch(uint64_t x, uint64_t y, uint64_t z) { return (x & y) ^ (~x & z); }
+static uint64_t Maj(uint64_t x, uint64_t y, uint64_t z) { return (x & y) ^ (x & z) ^ (y & z); }
+static uint64_t Sigma0(uint64_t x) { return R(x, 28) ^ R(x, 34) ^ R(x, 39); }
+static uint64_t Sigma1(uint64_t x) { return R(x, 14) ^ R(x, 18) ^ R(x, 41); }
+static uint64_t sigma0(uint64_t x) { return R(x, 1) ^ R(x, 8) ^ (x >> 7); }
+static uint64_t sigma1(uint64_t x) { return R(x, 19) ^ R(x, 61) ^ (x >> 6); }
+
+static const uint64_t K[80] = {
+    0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL,
+    0xe9b5dba58189dbbcULL, 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+    0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, 0xd807aa98a3030242ULL,
+    0x12835b0145706fbeULL, 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+    0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL,
+    0xc19bf174cf692694ULL, 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+    0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, 0x2de92c6f592b0275ULL,
+    0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+    0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL,
+    0xbf597fc7beef0ee4ULL, 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+    0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, 0x27b70a8546d22ffcULL,
+    0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+    0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL,
+    0x92722c851482353bULL, 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+    0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, 0xd192e819d6ef5218ULL,
+    0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+    0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 0x2748774cdf8eeb99ULL,
+    0x34b0bcb5e19b48a8ULL, 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+    0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, 0x748f82ee5defb2fcULL,
+    0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+    0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL,
+    0xc67178f2e372532bULL, 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+    0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, 0x06f067aa72176fbaULL,
+    0x0a637dc5a2c898a6ULL, 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+    0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL,
+    0x431d67c49c100d4cULL, 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+    0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
+};
+
+static int kimd_sha512(CPUS390XState *env, uintptr_t ra, uint64_t parameter_block,
+                       uint64_t *message_reg, uint64_t *len_reg, uint8_t *stack_buffer)
+{
+    enum { MAX_BLOCKS_PER_RUN = 64 }; /* This is arbitrary, just to keep interactivity. */
+    uint64_t z[8], b[8], a[8], w[16], t;
+    uint64_t message = message_reg ? *message_reg : 0;
+    uint64_t len = *len_reg, processed = 0, addr;
+    int i, j, message_reg_len = 64, blocks = 0, cc = 0;
+
+    if (!(env->psw.mask & PSW_MASK_64)) {
+        len = (uint32_t)len;
+        message_reg_len = (env->psw.mask & PSW_MASK_32) ? 32 : 24;
+    }
+
+    for (i = 0; i < 8; ++i) {
+        addr = wrap_address(env, parameter_block + 8 * i);
+        z[i] = a[i] = cpu_ldq_be_data_ra(env, addr, ra);
+    }
+
+    while (len >= 128) {
+        if (++blocks > MAX_BLOCKS_PER_RUN) {
+            cc = 3;
+            break;
+        }
+
+        for (i = 0; i < 16; ++i) {
+            if (message) {
+                addr = wrap_address(env, message + 8 * i);
+                w[i] = cpu_ldq_be_data_ra(env, addr, ra);
+            } else {
+                w[i] = be64_to_cpu(((uint64_t *)stack_buffer)[i]);
+            }
+        }
+
+        for (i = 0; i < 80; ++i) {
+            for (j = 0; j < 8; ++j) {
+                b[j] = a[j];
+            }
+            t = a[7] + Sigma1(a[4]) + Ch(a[4], a[5], a[6]) + K[i] + w[i % 16];
+            b[7] = t + Sigma0(a[0]) + Maj(a[0], a[1], a[2]);
+            b[3] += t;
+            for (j = 0; j < 8; ++j) {
+                a[(j + 1) % 8] = b[j];
+            }
+            if (i % 16 == 15) {
+                for (j = 0; j < 16; ++j) {
+                    w[j] += w[(j + 9) % 16] + sigma0(w[(j + 1) % 16]) + sigma1(w[(j + 14) % 16]);
+                }
+            }
+        }
+
+        for (i = 0; i < 8; ++i) {
+            a[i] += z[i];
+            z[i] = a[i];
+        }
+
+        if (message) {
+            message += 128;
+        } else {
+            stack_buffer += 128;
+        }
+        len -= 128;
+        processed += 128;
+    }
+
+    for (i = 0; i < 8; ++i) {
+        addr = wrap_address(env, parameter_block + 8 * i);
+        cpu_stq_be_data_ra(env, addr, z[i], ra);
+    }
+
+    if (message_reg) {
+        *message_reg = deposit64(*message_reg, 0, message_reg_len, message);
+    }
+    *len_reg -= processed;
+    return cc;
+}
+
+static int klmd_sha512(CPUS390XState *env, uintptr_t ra, uint64_t parameter_block,
+                        uint64_t *message_reg, uint64_t *len_reg)
+{
+    uint8_t x[256];
+    uint64_t i, message, len, addr;
+    int j, message_reg_len = 64, cc;
+
+    cc = kimd_sha512(env, ra, parameter_block, message_reg, len_reg, NULL);
+    if (cc) {
+        return cc;
+    }
+
+    message = *message_reg;
+    len = *len_reg;
+    if (!(env->psw.mask & PSW_MASK_64)) {
+        len = (uint32_t)len;
+        message_reg_len = (env->psw.mask & PSW_MASK_32) ? 32 : 24;
+    }
+
+    for (i = 0; i < len; ++i) {
+        addr = wrap_address(env, message + i);
+        x[i] = cpu_ldub_data_ra(env, addr, ra);
+    }
+    memset(x + i, 0, sizeof(x) - i);
+    x[i] = 128;
+    i = i < 112 ? 128 : 256;
+    for (j = 0; j < 16; ++j) {
+        addr = wrap_address(env, parameter_block + 64 + j);
+        x[i - 16 + j] = cpu_ldub_data_ra(env, addr, ra);
+    }
+    if (kimd_sha512(env, ra, parameter_block, NULL, &i, x)) {
+        g_assert_not_reached(); /* It must handle at least 2 blocks. */
+    }
+    *message_reg = deposit64(*message_reg, 0, message_reg_len, message + len);
+    *len_reg -= len;
+    return 0;
+}
+
 uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
                      uint32_t type)
 {
@@ -52,6 +207,14 @@ uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
             cpu_stb_data_ra(env, param_addr, subfunc[i], ra);
         }
         break;
+    case 3: /* CPACF_*_SHA_512 */
+        switch (type) {
+        case S390_FEAT_TYPE_KIMD:
+            return kimd_sha512(env, ra, env->regs[1], &env->regs[r2], &env->regs[r2 + 1], NULL);
+        case S390_FEAT_TYPE_KLMD:
+            return klmd_sha512(env, ra, env->regs[1], &env->regs[r2], &env->regs[r2 + 1]);
+        }
+        break;
     default:
         /* we don't implement any other subfunction yet */
         g_assert_not_reached();
-- 
2.37.3



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v8 2/2] target/s390x: support PRNO_TRNG instruction
  2022-09-21 10:07 [PATCH v8 1/2] target/s390x: support SHA-512 extensions Jason A. Donenfeld
@ 2022-09-21 10:07 ` Jason A. Donenfeld
  2022-09-22 13:09   ` David Hildenbrand
                     ` (2 more replies)
  2022-09-22 13:07 ` [PATCH v8 1/2] target/s390x: support SHA-512 extensions David Hildenbrand
  2022-09-22 15:38 ` [PATCH v8.1 " David Hildenbrand
  2 siblings, 3 replies; 29+ messages in thread
From: Jason A. Donenfeld @ 2022-09-21 10:07 UTC (permalink / raw)
  To: qemu-s390x, qemu-devel
  Cc: Jason A. Donenfeld, Thomas Huth, David Hildenbrand,
	Christian Borntraeger, Richard Henderson, Cornelia Huck,
	Harald Freudenberger, Holger Dengler

In order for hosts running inside of TCG to initialize the kernel's
random number generator, we should support the PRNO_TRNG instruction,
backed in the usual way with the qemu_guest_getrandom helper. This is
confirmed working on Linux 5.19.

Cc: Thomas Huth <thuth@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Harald Freudenberger <freude@linux.ibm.com>
Cc: Holger Dengler <dengler@linux.ibm.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 target/s390x/gen-features.c      |  1 +
 target/s390x/tcg/crypto_helper.c | 30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 14a7f2ae90..aaade67574 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -754,6 +754,7 @@ static uint16_t qemu_MAX[] = {
     S390_FEAT_MSA_EXT_5,
     S390_FEAT_KIMD_SHA_512,
     S390_FEAT_KLMD_SHA_512,
+    S390_FEAT_PRNO_TRNG,
 };
 
 /****** END FEATURE DEFS ******/
diff --git a/target/s390x/tcg/crypto_helper.c b/target/s390x/tcg/crypto_helper.c
index 02073ec70b..0daa9a2dd9 100644
--- a/target/s390x/tcg/crypto_helper.c
+++ b/target/s390x/tcg/crypto_helper.c
@@ -14,6 +14,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/main-loop.h"
+#include "qemu/guest-random.h"
 #include "s390x-internal.h"
 #include "tcg_s390x.h"
 #include "exec/helper-proto.h"
@@ -173,6 +174,31 @@ static int klmd_sha512(CPUS390XState *env, uintptr_t ra, uint64_t parameter_bloc
     return 0;
 }
 
+static void fill_buf_random(CPUS390XState *env, uintptr_t ra,
+                            uint64_t *buf_reg, uint64_t *len_reg)
+{
+    uint8_t tmp[256];
+    uint64_t len = *len_reg;
+    int buf_reg_len = 64;
+
+    if (!(env->psw.mask & PSW_MASK_64)) {
+        len = (uint32_t)len;
+        buf_reg_len = (env->psw.mask & PSW_MASK_32) ? 32 : 24;
+    }
+
+    while (len) {
+        size_t block = MIN(len, sizeof(tmp));
+
+        qemu_guest_getrandom_nofail(tmp, block);
+        for (size_t i = 0; i < block; ++i) {
+            cpu_stb_data_ra(env, wrap_address(env, *buf_reg), tmp[i], ra);
+            *buf_reg = deposit64(*buf_reg, 0, buf_reg_len, *buf_reg + 1);
+            --*len_reg;
+        }
+        len -= block;
+    }
+}
+
 uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
                      uint32_t type)
 {
@@ -215,6 +241,10 @@ uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
             return klmd_sha512(env, ra, env->regs[1], &env->regs[r2], &env->regs[r2 + 1]);
         }
         break;
+    case 114: /* CPACF_PRNO_TRNG */
+        fill_buf_random(env, ra, &env->regs[r1], &env->regs[r1 + 1]);
+        fill_buf_random(env, ra, &env->regs[r2], &env->regs[r2 + 1]);
+        break;
     default:
         /* we don't implement any other subfunction yet */
         g_assert_not_reached();
-- 
2.37.3



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v8 1/2] target/s390x: support SHA-512 extensions
  2022-09-21 10:07 [PATCH v8 1/2] target/s390x: support SHA-512 extensions Jason A. Donenfeld
  2022-09-21 10:07 ` [PATCH v8 2/2] target/s390x: support PRNO_TRNG instruction Jason A. Donenfeld
@ 2022-09-22 13:07 ` David Hildenbrand
  2022-09-22 14:35   ` Jason A. Donenfeld
  2022-09-23 10:14   ` Alex Bennée
  2022-09-22 15:38 ` [PATCH v8.1 " David Hildenbrand
  2 siblings, 2 replies; 29+ messages in thread
From: David Hildenbrand @ 2022-09-22 13:07 UTC (permalink / raw)
  To: Jason A. Donenfeld, qemu-s390x, qemu-devel
  Cc: Thomas Huth, Christian Borntraeger, Richard Henderson,
	Cornelia Huck, Harald Freudenberger, Holger Dengler

On 21.09.22 12:07, Jason A. Donenfeld wrote:
> In order to fully support MSA_EXT_5, we have to support the SHA-512
> special instructions. So implement those.
> 
> The implementation began as something TweetNacl-like, and then was
> adjusted to be useful here. It's not very beautiful, but it is quite
> short and compact, which is what we're going for.
> 
> Cc: Thomas Huth <thuth@redhat.com>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
> Cc: Richard Henderson <richard.henderson@linaro.org>
> Cc: Cornelia Huck <cohuck@redhat.com>
> Cc: Harald Freudenberger <freude@linux.ibm.com>
> Cc: Holger Dengler <dengler@linux.ibm.com>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> ---

As discussed offline, I'd restructure the code a bit to make it easier to
grasp/review. But of course, readability is subjective. I think there was one
specification check for kimd missing (length not aligned). It's a bit buried:

"
A specification exception is recognized and no other
action is taken if any of the following occurs:
...
4. The second-operand length is not a multiple of
the data block size of the designated function
(see Figure 7-207 on page 7-187 for COMPUTE
INTERMEDIATE MESSAGE DIGEST functions).
"

The biggest change is probably that we will now end up writing memory only
once (storing the ocv) after all inputs were read for KLMD. There is
still a rare chance of failing halfway through the single write, but that
could be tackled in the future comparatively easily by doing a
pre-translation.

This passes the Linux boot test, but I'm pretty sure I messed up some
corner case.


Interestingly, I discovered tests/tcg/multiarch/sha512.c.

I feel like we could write it up fairly easily at least for kimd, but
I'm a bit lost if klmd would still need special treatment. I'm fine with
keeping it like that for now, as it's not that much code, and we're doing
the bew64<->conversion upfront instead of inside the algorithm.



 From 90de57150134d56d31ee64748d929117f87a71a6 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 21 Sep 2022 12:07:28 +0200
Subject: [PATCH] target/s390x: support SHA-512 extensions

In order to fully support MSA_EXT_5, we have to support the SHA-512
special instructions. So implement those.

The implementation began as something TweetNacl-like, and then was
adjusted to be useful here. It's not very beautiful, but it is quite
short and compact, which is what we're going for.

Cc: Thomas Huth <thuth@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Harald Freudenberger <freude@linux.ibm.com>
Cc: Holger Dengler <dengler@linux.ibm.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
[ restructure code, add missing specification exception, add comments ]
Signed-off-by: David Hildenbrand <david@redhat.com>
---
  target/s390x/gen-features.c      |   3 +
  target/s390x/tcg/crypto_helper.c | 232 +++++++++++++++++++++++++++++++
  2 files changed, 235 insertions(+)

diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 1558c52626..14a7f2ae90 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -751,6 +751,9 @@ static uint16_t qemu_V7_0[] = {
   */
  static uint16_t qemu_MAX[] = {
      S390_FEAT_VECTOR_ENH2,
+    S390_FEAT_MSA_EXT_5,
+    S390_FEAT_KIMD_SHA_512,
+    S390_FEAT_KLMD_SHA_512,
  };
  
  /****** END FEATURE DEFS ******/
diff --git a/target/s390x/tcg/crypto_helper.c b/target/s390x/tcg/crypto_helper.c
index 138d9e7ad9..98f628561e 100644
--- a/target/s390x/tcg/crypto_helper.c
+++ b/target/s390x/tcg/crypto_helper.c
@@ -1,10 +1,12 @@
  /*
   *  s390x crypto helpers
   *
+ *  Copyright (C) 2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
   *  Copyright (c) 2017 Red Hat Inc
   *
   *  Authors:
   *   David Hildenbrand <david@redhat.com>
+ *   Jason A. Donenfeld <Jason@zx2c4.com>
   *
   * This work is licensed under the terms of the GNU GPL, version 2 or later.
   * See the COPYING file in the top-level directory.
@@ -18,6 +20,233 @@
  #include "exec/exec-all.h"
  #include "exec/cpu_ldst.h"
  
+static uint64_t R(uint64_t x, int c)
+{
+    return (x >> c) | (x << (64 - c));
+}
+static uint64_t Ch(uint64_t x, uint64_t y, uint64_t z)
+{
+    return (x & y) ^ (~x & z);
+}
+static uint64_t Maj(uint64_t x, uint64_t y, uint64_t z)
+{
+    return (x & y) ^ (x & z) ^ (y & z);
+}
+static uint64_t Sigma0(uint64_t x)
+{
+    return R(x, 28) ^ R(x, 34) ^ R(x, 39);
+}
+static uint64_t Sigma1(uint64_t x)
+{
+    return R(x, 14) ^ R(x, 18) ^ R(x, 41);
+}
+static uint64_t sigma0(uint64_t x)
+{
+    return R(x, 1) ^ R(x, 8) ^ (x >> 7);
+}
+static uint64_t sigma1(uint64_t x)
+{
+    return R(x, 19) ^ R(x, 61) ^ (x >> 6);
+}
+
+static const uint64_t K[80] = {
+    0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL,
+    0xe9b5dba58189dbbcULL, 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+    0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, 0xd807aa98a3030242ULL,
+    0x12835b0145706fbeULL, 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+    0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL,
+    0xc19bf174cf692694ULL, 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+    0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, 0x2de92c6f592b0275ULL,
+    0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+    0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL,
+    0xbf597fc7beef0ee4ULL, 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+    0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, 0x27b70a8546d22ffcULL,
+    0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+    0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL,
+    0x92722c851482353bULL, 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+    0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, 0xd192e819d6ef5218ULL,
+    0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+    0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 0x2748774cdf8eeb99ULL,
+    0x34b0bcb5e19b48a8ULL, 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+    0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, 0x748f82ee5defb2fcULL,
+    0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+    0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL,
+    0xc67178f2e372532bULL, 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+    0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, 0x06f067aa72176fbaULL,
+    0x0a637dc5a2c898a6ULL, 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+    0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL,
+    0x431d67c49c100d4cULL, 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+    0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
+};
+
+/* a is icv/ocv, w is a single message block. w will get reused internally. */
+static void sha512_bda(uint64_t a[8], uint64_t w[16])
+{
+    uint64_t t, z[8], b[8];
+    int i, j;
+
+    memcpy(z, a, sizeof(z));
+    for (i = 0; i < 80; i++) {
+        memcpy(b, a, sizeof(b));
+
+        t = a[7] + Sigma1(a[4]) + Ch(a[4], a[5], a[6]) + K[i] + w[i % 16];
+        b[7] = t + Sigma0(a[0]) + Maj(a[0], a[1], a[2]);
+        b[3] += t;
+        for (j = 0; j < 8; ++j) {
+            a[(j + 1) % 8] = b[j];
+        }
+        if (i % 16 == 15) {
+            for (j = 0; j < 16; ++j) {
+                w[j] += w[(j + 9) % 16] + sigma0(w[(j + 1) % 16]) +
+                        sigma1(w[(j + 14) % 16]);
+            }
+        }
+    }
+
+    for (i = 0; i < 8; i++) {
+        a[i] += z[i];
+    }
+}
+
+/* a is icv/ocv, w is a single message block that needs be64 conversion. */
+static void sha512_be64(uint64_t a[8], uint64_t w[16])
+{
+    uint64_t t[16];
+    int i;
+
+    for (i = 0; i < 16; i++) {
+        t[i] = be64_to_cpu(w[i]);
+    }
+    sha512_bda(a, t);
+}
+
+static void sha512_read_icv(CPUS390XState *env, uint64_t addr,
+                            uint64_t a[8], uintptr_t ra)
+{
+    int i;
+
+    for (i = 0; i < 8; i++, addr += 8) {
+        addr = wrap_address(env, addr);
+        a[i] = cpu_ldq_be_data_ra(env, addr, ra);
+    }
+}
+
+static void sha512_write_ocv(CPUS390XState *env, uint64_t addr,
+                             uint64_t a[8], uintptr_t ra)
+{
+    int i;
+
+    for (i = 0; i < 8; i++, addr += 8) {
+        addr = wrap_address(env, addr);
+        cpu_stq_be_data_ra(env, addr, a[i], ra);
+    }
+}
+
+static void sha512_read_block(CPUS390XState *env, uint64_t addr,
+                              uint64_t a[16], uintptr_t ra)
+{
+    int i;
+
+    for (i = 0; i < 16; i++, addr += 8) {
+        addr = wrap_address(env, addr);
+        a[i] = cpu_ldq_be_data_ra(env, addr, ra);
+    }
+}
+
+static void sha512_read_mbl_be64(CPUS390XState *env, uint64_t addr,
+                                 uint8_t a[16], uintptr_t ra)
+{
+    int i;
+
+    for (i = 0; i < 16; i++, addr += 1) {
+        addr = wrap_address(env, addr);
+        a[i] = cpu_ldub_data_ra(env, addr, ra);
+    }
+}
+
+static int cpacf_sha512(CPUS390XState *env, uintptr_t ra, uint64_t param_addr,
+                      uint64_t *message_reg, uint64_t *len_reg, uint32_t type)
+{
+    enum { MAX_BLOCKS_PER_RUN = 64 }; /* Arbitrary: keep interactivity. */
+    uint64_t len = *len_reg, a[8], processed = 0;
+    int i, message_reg_len = 64;
+
+    g_assert(type == S390_FEAT_TYPE_KIMD || type == S390_FEAT_TYPE_KLMD);
+
+    if (!(env->psw.mask & PSW_MASK_64)) {
+        len = (uint32_t)len;
+        message_reg_len = (env->psw.mask & PSW_MASK_32) ? 32 : 24;
+    }
+
+    /* KIMD: length has to be properly aligned. */
+    if (type == S390_FEAT_TYPE_KIMD && !QEMU_IS_ALIGNED(len, 128)) {
+        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
+    }
+
+    sha512_read_icv(env, param_addr, a, ra);
+
+    /* Process full blocks first. */
+    for (; len >= 128; len -= 128, processed += 128) {
+        uint64_t w[16];
+
+        if (processed >= MAX_BLOCKS_PER_RUN * 128) {
+            break;
+        }
+
+        sha512_read_block(env, *message_reg + processed, w, ra);
+        sha512_bda(a, w);
+    }
+
+    /* KLMD: Process partial/empty block last. */
+    if (type == S390_FEAT_TYPE_KLMD && len < 128) {
+        uint8_t x[128];
+
+        /* Read the remainder of the message byte-per-byte. */
+        for (i = 0; i < len; i++) {
+            uint64_t addr = wrap_address(env, *message_reg + processed + i);
+
+            x[i] = cpu_ldub_data_ra(env, addr, ra);
+        }
+        /*
+         * Pad the remainder with zero and place magic value 128 after the
+         * message.
+         */
+        memset(x + len, 0, 128 - len);
+        x[len] = 128;
+
+        /*
+         * Place the MBL either into this block (if there is space left),
+         * or use an additional one.
+         */
+        if (len < 112) {
+            sha512_read_mbl_be64(env, param_addr + 64, x + 112, ra);
+        }
+        sha512_be64(a, (uint64_t *)x);
+
+        if (len >= 112) {
+            memset(x, 0, 112);
+            sha512_read_mbl_be64(env, param_addr + 64, x + 112, ra);
+            sha512_be64(a, (uint64_t *)x);
+        }
+
+        processed += len;
+        len = 0;
+    }
+
+    /*
+     * Modify memory after we read all inputs and modify registers only after
+     * writing memory succeeded.
+     *
+     * TODO: if writing fails halfway through (e.g., when crossing page
+     * boundaries), we're in trouble. We'd need something like acces_prepare().
+     */
+    sha512_write_ocv(env, param_addr, a, ra);
+    *message_reg = deposit64(*message_reg, 0, message_reg_len,
+                             *message_reg + processed);
+    *len_reg -= processed;
+    return !len ? 0 : 3;
+}
+
  uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
                       uint32_t type)
  {
@@ -52,6 +281,9 @@ uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
              cpu_stb_data_ra(env, param_addr, subfunc[i], ra);
          }
          break;
+    case 3: /* CPACF_*_SHA_512 */
+        return cpacf_sha512(env, ra, env->regs[1], &env->regs[r2],
+                            &env->regs[r2 + 1], type);
      default:
          /* we don't implement any other subfunction yet */
          g_assert_not_reached();
-- 
2.37.3


-- 
Thanks,

David / dhildenb



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v8 2/2] target/s390x: support PRNO_TRNG instruction
  2022-09-21 10:07 ` [PATCH v8 2/2] target/s390x: support PRNO_TRNG instruction Jason A. Donenfeld
@ 2022-09-22 13:09   ` David Hildenbrand
  2022-09-22 15:40   ` Thomas Huth
  2022-09-26 15:11   ` Thomas Huth
  2 siblings, 0 replies; 29+ messages in thread
From: David Hildenbrand @ 2022-09-22 13:09 UTC (permalink / raw)
  To: Jason A. Donenfeld, qemu-s390x, qemu-devel
  Cc: Thomas Huth, Christian Borntraeger, Richard Henderson,
	Cornelia Huck, Harald Freudenberger, Holger Dengler

On 21.09.22 12:07, Jason A. Donenfeld wrote:
> In order for hosts running inside of TCG to initialize the kernel's
> random number generator, we should support the PRNO_TRNG instruction,
> backed in the usual way with the qemu_guest_getrandom helper. This is
> confirmed working on Linux 5.19.
> 
> Cc: Thomas Huth <thuth@redhat.com>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
> Cc: Richard Henderson <richard.henderson@linaro.org>
> Cc: Cornelia Huck <cohuck@redhat.com>
> Cc: Harald Freudenberger <freude@linux.ibm.com>
> Cc: Holger Dengler <dengler@linux.ibm.com>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>

Thanks!

Reviewed-by: David Hildenbrand <david@redhat.com>

-- 
Thanks,

David / dhildenb



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8 1/2] target/s390x: support SHA-512 extensions
  2022-09-22 13:07 ` [PATCH v8 1/2] target/s390x: support SHA-512 extensions David Hildenbrand
@ 2022-09-22 14:35   ` Jason A. Donenfeld
  2022-09-22 14:51     ` David Hildenbrand
  2022-09-23 10:14   ` Alex Bennée
  1 sibling, 1 reply; 29+ messages in thread
From: Jason A. Donenfeld @ 2022-09-22 14:35 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: qemu-s390x, qemu-devel, Thomas Huth, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On Thu, Sep 22, 2022 at 03:07:13PM +0200, David Hildenbrand wrote:
> +        if (processed >= MAX_BLOCKS_PER_RUN * 128) {
> +            break;

If you make this a `goto out` or similar instead of a break, then you
can

> +    if (type == S390_FEAT_TYPE_KLMD && len < 128) {

change that to `if (len)`.

> +        /*
> +         * Pad the remainder with zero and place magic value 128 after the
> +         * message.
> +         */
> +        memset(x + len, 0, 128 - len);
> +        x[len] = 128;

"magic value 128" ==> "set the top bit"

Aside from these nits, this refactoring looks fine. I haven't tested
this or checked the crypto carefully, but if it passes all the Linux
test vectors, hopefully things are still fine.

Jason


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8 1/2] target/s390x: support SHA-512 extensions
  2022-09-22 14:35   ` Jason A. Donenfeld
@ 2022-09-22 14:51     ` David Hildenbrand
  2022-09-22 15:11       ` David Hildenbrand
  2022-09-22 15:36       ` Thomas Huth
  0 siblings, 2 replies; 29+ messages in thread
From: David Hildenbrand @ 2022-09-22 14:51 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: qemu-s390x, qemu-devel, Thomas Huth, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On 22.09.22 16:35, Jason A. Donenfeld wrote:
> On Thu, Sep 22, 2022 at 03:07:13PM +0200, David Hildenbrand wrote:
>> +        if (processed >= MAX_BLOCKS_PER_RUN * 128) {
>> +            break;
> 
> If you make this a `goto out` or similar instead of a break, then you
> can
> 
>> +    if (type == S390_FEAT_TYPE_KLMD && len < 128) {
> 
> change that to `if (len)`.


Thanks, I'll do this on top:

diff --git a/target/s390x/tcg/crypto_helper.c b/target/s390x/tcg/crypto_helper.c
index 67133ba33a..c1505b27a4 100644
--- a/target/s390x/tcg/crypto_helper.c
+++ b/target/s390x/tcg/crypto_helper.c
@@ -190,7 +190,7 @@ static int cpacf_sha512(CPUS390XState *env, uintptr_t ra, uint64_t param_addr,
          uint64_t w[16];
  
          if (processed >= MAX_BLOCKS_PER_RUN * 128) {
-            break;
+            goto write_ocv;
          }
  
          sha512_read_block(env, *message_reg + processed, w, ra);
@@ -198,7 +198,7 @@ static int cpacf_sha512(CPUS390XState *env, uintptr_t ra, uint64_t param_addr,
      }
  
      /* KMLD: Process partial/empty block last. */
-    if (type == S390_FEAT_TYPE_KLMD && len < 128) {
+    if (len) {
          uint8_t x[128];
  
          /* Read the remainder of the message byte-per-byte. */
@@ -237,6 +237,7 @@ static int cpacf_sha512(CPUS390XState *env, uintptr_t ra, uint64_t param_addr,
       * TODO: if writing fails halfway through (e.g., when crossing page
       * boundaries), we're in trouble. We'd need something like access_prepare().
       */
+write_ocv:
      sha512_write_ocv(env, param_addr, a, ra);
      *message_reg = deposit64(*message_reg, 0, message_reg_len,
                               *message_reg + processed);


> 
>> +        /*
>> +         * Pad the remainder with zero and place magic value 128 after the
>> +         * message.
>> +         */
>> +        memset(x + len, 0, 128 - len);
>> +        x[len] = 128;
> 
> "magic value 128" ==> "set the top bit"

Yes, thanks. I missed that detail in the PoP. (not sure if it's documented at all ...)

"Pad the remainder with zero and set the top bit."

> 
> Aside from these nits, this refactoring looks fine. I haven't tested
> this or checked the crypto carefully, but if it passes all the Linux
> test vectors, hopefully things are still fine.


Thanks. I'll resend this patch only as reply to your original one,
so Thomas can easily pick it up (or add more feedback :)).

-- 
Thanks,

David / dhildenb



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v8 1/2] target/s390x: support SHA-512 extensions
  2022-09-22 14:51     ` David Hildenbrand
@ 2022-09-22 15:11       ` David Hildenbrand
  2022-09-22 15:36       ` Thomas Huth
  1 sibling, 0 replies; 29+ messages in thread
From: David Hildenbrand @ 2022-09-22 15:11 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: qemu-s390x, qemu-devel, Thomas Huth, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On 22.09.22 16:51, David Hildenbrand wrote:
> On 22.09.22 16:35, Jason A. Donenfeld wrote:
>> On Thu, Sep 22, 2022 at 03:07:13PM +0200, David Hildenbrand wrote:
>>> +        if (processed >= MAX_BLOCKS_PER_RUN * 128) {
>>> +            break;
>>
>> If you make this a `goto out` or similar instead of a break, then you
>> can
>>
>>> +    if (type == S390_FEAT_TYPE_KLMD && len < 128) {
>>
>> change that to `if (len)`.
> 
> 
> Thanks, I'll do this on top:
> 
> diff --git a/target/s390x/tcg/crypto_helper.c b/target/s390x/tcg/crypto_helper.c
> index 67133ba33a..c1505b27a4 100644
> --- a/target/s390x/tcg/crypto_helper.c
> +++ b/target/s390x/tcg/crypto_helper.c
> @@ -190,7 +190,7 @@ static int cpacf_sha512(CPUS390XState *env, uintptr_t ra, uint64_t param_addr,
>            uint64_t w[16];
>    
>            if (processed >= MAX_BLOCKS_PER_RUN * 128) {
> -            break;
> +            goto write_ocv;
>            }
>    
>            sha512_read_block(env, *message_reg + processed, w, ra);
> @@ -198,7 +198,7 @@ static int cpacf_sha512(CPUS390XState *env, uintptr_t ra, uint64_t param_addr,
>        }
>    
>        /* KMLD: Process partial/empty block last. */
> -    if (type == S390_FEAT_TYPE_KLMD && len < 128) {
> +    if (len) {
>            uint8_t x[128];
>    
>            /* Read the remainder of the message byte-per-byte. */
> @@ -237,6 +237,7 @@ static int cpacf_sha512(CPUS390XState *env, uintptr_t ra, uint64_t param_addr,
>         * TODO: if writing fails halfway through (e.g., when crossing page
>         * boundaries), we're in trouble. We'd need something like access_prepare().
>         */
> +write_ocv:
>        sha512_write_ocv(env, param_addr, a, ra);
>        *message_reg = deposit64(*message_reg, 0, message_reg_len,
>                                 *message_reg + processed);
>

I just realized (when testing) that this doesn't work as we also have to 
deal with the "len == 0" case for KLMD ...

-- 
Thanks,

David / dhildenb



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8 1/2] target/s390x: support SHA-512 extensions
  2022-09-22 14:51     ` David Hildenbrand
  2022-09-22 15:11       ` David Hildenbrand
@ 2022-09-22 15:36       ` Thomas Huth
  2022-09-22 15:37         ` David Hildenbrand
  1 sibling, 1 reply; 29+ messages in thread
From: Thomas Huth @ 2022-09-22 15:36 UTC (permalink / raw)
  To: David Hildenbrand, Jason A. Donenfeld
  Cc: qemu-s390x, qemu-devel, Christian Borntraeger, Richard Henderson,
	Cornelia Huck, Harald Freudenberger, Holger Dengler

On 22/09/2022 16.51, David Hildenbrand wrote:
[...]
> Thanks. I'll resend this patch only as reply to your original one,
> so Thomas can easily pick it up (or add more feedback :)).

We're also missing the machine compat handling ... could you
add something like this on top:

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -803,8 +803,11 @@ DEFINE_CCW_MACHINE(7_2, "7.2", true);
  
  static void ccw_machine_7_1_instance_options(MachineState *machine)
  {
+    static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V7_1 };
+
      ccw_machine_7_2_instance_options(machine);
      s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE);
+    s390_set_qemu_cpu_model(0x8561, 15, 1, qemu_cpu_feat);
  }
  
  static void ccw_machine_7_1_class_options(MachineClass *mc)
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -744,13 +744,16 @@ static uint16_t qemu_V7_0[] = {
      S390_FEAT_MISC_INSTRUCTION_EXT3,
  };
  
+static uint16_t qemu_V7_1[] = {
+    S390_FEAT_VECTOR_ENH2,
+};
+
  /*
   * Features for the "qemu" CPU model of the latest QEMU machine and the "max"
   * CPU model under TCG. Don't include features that are not part of the full
   * feature set of the current "max" CPU model generation.
   */
  static uint16_t qemu_MAX[] = {
-    S390_FEAT_VECTOR_ENH2,
      S390_FEAT_MSA_EXT_5,
      S390_FEAT_KIMD_SHA_512,
      S390_FEAT_KLMD_SHA_512,
@@ -876,6 +879,7 @@ static FeatGroupDefSpec QemuFeatDef[] = {
      QEMU_FEAT_INITIALIZER(V6_0),
      QEMU_FEAT_INITIALIZER(V6_2),
      QEMU_FEAT_INITIALIZER(V7_0),
+    QEMU_FEAT_INITIALIZER(V7_1),
      QEMU_FEAT_INITIALIZER(MAX),
  };
  
(otherwise I can also add it when picking up the patch)

  Thomas




^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8 1/2] target/s390x: support SHA-512 extensions
  2022-09-22 15:36       ` Thomas Huth
@ 2022-09-22 15:37         ` David Hildenbrand
  0 siblings, 0 replies; 29+ messages in thread
From: David Hildenbrand @ 2022-09-22 15:37 UTC (permalink / raw)
  To: Thomas Huth, Jason A. Donenfeld
  Cc: qemu-s390x, qemu-devel, Christian Borntraeger, Richard Henderson,
	Cornelia Huck, Harald Freudenberger, Holger Dengler

On 22.09.22 17:36, Thomas Huth wrote:
> On 22/09/2022 16.51, David Hildenbrand wrote:
> [...]
>> Thanks. I'll resend this patch only as reply to your original one,
>> so Thomas can easily pick it up (or add more feedback :)).
> 
> We're also missing the machine compat handling ... could you
> add something like this on top:
> 
> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> --- a/hw/s390x/s390-virtio-ccw.c
> +++ b/hw/s390x/s390-virtio-ccw.c
> @@ -803,8 +803,11 @@ DEFINE_CCW_MACHINE(7_2, "7.2", true);
>    
>    static void ccw_machine_7_1_instance_options(MachineState *machine)
>    {
> +    static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V7_1 };
> +
>        ccw_machine_7_2_instance_options(machine);
>        s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE);
> +    s390_set_qemu_cpu_model(0x8561, 15, 1, qemu_cpu_feat);
>    }
>    
>    static void ccw_machine_7_1_class_options(MachineClass *mc)
> diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
> --- a/target/s390x/gen-features.c
> +++ b/target/s390x/gen-features.c
> @@ -744,13 +744,16 @@ static uint16_t qemu_V7_0[] = {
>        S390_FEAT_MISC_INSTRUCTION_EXT3,
>    };
>    
> +static uint16_t qemu_V7_1[] = {
> +    S390_FEAT_VECTOR_ENH2,
> +};
> +
>    /*
>     * Features for the "qemu" CPU model of the latest QEMU machine and the "max"
>     * CPU model under TCG. Don't include features that are not part of the full
>     * feature set of the current "max" CPU model generation.
>     */
>    static uint16_t qemu_MAX[] = {
> -    S390_FEAT_VECTOR_ENH2,
>        S390_FEAT_MSA_EXT_5,
>        S390_FEAT_KIMD_SHA_512,
>        S390_FEAT_KLMD_SHA_512,
> @@ -876,6 +879,7 @@ static FeatGroupDefSpec QemuFeatDef[] = {
>        QEMU_FEAT_INITIALIZER(V6_0),
>        QEMU_FEAT_INITIALIZER(V6_2),
>        QEMU_FEAT_INITIALIZER(V7_0),
> +    QEMU_FEAT_INITIALIZER(V7_1),
>        QEMU_FEAT_INITIALIZER(MAX),
>    };
>    
> (otherwise I can also add it when picking up the patch)

He, I just added that already while testing :)

-- 
Thanks,

David / dhildenb



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-21 10:07 [PATCH v8 1/2] target/s390x: support SHA-512 extensions Jason A. Donenfeld
  2022-09-21 10:07 ` [PATCH v8 2/2] target/s390x: support PRNO_TRNG instruction Jason A. Donenfeld
  2022-09-22 13:07 ` [PATCH v8 1/2] target/s390x: support SHA-512 extensions David Hildenbrand
@ 2022-09-22 15:38 ` David Hildenbrand
  2022-09-22 15:55   ` Thomas Huth
  2 siblings, 1 reply; 29+ messages in thread
From: David Hildenbrand @ 2022-09-22 15:38 UTC (permalink / raw)
  To: qemu-devel
  Cc: qemu-s390x, Jason A. Donenfeld, Thomas Huth, David Hildenbrand,
	Christian Borntraeger, Richard Henderson, Cornelia Huck,
	Harald Freudenberger, Holger Dengler

From: "Jason A. Donenfeld" <Jason@zx2c4.com>

In order to fully support MSA_EXT_5, we have to support the SHA-512
special instructions. So implement those.

The implementation began as something TweetNacl-like, and then was
adjusted to be useful here. It's not very beautiful, but it is quite
short and compact, which is what we're going for.

Cc: Thomas Huth <thuth@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Harald Freudenberger <freude@linux.ibm.com>
Cc: Holger Dengler <dengler@linux.ibm.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
[ restructure, add missing exception, add comments, fixup CPU model ]
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 hw/s390x/s390-virtio-ccw.c       |   3 +
 target/s390x/gen-features.c      |   9 +-
 target/s390x/tcg/crypto_helper.c | 229 +++++++++++++++++++++++++++++++
 3 files changed, 240 insertions(+), 1 deletion(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 9a2467c889..e18b816aba 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -803,8 +803,11 @@ DEFINE_CCW_MACHINE(7_2, "7.2", true);
 
 static void ccw_machine_7_1_instance_options(MachineState *machine)
 {
+    static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V7_1 };
+
     ccw_machine_7_2_instance_options(machine);
     s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE);
+    s390_set_qemu_cpu_model(0x8561, 15, 1, qemu_cpu_feat);
 }
 
 static void ccw_machine_7_1_class_options(MachineClass *mc)
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 1558c52626..baadbf4517 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -744,13 +744,19 @@ static uint16_t qemu_V7_0[] = {
     S390_FEAT_MISC_INSTRUCTION_EXT3,
 };
 
+static uint16_t qemu_V7_1[] = {
+    S390_FEAT_VECTOR_ENH2,
+};
+
 /*
  * Features for the "qemu" CPU model of the latest QEMU machine and the "max"
  * CPU model under TCG. Don't include features that are not part of the full
  * feature set of the current "max" CPU model generation.
  */
 static uint16_t qemu_MAX[] = {
-    S390_FEAT_VECTOR_ENH2,
+    S390_FEAT_MSA_EXT_5,
+    S390_FEAT_KIMD_SHA_512,
+    S390_FEAT_KLMD_SHA_512,
 };
 
 /****** END FEATURE DEFS ******/
@@ -873,6 +879,7 @@ static FeatGroupDefSpec QemuFeatDef[] = {
     QEMU_FEAT_INITIALIZER(V6_0),
     QEMU_FEAT_INITIALIZER(V6_2),
     QEMU_FEAT_INITIALIZER(V7_0),
+    QEMU_FEAT_INITIALIZER(V7_1),
     QEMU_FEAT_INITIALIZER(MAX),
 };
 
diff --git a/target/s390x/tcg/crypto_helper.c b/target/s390x/tcg/crypto_helper.c
index 138d9e7ad9..106c89fd2d 100644
--- a/target/s390x/tcg/crypto_helper.c
+++ b/target/s390x/tcg/crypto_helper.c
@@ -1,10 +1,12 @@
 /*
  *  s390x crypto helpers
  *
+ *  Copyright (C) 2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
  *  Copyright (c) 2017 Red Hat Inc
  *
  *  Authors:
  *   David Hildenbrand <david@redhat.com>
+ *   Jason A. Donenfeld <Jason@zx2c4.com>
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -18,6 +20,230 @@
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 
+static uint64_t R(uint64_t x, int c)
+{
+    return (x >> c) | (x << (64 - c));
+}
+static uint64_t Ch(uint64_t x, uint64_t y, uint64_t z)
+{
+    return (x & y) ^ (~x & z);
+}
+static uint64_t Maj(uint64_t x, uint64_t y, uint64_t z)
+{
+    return (x & y) ^ (x & z) ^ (y & z);
+}
+static uint64_t Sigma0(uint64_t x)
+{
+    return R(x, 28) ^ R(x, 34) ^ R(x, 39);
+}
+static uint64_t Sigma1(uint64_t x)
+{
+    return R(x, 14) ^ R(x, 18) ^ R(x, 41);
+}
+static uint64_t sigma0(uint64_t x)
+{
+    return R(x, 1) ^ R(x, 8) ^ (x >> 7);
+}
+static uint64_t sigma1(uint64_t x)
+{
+    return R(x, 19) ^ R(x, 61) ^ (x >> 6);
+}
+
+static const uint64_t K[80] = {
+    0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL,
+    0xe9b5dba58189dbbcULL, 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+    0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, 0xd807aa98a3030242ULL,
+    0x12835b0145706fbeULL, 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+    0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL,
+    0xc19bf174cf692694ULL, 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+    0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, 0x2de92c6f592b0275ULL,
+    0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+    0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL,
+    0xbf597fc7beef0ee4ULL, 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+    0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, 0x27b70a8546d22ffcULL,
+    0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+    0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL,
+    0x92722c851482353bULL, 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+    0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, 0xd192e819d6ef5218ULL,
+    0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+    0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 0x2748774cdf8eeb99ULL,
+    0x34b0bcb5e19b48a8ULL, 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+    0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, 0x748f82ee5defb2fcULL,
+    0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+    0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL,
+    0xc67178f2e372532bULL, 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+    0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, 0x06f067aa72176fbaULL,
+    0x0a637dc5a2c898a6ULL, 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+    0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL,
+    0x431d67c49c100d4cULL, 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+    0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
+};
+
+/* a is icv/ocv, w is a single message block. w will get reused internally. */
+static void sha512_bda(uint64_t a[8], uint64_t w[16])
+{
+    uint64_t t, z[8], b[8];
+    int i, j;
+
+    memcpy(z, a, sizeof(z));
+    for (i = 0; i < 80; i++) {
+        memcpy(b, a, sizeof(b));
+
+        t = a[7] + Sigma1(a[4]) + Ch(a[4], a[5], a[6]) + K[i] + w[i % 16];
+        b[7] = t + Sigma0(a[0]) + Maj(a[0], a[1], a[2]);
+        b[3] += t;
+        for (j = 0; j < 8; ++j) {
+            a[(j + 1) % 8] = b[j];
+        }
+        if (i % 16 == 15) {
+            for (j = 0; j < 16; ++j) {
+                w[j] += w[(j + 9) % 16] + sigma0(w[(j + 1) % 16]) +
+                        sigma1(w[(j + 14) % 16]);
+            }
+        }
+    }
+
+    for (i = 0; i < 8; i++) {
+        a[i] += z[i];
+    }
+}
+
+/* a is icv/ocv, w is a single message block that needs be64 conversion. */
+static void sha512_bda_be64(uint64_t a[8], uint64_t w[16])
+{
+    uint64_t t[16];
+    int i;
+
+    for (i = 0; i < 16; i++) {
+        t[i] = be64_to_cpu(w[i]);
+    }
+    sha512_bda(a, t);
+}
+
+static void sha512_read_icv(CPUS390XState *env, uint64_t addr,
+                            uint64_t a[8], uintptr_t ra)
+{
+    int i;
+
+    for (i = 0; i < 8; i++, addr += 8) {
+        addr = wrap_address(env, addr);
+        a[i] = cpu_ldq_be_data_ra(env, addr, ra);
+    }
+}
+
+static void sha512_write_ocv(CPUS390XState *env, uint64_t addr,
+                             uint64_t a[8], uintptr_t ra)
+{
+    int i;
+
+    for (i = 0; i < 8; i++, addr += 8) {
+        addr = wrap_address(env, addr);
+        cpu_stq_be_data_ra(env, addr, a[i], ra);
+    }
+}
+
+static void sha512_read_block(CPUS390XState *env, uint64_t addr,
+                              uint64_t a[16], uintptr_t ra)
+{
+    int i;
+
+    for (i = 0; i < 16; i++, addr += 8) {
+        addr = wrap_address(env, addr);
+        a[i] = cpu_ldq_be_data_ra(env, addr, ra);
+    }
+}
+
+static void sha512_read_mbl_be64(CPUS390XState *env, uint64_t addr,
+                                 uint8_t a[16], uintptr_t ra)
+{
+    int i;
+
+    for (i = 0; i < 16; i++, addr += 1) {
+        addr = wrap_address(env, addr);
+        a[i] = cpu_ldub_data_ra(env, addr, ra);
+    }
+}
+
+static int cpacf_sha512(CPUS390XState *env, uintptr_t ra, uint64_t param_addr,
+                      uint64_t *message_reg, uint64_t *len_reg, uint32_t type)
+{
+    enum { MAX_BLOCKS_PER_RUN = 64 }; /* Arbitrary: keep interactivity. */
+    uint64_t len = *len_reg, a[8], processed = 0;
+    int i, message_reg_len = 64;
+
+    g_assert(type == S390_FEAT_TYPE_KIMD || type == S390_FEAT_TYPE_KLMD);
+
+    if (!(env->psw.mask & PSW_MASK_64)) {
+        len = (uint32_t)len;
+        message_reg_len = (env->psw.mask & PSW_MASK_32) ? 32 : 24;
+    }
+
+    /* KIMD: length has to be properly aligned. */
+    if (type == S390_FEAT_TYPE_KIMD && !QEMU_IS_ALIGNED(len, 128)) {
+        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
+    }
+
+    sha512_read_icv(env, param_addr, a, ra);
+
+    /* Process full blocks first. */
+    for (; len >= 128; len -= 128, processed += 128) {
+        uint64_t w[16];
+
+        if (processed >= MAX_BLOCKS_PER_RUN * 128) {
+            break;
+        }
+
+        sha512_read_block(env, *message_reg + processed, w, ra);
+        sha512_bda(a, w);
+    }
+
+    /* KLMD: Process partial/empty block last. */
+    if (type == S390_FEAT_TYPE_KLMD && len < 128) {
+        uint8_t x[128];
+
+        /* Read the remainder of the message byte-per-byte. */
+        for (i = 0; i < len; i++) {
+            uint64_t addr = wrap_address(env, *message_reg + processed + i);
+
+            x[i] = cpu_ldub_data_ra(env, addr, ra);
+        }
+        /* Pad the remainder with zero and set the top bit. */
+        memset(x + len, 0, 128 - len);
+        x[len] = 128;
+
+        /*
+         * Place the MBL either into this block (if there is space left),
+         * or use an additional one.
+         */
+        if (len < 112) {
+            sha512_read_mbl_be64(env, param_addr + 64, x + 112, ra);
+        }
+        sha512_bda_be64(a, (uint64_t *)x);
+
+        if (len >= 112) {
+            memset(x, 0, 112);
+            sha512_read_mbl_be64(env, param_addr + 64, x + 112, ra);
+            sha512_bda_be64(a, (uint64_t *)x);
+        }
+
+        processed += len;
+        len = 0;
+    }
+
+    /*
+     * Modify memory after we read all inputs and modify registers only after
+     * writing memory succeeded.
+     *
+     * TODO: if writing fails halfway through (e.g., when crossing page
+     * boundaries), we're in trouble. We'd need something like access_prepare().
+     */
+    sha512_write_ocv(env, param_addr, a, ra);
+    *message_reg = deposit64(*message_reg, 0, message_reg_len,
+                             *message_reg + processed);
+    *len_reg -= processed;
+    return !len ? 0 : 3;
+}
+
 uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
                      uint32_t type)
 {
@@ -52,6 +278,9 @@ uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
             cpu_stb_data_ra(env, param_addr, subfunc[i], ra);
         }
         break;
+    case 3: /* CPACF_*_SHA_512 */
+        return cpacf_sha512(env, ra, env->regs[1], &env->regs[r2],
+                            &env->regs[r2 + 1], type);
     default:
         /* we don't implement any other subfunction yet */
         g_assert_not_reached();
-- 
2.37.3



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v8 2/2] target/s390x: support PRNO_TRNG instruction
  2022-09-21 10:07 ` [PATCH v8 2/2] target/s390x: support PRNO_TRNG instruction Jason A. Donenfeld
  2022-09-22 13:09   ` David Hildenbrand
@ 2022-09-22 15:40   ` Thomas Huth
  2022-09-26 15:11   ` Thomas Huth
  2 siblings, 0 replies; 29+ messages in thread
From: Thomas Huth @ 2022-09-22 15:40 UTC (permalink / raw)
  To: Jason A. Donenfeld, qemu-s390x, qemu-devel
  Cc: David Hildenbrand, Christian Borntraeger, Richard Henderson,
	Cornelia Huck, Harald Freudenberger, Holger Dengler

On 21/09/2022 12.07, Jason A. Donenfeld wrote:
> In order for hosts running inside of TCG to initialize the kernel's
> random number generator, we should support the PRNO_TRNG instruction,
> backed in the usual way with the qemu_guest_getrandom helper. This is
> confirmed working on Linux 5.19.
> 
> Cc: Thomas Huth <thuth@redhat.com>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
> Cc: Richard Henderson <richard.henderson@linaro.org>
> Cc: Cornelia Huck <cohuck@redhat.com>
> Cc: Harald Freudenberger <freude@linux.ibm.com>
> Cc: Holger Dengler <dengler@linux.ibm.com>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> ---
>   target/s390x/gen-features.c      |  1 +
>   target/s390x/tcg/crypto_helper.c | 30 ++++++++++++++++++++++++++++++
>   2 files changed, 31 insertions(+)
> 
> diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
> index 14a7f2ae90..aaade67574 100644
> --- a/target/s390x/gen-features.c
> +++ b/target/s390x/gen-features.c
> @@ -754,6 +754,7 @@ static uint16_t qemu_MAX[] = {
>       S390_FEAT_MSA_EXT_5,
>       S390_FEAT_KIMD_SHA_512,
>       S390_FEAT_KLMD_SHA_512,
> +    S390_FEAT_PRNO_TRNG,
>   };
>   
>   /****** END FEATURE DEFS ******/
> diff --git a/target/s390x/tcg/crypto_helper.c b/target/s390x/tcg/crypto_helper.c
> index 02073ec70b..0daa9a2dd9 100644
> --- a/target/s390x/tcg/crypto_helper.c
> +++ b/target/s390x/tcg/crypto_helper.c
> @@ -14,6 +14,7 @@
>   
>   #include "qemu/osdep.h"
>   #include "qemu/main-loop.h"
> +#include "qemu/guest-random.h"
>   #include "s390x-internal.h"
>   #include "tcg_s390x.h"
>   #include "exec/helper-proto.h"
> @@ -173,6 +174,31 @@ static int klmd_sha512(CPUS390XState *env, uintptr_t ra, uint64_t parameter_bloc
>       return 0;
>   }
>   
> +static void fill_buf_random(CPUS390XState *env, uintptr_t ra,
> +                            uint64_t *buf_reg, uint64_t *len_reg)
> +{
> +    uint8_t tmp[256];
> +    uint64_t len = *len_reg;
> +    int buf_reg_len = 64;
> +
> +    if (!(env->psw.mask & PSW_MASK_64)) {
> +        len = (uint32_t)len;
> +        buf_reg_len = (env->psw.mask & PSW_MASK_32) ? 32 : 24;
> +    }
> +
> +    while (len) {
> +        size_t block = MIN(len, sizeof(tmp));
> +
> +        qemu_guest_getrandom_nofail(tmp, block);
> +        for (size_t i = 0; i < block; ++i) {
> +            cpu_stb_data_ra(env, wrap_address(env, *buf_reg), tmp[i], ra);
> +            *buf_reg = deposit64(*buf_reg, 0, buf_reg_len, *buf_reg + 1);
> +            --*len_reg;
> +        }
> +        len -= block;
> +    }
> +}
> +
>   uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
>                        uint32_t type)
>   {
> @@ -215,6 +241,10 @@ uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
>               return klmd_sha512(env, ra, env->regs[1], &env->regs[r2], &env->regs[r2 + 1]);
>           }
>           break;
> +    case 114: /* CPACF_PRNO_TRNG */
> +        fill_buf_random(env, ra, &env->regs[r1], &env->regs[r1 + 1]);
> +        fill_buf_random(env, ra, &env->regs[r2], &env->regs[r2 + 1]);
> +        break;

Thanks, patch looks fine to me!

(if we ever have another instruction that uses fc 114, we might want to 
check "type" here, too, but that can also be added later, of course)

  Thomas



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-22 15:38 ` [PATCH v8.1 " David Hildenbrand
@ 2022-09-22 15:55   ` Thomas Huth
  2022-09-22 16:35     ` Jason A. Donenfeld
  2022-09-22 17:18     ` David Hildenbrand
  0 siblings, 2 replies; 29+ messages in thread
From: Thomas Huth @ 2022-09-22 15:55 UTC (permalink / raw)
  To: David Hildenbrand, qemu-devel
  Cc: qemu-s390x, Jason A. Donenfeld, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On 22/09/2022 17.38, David Hildenbrand wrote:
> From: "Jason A. Donenfeld" <Jason@zx2c4.com>
> 
> In order to fully support MSA_EXT_5, we have to support the SHA-512
> special instructions. So implement those.
> 
> The implementation began as something TweetNacl-like, and then was
> adjusted to be useful here. It's not very beautiful, but it is quite
> short and compact, which is what we're going for.
...
> @@ -52,6 +278,9 @@ uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
>               cpu_stb_data_ra(env, param_addr, subfunc[i], ra);
>           }
>           break;
> +    case 3: /* CPACF_*_SHA_512 */
> +        return cpacf_sha512(env, ra, env->regs[1], &env->regs[r2],
> +                            &env->regs[r2 + 1], type);

I have to say that I liked Jason's v8 better here. Code 3 is also used for 
other instructions with completely different meaning, e.g. PCKMO uses 3 for 
TDEA-192 ... so having the "type" check here made more sense.
(meta comment: maybe we should split up the msa function and stop using just 
one function for all crypto/rng related instructions? ... but that's of 
course something for a different patch series)

  Thomas



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-22 15:55   ` Thomas Huth
@ 2022-09-22 16:35     ` Jason A. Donenfeld
  2022-09-23  6:25       ` Thomas Huth
  2022-09-22 17:18     ` David Hildenbrand
  1 sibling, 1 reply; 29+ messages in thread
From: Jason A. Donenfeld @ 2022-09-22 16:35 UTC (permalink / raw)
  To: Thomas Huth
  Cc: David Hildenbrand, qemu-devel, qemu-s390x, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On Thu, Sep 22, 2022 at 5:55 PM Thomas Huth <thuth@redhat.com> wrote:
>
> On 22/09/2022 17.38, David Hildenbrand wrote:
> > From: "Jason A. Donenfeld" <Jason@zx2c4.com>
> >
> > In order to fully support MSA_EXT_5, we have to support the SHA-512
> > special instructions. So implement those.
> >
> > The implementation began as something TweetNacl-like, and then was
> > adjusted to be useful here. It's not very beautiful, but it is quite
> > short and compact, which is what we're going for.
> ...
> > @@ -52,6 +278,9 @@ uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
> >               cpu_stb_data_ra(env, param_addr, subfunc[i], ra);
> >           }
> >           break;
> > +    case 3: /* CPACF_*_SHA_512 */
> > +        return cpacf_sha512(env, ra, env->regs[1], &env->regs[r2],
> > +                            &env->regs[r2 + 1], type);
>
> I have to say that I liked Jason's v8 better here. Code 3 is also used for
> other instructions with completely different meaning, e.g. PCKMO uses 3 for
> TDEA-192 ... so having the "type" check here made more sense.
> (meta comment: maybe we should split up the msa function and stop using just
> one function for all crypto/rng related instructions? ... but that's of
> course something for a different patch series)

Maybe just commit my v8, and then David's changes can layer on top as
follow ups? Checking len alignment, for example, is a separate patch
from the rest.

Jason


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-22 15:55   ` Thomas Huth
  2022-09-22 16:35     ` Jason A. Donenfeld
@ 2022-09-22 17:18     ` David Hildenbrand
  2022-09-23  6:23       ` Thomas Huth
  1 sibling, 1 reply; 29+ messages in thread
From: David Hildenbrand @ 2022-09-22 17:18 UTC (permalink / raw)
  To: Thomas Huth, qemu-devel
  Cc: qemu-s390x, Jason A. Donenfeld, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On 22.09.22 17:55, Thomas Huth wrote:
> On 22/09/2022 17.38, David Hildenbrand wrote:
>> From: "Jason A. Donenfeld" <Jason@zx2c4.com>
>>
>> In order to fully support MSA_EXT_5, we have to support the SHA-512
>> special instructions. So implement those.
>>
>> The implementation began as something TweetNacl-like, and then was
>> adjusted to be useful here. It's not very beautiful, but it is quite
>> short and compact, which is what we're going for.
> ...
>> @@ -52,6 +278,9 @@ uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
>>                cpu_stb_data_ra(env, param_addr, subfunc[i], ra);
>>            }
>>            break;
>> +    case 3: /* CPACF_*_SHA_512 */
>> +        return cpacf_sha512(env, ra, env->regs[1], &env->regs[r2],
>> +                            &env->regs[r2 + 1], type);
> 
> I have to say that I liked Jason's v8 better here. Code 3 is also used for
> other instructions with completely different meaning, e.g. PCKMO uses 3 for
> TDEA-192 ... so having the "type" check here made more sense.
> (meta comment: maybe we should split up the msa function and stop using just
> one function for all crypto/rng related instructions? ... but that's of
> course something for a different patch series)

It kind-f made sense in v8 where we actually had different functions. We 
no longer have that here.

Anyhow, this is just the same as in patch 2/2. So no need to be fancy 
here ;)

-- 
Thanks,

David / dhildenb



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-22 17:18     ` David Hildenbrand
@ 2022-09-23  6:23       ` Thomas Huth
  2022-09-23  6:37         ` David Hildenbrand
  0 siblings, 1 reply; 29+ messages in thread
From: Thomas Huth @ 2022-09-23  6:23 UTC (permalink / raw)
  To: David Hildenbrand, qemu-devel
  Cc: qemu-s390x, Jason A. Donenfeld, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On 22/09/2022 19.18, David Hildenbrand wrote:
> On 22.09.22 17:55, Thomas Huth wrote:
>> On 22/09/2022 17.38, David Hildenbrand wrote:
>>> From: "Jason A. Donenfeld" <Jason@zx2c4.com>
>>>
>>> In order to fully support MSA_EXT_5, we have to support the SHA-512
>>> special instructions. So implement those.
>>>
>>> The implementation began as something TweetNacl-like, and then was
>>> adjusted to be useful here. It's not very beautiful, but it is quite
>>> short and compact, which is what we're going for.
>> ...
>>> @@ -52,6 +278,9 @@ uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, 
>>> uint32_t r2, uint32_t r3,
>>>                cpu_stb_data_ra(env, param_addr, subfunc[i], ra);
>>>            }
>>>            break;
>>> +    case 3: /* CPACF_*_SHA_512 */
>>> +        return cpacf_sha512(env, ra, env->regs[1], &env->regs[r2],
>>> +                            &env->regs[r2 + 1], type);
>>
>> I have to say that I liked Jason's v8 better here. Code 3 is also used for
>> other instructions with completely different meaning, e.g. PCKMO uses 3 for
>> TDEA-192 ... so having the "type" check here made more sense.
>> (meta comment: maybe we should split up the msa function and stop using just
>> one function for all crypto/rng related instructions? ... but that's of
>> course something for a different patch series)
> 
> It kind-f made sense in v8 where we actually had different functions. We no 
> longer have that here.

But the point is that the "msa" helper is also called for other instructions 
like PCKMO which can also be called with code 3. And there it means 
something completely different. ... unless I completely misunderstood the 
code, of course.

I think I'll go with Jason's v8 (+ compat machine handling on top) for now, 
so that we better record his state of the patch, and then we can do cleanup 
patches on top later.

  Thomas



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-22 16:35     ` Jason A. Donenfeld
@ 2022-09-23  6:25       ` Thomas Huth
  0 siblings, 0 replies; 29+ messages in thread
From: Thomas Huth @ 2022-09-23  6:25 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: David Hildenbrand, qemu-devel, qemu-s390x, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On 22/09/2022 18.35, Jason A. Donenfeld wrote:
> On Thu, Sep 22, 2022 at 5:55 PM Thomas Huth <thuth@redhat.com> wrote:
>>
>> On 22/09/2022 17.38, David Hildenbrand wrote:
>>> From: "Jason A. Donenfeld" <Jason@zx2c4.com>
>>>
>>> In order to fully support MSA_EXT_5, we have to support the SHA-512
>>> special instructions. So implement those.
>>>
>>> The implementation began as something TweetNacl-like, and then was
>>> adjusted to be useful here. It's not very beautiful, but it is quite
>>> short and compact, which is what we're going for.
>> ...
>>> @@ -52,6 +278,9 @@ uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
>>>                cpu_stb_data_ra(env, param_addr, subfunc[i], ra);
>>>            }
>>>            break;
>>> +    case 3: /* CPACF_*_SHA_512 */
>>> +        return cpacf_sha512(env, ra, env->regs[1], &env->regs[r2],
>>> +                            &env->regs[r2 + 1], type);
>>
>> I have to say that I liked Jason's v8 better here. Code 3 is also used for
>> other instructions with completely different meaning, e.g. PCKMO uses 3 for
>> TDEA-192 ... so having the "type" check here made more sense.
>> (meta comment: maybe we should split up the msa function and stop using just
>> one function for all crypto/rng related instructions? ... but that's of
>> course something for a different patch series)
> 
> Maybe just commit my v8, and then David's changes can layer on top as
> follow ups? Checking len alignment, for example, is a separate patch
> from the rest.

Yes, let's do that now - that will also later help to distinguish who did 
what part of the code.

  Thomas



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-23  6:23       ` Thomas Huth
@ 2022-09-23  6:37         ` David Hildenbrand
  2022-09-23  9:19           ` Jason A. Donenfeld
  0 siblings, 1 reply; 29+ messages in thread
From: David Hildenbrand @ 2022-09-23  6:37 UTC (permalink / raw)
  To: Thomas Huth, qemu-devel
  Cc: qemu-s390x, Jason A. Donenfeld, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On 23.09.22 08:23, Thomas Huth wrote:
> On 22/09/2022 19.18, David Hildenbrand wrote:
>> On 22.09.22 17:55, Thomas Huth wrote:
>>> On 22/09/2022 17.38, David Hildenbrand wrote:
>>>> From: "Jason A. Donenfeld" <Jason@zx2c4.com>
>>>>
>>>> In order to fully support MSA_EXT_5, we have to support the SHA-512
>>>> special instructions. So implement those.
>>>>
>>>> The implementation began as something TweetNacl-like, and then was
>>>> adjusted to be useful here. It's not very beautiful, but it is quite
>>>> short and compact, which is what we're going for.
>>> ...
>>>> @@ -52,6 +278,9 @@ uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1,
>>>> uint32_t r2, uint32_t r3,
>>>>                 cpu_stb_data_ra(env, param_addr, subfunc[i], ra);
>>>>             }
>>>>             break;
>>>> +    case 3: /* CPACF_*_SHA_512 */
>>>> +        return cpacf_sha512(env, ra, env->regs[1], &env->regs[r2],
>>>> +                            &env->regs[r2 + 1], type);
>>>
>>> I have to say that I liked Jason's v8 better here. Code 3 is also used for
>>> other instructions with completely different meaning, e.g. PCKMO uses 3 for
>>> TDEA-192 ... so having the "type" check here made more sense.
>>> (meta comment: maybe we should split up the msa function and stop using just
>>> one function for all crypto/rng related instructions? ... but that's of
>>> course something for a different patch series)
>>
>> It kind-f made sense in v8 where we actually had different functions. We no
>> longer have that here.
> 
> But the point is that the "msa" helper is also called for other instructions
> like PCKMO which can also be called with code 3. And there it means
> something completely different. ... unless I completely misunderstood the
> code, of course.

test_be_bit() fences everything off we don't support. Simply falling 
through here and returning 0 at the end doesn't make any sense either.

> 
> I think I'll go with Jason's v8 (+ compat machine handling on top) for now,
> so that we better record his state of the patch, and then we can do cleanup
> patches on top later.

Feel free to commit what you want (I'll be happy to see Jason's work 
upstream for good), but note that

1) I don't feel confident to give the original patch my ack/rb
2) I am not a supporter of committing code that has known issues
3) I won't follow up with additional cleanup patches because I already 
spent more time on this than I originally planned.

-- 
Thanks,

David / dhildenb



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-23  6:37         ` David Hildenbrand
@ 2022-09-23  9:19           ` Jason A. Donenfeld
  2022-09-23 10:47             ` David Hildenbrand
  0 siblings, 1 reply; 29+ messages in thread
From: Jason A. Donenfeld @ 2022-09-23  9:19 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Thomas Huth, qemu-devel, qemu-s390x, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

Hi David & Thomas,

On Fri, Sep 23, 2022 at 08:37:50AM +0200, David Hildenbrand wrote:
> > But the point is that the "msa" helper is also called for other instructions
> > like PCKMO which can also be called with code 3. And there it means
> > something completely different. ... unless I completely misunderstood the
> > code, of course.
> 
> test_be_bit() fences everything off we don't support. Simply falling 
> through here and returning 0 at the end doesn't make any sense either.

Indeed you're right here, and also, there's this line in your code:

    g_assert(type == S390_FEAT_TYPE_KIMD || type == S390_FEAT_TYPE_KLMD);

which means things *are* bounded to just those types as Thomas wants
too.

> > I think I'll go with Jason's v8 (+ compat machine handling on top) for now,
> > so that we better record his state of the patch, and then we can do cleanup
> > patches on top later.

I think doing things in layered steps makes sense, so we actually have a
record of how this changes, rather than trying to sneak in huge changes
to a "v8.1" patch, which lore doesn't even understand. At the same time,
I think David's refactoring is for the most part a quite nice
improvement that we shouldn't forget about.

> 3) I won't follow up with additional cleanup patches because I already 
> spent more time on this than I originally planned.

What is this B.S.? I spent months on this code and had to poke you a
bunch to review it. You spend one afternoon with it and you're already
burnt out, apparently. Sorry to hear that. But also, something is amiss
when the volunteer completely outside his realm of expertise has a
greater commitment than the professional maintainer. Regardless, seeing
that kind of thing here doesn't make me enthusiastic about contributing
to s390 stuff in the future, in the sense that hearing "I won't work
more on this" from a maintainer is a contagious sentiment; leaders are
emulated.

The 2/2 patch doesn't even apply on top of your "v8.1 1/2", so your
submission isn't even easily apply-able.

So, here, to kick things off in the right direction, and hopefully
motivate you to send something afresh, below is a diff between v8 in its
entirety and your "v8.1 1/2" patch, so that you can send this in. It
sounds like Thomas may have already picked some of the machine version
handling stuff from that, so it might need a little readjustment, but
that's the general idea. It should be somewhat trivial to submit this
and justify the test_be_bit() stuff or that g_assert() or whatever else
separately.

Jason

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 9a2467c889..e18b816aba 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -803,8 +803,11 @@ DEFINE_CCW_MACHINE(7_2, "7.2", true);
 
 static void ccw_machine_7_1_instance_options(MachineState *machine)
 {
+    static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V7_1 };
+
     ccw_machine_7_2_instance_options(machine);
     s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE);
+    s390_set_qemu_cpu_model(0x8561, 15, 1, qemu_cpu_feat);
 }
 
 static void ccw_machine_7_1_class_options(MachineClass *mc)
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index aaade67574..1e3b7c0dc9 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -744,13 +744,16 @@ static uint16_t qemu_V7_0[] = {
     S390_FEAT_MISC_INSTRUCTION_EXT3,
 };
 
+static uint16_t qemu_V7_1[] = {
+    S390_FEAT_VECTOR_ENH2,
+};
+
 /*
  * Features for the "qemu" CPU model of the latest QEMU machine and the "max"
  * CPU model under TCG. Don't include features that are not part of the full
  * feature set of the current "max" CPU model generation.
  */
 static uint16_t qemu_MAX[] = {
-    S390_FEAT_VECTOR_ENH2,
     S390_FEAT_MSA_EXT_5,
     S390_FEAT_KIMD_SHA_512,
     S390_FEAT_KLMD_SHA_512,
@@ -877,6 +880,7 @@ static FeatGroupDefSpec QemuFeatDef[] = {
     QEMU_FEAT_INITIALIZER(V6_0),
     QEMU_FEAT_INITIALIZER(V6_2),
     QEMU_FEAT_INITIALIZER(V7_0),
+    QEMU_FEAT_INITIALIZER(V7_1),
     QEMU_FEAT_INITIALIZER(MAX),
 };
 
diff --git a/target/s390x/tcg/crypto_helper.c b/target/s390x/tcg/crypto_helper.c
index 0daa9a2dd9..762b277884 100644
--- a/target/s390x/tcg/crypto_helper.c
+++ b/target/s390x/tcg/crypto_helper.c
@@ -21,13 +21,34 @@
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 
-static uint64_t R(uint64_t x, int c) { return (x >> c) | (x << (64 - c)); }
-static uint64_t Ch(uint64_t x, uint64_t y, uint64_t z) { return (x & y) ^ (~x & z); }
-static uint64_t Maj(uint64_t x, uint64_t y, uint64_t z) { return (x & y) ^ (x & z) ^ (y & z); }
-static uint64_t Sigma0(uint64_t x) { return R(x, 28) ^ R(x, 34) ^ R(x, 39); }
-static uint64_t Sigma1(uint64_t x) { return R(x, 14) ^ R(x, 18) ^ R(x, 41); }
-static uint64_t sigma0(uint64_t x) { return R(x, 1) ^ R(x, 8) ^ (x >> 7); }
-static uint64_t sigma1(uint64_t x) { return R(x, 19) ^ R(x, 61) ^ (x >> 6); }
+static uint64_t R(uint64_t x, int c)
+{
+    return (x >> c) | (x << (64 - c));
+}
+static uint64_t Ch(uint64_t x, uint64_t y, uint64_t z)
+{
+    return (x & y) ^ (~x & z);
+}
+static uint64_t Maj(uint64_t x, uint64_t y, uint64_t z)
+{
+    return (x & y) ^ (x & z) ^ (y & z);
+}
+static uint64_t Sigma0(uint64_t x)
+{
+    return R(x, 28) ^ R(x, 34) ^ R(x, 39);
+}
+static uint64_t Sigma1(uint64_t x)
+{
+    return R(x, 14) ^ R(x, 18) ^ R(x, 41);
+}
+static uint64_t sigma0(uint64_t x)
+{
+    return R(x, 1) ^ R(x, 8) ^ (x >> 7);
+}
+static uint64_t sigma1(uint64_t x)
+{
+    return R(x, 19) ^ R(x, 61) ^ (x >> 6);
+}
 
 static const uint64_t K[80] = {
     0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL,
@@ -59,119 +80,169 @@ static const uint64_t K[80] = {
     0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
 };
 
-static int kimd_sha512(CPUS390XState *env, uintptr_t ra, uint64_t parameter_block,
-                       uint64_t *message_reg, uint64_t *len_reg, uint8_t *stack_buffer)
+/* a is icv/ocv, w is a single message block. w will get reused internally. */
+static void sha512_bda(uint64_t a[8], uint64_t w[16])
 {
-    enum { MAX_BLOCKS_PER_RUN = 64 }; /* This is arbitrary, just to keep interactivity. */
-    uint64_t z[8], b[8], a[8], w[16], t;
-    uint64_t message = message_reg ? *message_reg : 0;
-    uint64_t len = *len_reg, processed = 0, addr;
-    int i, j, message_reg_len = 64, blocks = 0, cc = 0;
+    uint64_t t, z[8], b[8];
+    int i, j;
 
-    if (!(env->psw.mask & PSW_MASK_64)) {
-        len = (uint32_t)len;
-        message_reg_len = (env->psw.mask & PSW_MASK_32) ? 32 : 24;
-    }
+    memcpy(z, a, sizeof(z));
+    for (i = 0; i < 80; i++) {
+        memcpy(b, a, sizeof(b));
 
-    for (i = 0; i < 8; ++i) {
-        addr = wrap_address(env, parameter_block + 8 * i);
-        z[i] = a[i] = cpu_ldq_be_data_ra(env, addr, ra);
-    }
-
-    while (len >= 128) {
-        if (++blocks > MAX_BLOCKS_PER_RUN) {
-            cc = 3;
-            break;
+        t = a[7] + Sigma1(a[4]) + Ch(a[4], a[5], a[6]) + K[i] + w[i % 16];
+        b[7] = t + Sigma0(a[0]) + Maj(a[0], a[1], a[2]);
+        b[3] += t;
+        for (j = 0; j < 8; ++j) {
+            a[(j + 1) % 8] = b[j];
         }
-
-        for (i = 0; i < 16; ++i) {
-            if (message) {
-                addr = wrap_address(env, message + 8 * i);
-                w[i] = cpu_ldq_be_data_ra(env, addr, ra);
-            } else {
-                w[i] = be64_to_cpu(((uint64_t *)stack_buffer)[i]);
+        if (i % 16 == 15) {
+            for (j = 0; j < 16; ++j) {
+                w[j] += w[(j + 9) % 16] + sigma0(w[(j + 1) % 16]) +
+                        sigma1(w[(j + 14) % 16]);
             }
         }
+    }
 
-        for (i = 0; i < 80; ++i) {
-            for (j = 0; j < 8; ++j) {
-                b[j] = a[j];
-            }
-            t = a[7] + Sigma1(a[4]) + Ch(a[4], a[5], a[6]) + K[i] + w[i % 16];
-            b[7] = t + Sigma0(a[0]) + Maj(a[0], a[1], a[2]);
-            b[3] += t;
-            for (j = 0; j < 8; ++j) {
-                a[(j + 1) % 8] = b[j];
-            }
-            if (i % 16 == 15) {
-                for (j = 0; j < 16; ++j) {
-                    w[j] += w[(j + 9) % 16] + sigma0(w[(j + 1) % 16]) + sigma1(w[(j + 14) % 16]);
-                }
-            }
-        }
+    for (i = 0; i < 8; i++) {
+        a[i] += z[i];
+    }
+}
 
-        for (i = 0; i < 8; ++i) {
-            a[i] += z[i];
-            z[i] = a[i];
-        }
+/* a is icv/ocv, w is a single message block that needs be64 conversion. */
+static void sha512_bda_be64(uint64_t a[8], uint64_t w[16])
+{
+    uint64_t t[16];
+    int i;
 
-        if (message) {
-            message += 128;
-        } else {
-            stack_buffer += 128;
-        }
-        len -= 128;
-        processed += 128;
+    for (i = 0; i < 16; i++) {
+        t[i] = be64_to_cpu(w[i]);
+    }
+    sha512_bda(a, t);
+}
+
+static void sha512_read_icv(CPUS390XState *env, uint64_t addr,
+                            uint64_t a[8], uintptr_t ra)
+{
+    int i;
+
+    for (i = 0; i < 8; i++, addr += 8) {
+        addr = wrap_address(env, addr);
+        a[i] = cpu_ldq_be_data_ra(env, addr, ra);
     }
+}
+
+static void sha512_write_ocv(CPUS390XState *env, uint64_t addr,
+                             uint64_t a[8], uintptr_t ra)
+{
+    int i;
 
-    for (i = 0; i < 8; ++i) {
-        addr = wrap_address(env, parameter_block + 8 * i);
-        cpu_stq_be_data_ra(env, addr, z[i], ra);
+    for (i = 0; i < 8; i++, addr += 8) {
+        addr = wrap_address(env, addr);
+        cpu_stq_be_data_ra(env, addr, a[i], ra);
     }
+}
+
+static void sha512_read_block(CPUS390XState *env, uint64_t addr,
+                              uint64_t a[16], uintptr_t ra)
+{
+    int i;
 
-    if (message_reg) {
-        *message_reg = deposit64(*message_reg, 0, message_reg_len, message);
+    for (i = 0; i < 16; i++, addr += 8) {
+        addr = wrap_address(env, addr);
+        a[i] = cpu_ldq_be_data_ra(env, addr, ra);
     }
-    *len_reg -= processed;
-    return cc;
 }
 
-static int klmd_sha512(CPUS390XState *env, uintptr_t ra, uint64_t parameter_block,
-                        uint64_t *message_reg, uint64_t *len_reg)
+static void sha512_read_mbl_be64(CPUS390XState *env, uint64_t addr,
+                                 uint8_t a[16], uintptr_t ra)
 {
-    uint8_t x[256];
-    uint64_t i, message, len, addr;
-    int j, message_reg_len = 64, cc;
+    int i;
 
-    cc = kimd_sha512(env, ra, parameter_block, message_reg, len_reg, NULL);
-    if (cc) {
-        return cc;
+    for (i = 0; i < 16; i++, addr += 1) {
+        addr = wrap_address(env, addr);
+        a[i] = cpu_ldub_data_ra(env, addr, ra);
     }
+}
+
+static int cpacf_sha512(CPUS390XState *env, uintptr_t ra, uint64_t param_addr,
+                      uint64_t *message_reg, uint64_t *len_reg, uint32_t type)
+{
+    enum { MAX_BLOCKS_PER_RUN = 64 }; /* Arbitrary: keep interactivity. */
+    uint64_t len = *len_reg, a[8], processed = 0;
+    int i, message_reg_len = 64;
+
+    g_assert(type == S390_FEAT_TYPE_KIMD || type == S390_FEAT_TYPE_KLMD);
 
-    message = *message_reg;
-    len = *len_reg;
     if (!(env->psw.mask & PSW_MASK_64)) {
         len = (uint32_t)len;
         message_reg_len = (env->psw.mask & PSW_MASK_32) ? 32 : 24;
     }
 
-    for (i = 0; i < len; ++i) {
-        addr = wrap_address(env, message + i);
-        x[i] = cpu_ldub_data_ra(env, addr, ra);
+    /* KIMD: length has to be properly aligned. */
+    if (type == S390_FEAT_TYPE_KIMD && !QEMU_IS_ALIGNED(len, 128)) {
+        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
     }
-    memset(x + i, 0, sizeof(x) - i);
-    x[i] = 128;
-    i = i < 112 ? 128 : 256;
-    for (j = 0; j < 16; ++j) {
-        addr = wrap_address(env, parameter_block + 64 + j);
-        x[i - 16 + j] = cpu_ldub_data_ra(env, addr, ra);
+
+    sha512_read_icv(env, param_addr, a, ra);
+
+    /* Process full blocks first. */
+    for (; len >= 128; len -= 128, processed += 128) {
+        uint64_t w[16];
+
+        if (processed >= MAX_BLOCKS_PER_RUN * 128) {
+            break;
+        }
+
+        sha512_read_block(env, *message_reg + processed, w, ra);
+        sha512_bda(a, w);
     }
-    if (kimd_sha512(env, ra, parameter_block, NULL, &i, x)) {
-        g_assert_not_reached(); /* It must handle at least 2 blocks. */
+
+    /* KLMD: Process partial/empty block last. */
+    if (type == S390_FEAT_TYPE_KLMD && len < 128) {
+        uint8_t x[128];
+
+        /* Read the remainder of the message byte-per-byte. */
+        for (i = 0; i < len; i++) {
+            uint64_t addr = wrap_address(env, *message_reg + processed + i);
+
+            x[i] = cpu_ldub_data_ra(env, addr, ra);
+        }
+        /* Pad the remainder with zero and set the top bit. */
+        memset(x + len, 0, 128 - len);
+        x[len] = 128;
+
+        /*
+         * Place the MBL either into this block (if there is space left),
+         * or use an additional one.
+         */
+        if (len < 112) {
+            sha512_read_mbl_be64(env, param_addr + 64, x + 112, ra);
+        }
+        sha512_bda_be64(a, (uint64_t *)x);
+
+        if (len >= 112) {
+            memset(x, 0, 112);
+            sha512_read_mbl_be64(env, param_addr + 64, x + 112, ra);
+            sha512_bda_be64(a, (uint64_t *)x);
+        }
+
+        processed += len;
+        len = 0;
     }
-    *message_reg = deposit64(*message_reg, 0, message_reg_len, message + len);
-    *len_reg -= len;
-    return 0;
+
+    /*
+     * Modify memory after we read all inputs and modify registers only after
+     * writing memory succeeded.
+     *
+     * TODO: if writing fails halfway through (e.g., when crossing page
+     * boundaries), we're in trouble. We'd need something like access_prepare().
+     */
+    sha512_write_ocv(env, param_addr, a, ra);
+    *message_reg = deposit64(*message_reg, 0, message_reg_len,
+                             *message_reg + processed);
+    *len_reg -= processed;
+    return !len ? 0 : 3;
 }
 
 static void fill_buf_random(CPUS390XState *env, uintptr_t ra,
@@ -234,13 +305,8 @@ uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3,
         }
         break;
     case 3: /* CPACF_*_SHA_512 */
-        switch (type) {
-        case S390_FEAT_TYPE_KIMD:
-            return kimd_sha512(env, ra, env->regs[1], &env->regs[r2], &env->regs[r2 + 1], NULL);
-        case S390_FEAT_TYPE_KLMD:
-            return klmd_sha512(env, ra, env->regs[1], &env->regs[r2], &env->regs[r2 + 1]);
-        }
-        break;
+        return cpacf_sha512(env, ra, env->regs[1], &env->regs[r2],
+                            &env->regs[r2 + 1], type);
     case 114: /* CPACF_PRNO_TRNG */
         fill_buf_random(env, ra, &env->regs[r1], &env->regs[r1 + 1]);
         fill_buf_random(env, ra, &env->regs[r2], &env->regs[r2 + 1]);


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v8 1/2] target/s390x: support SHA-512 extensions
  2022-09-22 13:07 ` [PATCH v8 1/2] target/s390x: support SHA-512 extensions David Hildenbrand
  2022-09-22 14:35   ` Jason A. Donenfeld
@ 2022-09-23 10:14   ` Alex Bennée
  1 sibling, 0 replies; 29+ messages in thread
From: Alex Bennée @ 2022-09-23 10:14 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Jason A. Donenfeld, qemu-s390x, Thomas Huth,
	Christian Borntraeger, Richard Henderson, Cornelia Huck,
	Harald Freudenberger, Holger Dengler, qemu-devel


David Hildenbrand <david@redhat.com> writes:

> On 21.09.22 12:07, Jason A. Donenfeld wrote:
>> In order to fully support MSA_EXT_5, we have to support the SHA-512
>> special instructions. So implement those.
>> The implementation began as something TweetNacl-like, and then was
>> adjusted to be useful here. It's not very beautiful, but it is quite
>> short and compact, which is what we're going for.
>> Cc: Thomas Huth <thuth@redhat.com>
>> Cc: David Hildenbrand <david@redhat.com>
>> Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
>> Cc: Richard Henderson <richard.henderson@linaro.org>
>> Cc: Cornelia Huck <cohuck@redhat.com>
>> Cc: Harald Freudenberger <freude@linux.ibm.com>
>> Cc: Holger Dengler <dengler@linux.ibm.com>
>> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
>> ---
>
<snip>
>
> This passes the Linux boot test, but I'm pretty sure I messed up some
> corner case.
>
>
> Interestingly, I discovered tests/tcg/multiarch/sha512.c.

I added that because I wanted to further exercise the vector code and
it is a nice test because it verifies its own results. It gets rebuilt
with various flavours to exercise different sets of instructions e.g.:

  # MVX versions of sha512
  sha512-mvx: CFLAGS=-march=z13 -mvx -O3
  sha512-mvx: sha512.c
          $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)

so it might be worth enabling the SHA512 instructions and building
another variant of the binary. That does depend on the compiler
recognising whats going on and using the appropriate instructions
instead. Alternatively we could extend the test to use compiler
intrinsics if available although I suspect that will get messy quick.

-- 
Alex Bennée


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-23  9:19           ` Jason A. Donenfeld
@ 2022-09-23 10:47             ` David Hildenbrand
  2022-09-23 11:19               ` Jason A. Donenfeld
  0 siblings, 1 reply; 29+ messages in thread
From: David Hildenbrand @ 2022-09-23 10:47 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: Thomas Huth, qemu-devel, qemu-s390x, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

>> 3) I won't follow up with additional cleanup patches because I already
>> spent more time on this than I originally planned.
> 
> What is this B.S.?
> I spent months on this code and had to poke you a
> bunch to review it. You spend one afternoon with it and you're already
> burnt out, apparently. Sorry to hear that. But also, something is amiss

There is a big difference between "burnt out" and "having to 
prioritize". No need to feel sorry.

You must be fortunate if "one afternoon" is not a significant time 
investment. For me it is a significant investment.

> when the volunteer completely outside his realm of expertise has a
> greater commitment than the professional maintainer. Regardless, seeing > that kind of thing here doesn't make me enthusiastic about contributing
> to s390 stuff in the future, in the sense that hearing "I won't work
> more on this" from a maintainer is a contagious sentiment; leaders are
> emulated.

Let's recap:

1. This is very complicated code and a complicated instruction to
    emulate. It's not easy to review. It's not easy code to write for
    someone new to s390x / TCG -- especially to get memory
    accesses right and work around the lack of memory transactions.

2. We provided early feedback fast, but I expressed that there are
    certain things that need improvements and that might be coded in a
    way that make it easier to understand/review. I had to play myself
    with that code to figure out what it even does and how we can improve
    it. As I was overloaded lately (including vacation, conferences),
    that time was not hard to find because other projects were of higher
    priority on my end.

3. You really pushed me hard offline to look into it. I did it
    ASAP because it fell through the cracks and I expressed that I am
    sorry. I proposed to get it ready for upstream and you agreed. Thomas
    was aware of that communication.


I sent out something ASAP to get your stuff finally merged. I really 
tried my best yesterday. Apparently I failed.

In an ideal world I would have *never* sent out that code. I would have 
provided review feedback and guidance to make that code easier to grasp 
and sort out the remaining issues. I thought we (Thomas included) had an 
agreement that that's the way we are going to do it. Apparently I was wrong.


Most probably I lived in the kernel space too long such that we don't 
rush something upstream. For that reason *I* sent out a patch with 
fixups included instead of requesting more changes after you clearly 
expressed that you don't want to wait any longer.


Here I am, getting told by Thomas that we now do it differently now. 
What I really tried to express here is: if Thomas wants to commit things 
differently now, maybe he can just separate the cleanup parts. I really 
*don't want* to send out a multi-patch series to cleanup individual 
parts -- that takes significantly more time. Especially not if something 
is not committed yet.


Yes, such upstream experiences are discouraging to new contributors. But 
such upstream experiences discourage maintainer like me as well. This 
morning I honestly asked myself if I should still be listed as a 
maintainer under s390x/tcg.

Not sure if s390x/tcg would be better without me, but then I get to 
disappoint less people.

> 
> The 2/2 patch doesn't even apply on top of your "v8.1 1/2", so your
> submission isn't even easily apply-able.

Sorry, but that's a piece of cake for Thomas. And he could always 
request a complete resend from me anytime.

-- 
Thanks,

David / dhildenb



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-23 10:47             ` David Hildenbrand
@ 2022-09-23 11:19               ` Jason A. Donenfeld
  2022-09-23 11:35                 ` David Hildenbrand
  0 siblings, 1 reply; 29+ messages in thread
From: Jason A. Donenfeld @ 2022-09-23 11:19 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Thomas Huth, qemu-devel, qemu-s390x, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On Fri, Sep 23, 2022 at 12:47 PM David Hildenbrand <david@redhat.com> wrote:
> You must be fortunate if "one afternoon" is not a significant time
> investment. For me it is a significant investment.

For me too, to say the least of the multiple afternoons I've spent on
this patch set. Getting back to technical content:

> and sort out the remaining issues. I thought we (Thomas included) had an
> agreement that that's the way we are going to do it. Apparently I was wrong.
> Most probably I lived in the kernel space too long such that we don't
> rush something upstream. For that reason *I* sent out a patch with
> Here I am, getting told by Thomas that we now do it differently now.
> What I really tried to express here is: if Thomas wants to commit things
> differently now, maybe he can just separate the cleanup parts. I really
> *don't want* to send out a multi-patch series to cleanup individual
> parts -- that takes significantly more time. Especially not if something
> is not committed yet.

To recap what's been fixed in your v8.1, versus what's been refactored
out of style preference:

1) It handles the machine versioning.
2) It throws an exception on length alignment in KIMD mode:
+    /* KIMD: length has to be properly aligned. */
+    if (type == S390_FEAT_TYPE_KIMD && !QEMU_IS_ALIGNED(len, 128)) {
+        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
+    }
3) It asserts if type is neither KIMD vs KLMD, with:
+    g_assert(type == S390_FEAT_TYPE_KIMD || type == S390_FEAT_TYPE_KLMD);


With (1), Thomas added that to v8.

With (3), doesn't the upper layer of the tcg dispatcher already check
that, and return an error back to the guest? If so, then your change
doesn't do anything. If not, then your change introduces a DoS, since
a guest can now crash the host process by triggering that g_assert(),
right? I had assumed the tcg dispatcher was checking that.

With (2), I found this text:

4. For COMPUTE INTERMEDIATE MESSAGE
DIGEST, the second-operand length is not a multiple
of the data block size of the designated
function (see Figure 7-65 on page 7-102 for
COMPUTE INTERMEDIATE MESSAGE
DIGEST functions). This specification-exception
condition does not apply to the query function,
nor does it apply to COMPUTE LAST MESSAGE
DIGEST.

This part seems like the most important delta between Thomas' plan and
what you posted in v8.1.

With that said, your style refactorings are probably a nice thing to
keep around too.

Jason


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-23 11:19               ` Jason A. Donenfeld
@ 2022-09-23 11:35                 ` David Hildenbrand
  2022-09-23 11:46                   ` Jason A. Donenfeld
  0 siblings, 1 reply; 29+ messages in thread
From: David Hildenbrand @ 2022-09-23 11:35 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: Thomas Huth, qemu-devel, qemu-s390x, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On 23.09.22 13:19, Jason A. Donenfeld wrote:
> On Fri, Sep 23, 2022 at 12:47 PM David Hildenbrand <david@redhat.com> wrote:
>> You must be fortunate if "one afternoon" is not a significant time
>> investment. For me it is a significant investment.
> 
> For me too, to say the least of the multiple afternoons I've spent on
> this patch set. Getting back to technical content:
> 
>> and sort out the remaining issues. I thought we (Thomas included) had an
>> agreement that that's the way we are going to do it. Apparently I was wrong.
>> Most probably I lived in the kernel space too long such that we don't
>> rush something upstream. For that reason *I* sent out a patch with
>> Here I am, getting told by Thomas that we now do it differently now.
>> What I really tried to express here is: if Thomas wants to commit things
>> differently now, maybe he can just separate the cleanup parts. I really
>> *don't want* to send out a multi-patch series to cleanup individual
>> parts -- that takes significantly more time. Especially not if something
>> is not committed yet.
> 
> To recap what's been fixed in your v8.1, versus what's been refactored
> out of style preference:
> 
> 1) It handles the machine versioning.
> 2) It throws an exception on length alignment in KIMD mode:
> +    /* KIMD: length has to be properly aligned. */
> +    if (type == S390_FEAT_TYPE_KIMD && !QEMU_IS_ALIGNED(len, 128)) {
> +        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
> +    }
> 3) It asserts if type is neither KIMD vs KLMD, with:
> +    g_assert(type == S390_FEAT_TYPE_KIMD || type == S390_FEAT_TYPE_KLMD);
> 

One important part is

4) No memory modifications before all inputs were read

-- 
Thanks,

David / dhildenb



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-23 11:35                 ` David Hildenbrand
@ 2022-09-23 11:46                   ` Jason A. Donenfeld
  2022-09-23 12:05                     ` Thomas Huth
  2022-09-23 12:13                     ` David Hildenbrand
  0 siblings, 2 replies; 29+ messages in thread
From: Jason A. Donenfeld @ 2022-09-23 11:46 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Thomas Huth, qemu-devel, qemu-s390x, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

Hi David,

On Fri, Sep 23, 2022 at 1:35 PM David Hildenbrand <david@redhat.com> wrote:
>
> On 23.09.22 13:19, Jason A. Donenfeld wrote:
> > On Fri, Sep 23, 2022 at 12:47 PM David Hildenbrand <david@redhat.com> wrote:
> >> You must be fortunate if "one afternoon" is not a significant time
> >> investment. For me it is a significant investment.
> >
> > For me too, to say the least of the multiple afternoons I've spent on
> > this patch set. Getting back to technical content:
> >
> >> and sort out the remaining issues. I thought we (Thomas included) had an
> >> agreement that that's the way we are going to do it. Apparently I was wrong.
> >> Most probably I lived in the kernel space too long such that we don't
> >> rush something upstream. For that reason *I* sent out a patch with
> >> Here I am, getting told by Thomas that we now do it differently now.
> >> What I really tried to express here is: if Thomas wants to commit things
> >> differently now, maybe he can just separate the cleanup parts. I really
> >> *don't want* to send out a multi-patch series to cleanup individual
> >> parts -- that takes significantly more time. Especially not if something
> >> is not committed yet.
> >
> > To recap what's been fixed in your v8.1, versus what's been refactored
> > out of style preference:
> >
> > 1) It handles the machine versioning.
> > 2) It throws an exception on length alignment in KIMD mode:
> > +    /* KIMD: length has to be properly aligned. */
> > +    if (type == S390_FEAT_TYPE_KIMD && !QEMU_IS_ALIGNED(len, 128)) {
> > +        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
> > +    }
> > 3) It asserts if type is neither KIMD vs KLMD, with:
> > +    g_assert(type == S390_FEAT_TYPE_KIMD || type == S390_FEAT_TYPE_KLMD);
> >
>
> One important part is
>
> 4) No memory modifications before all inputs were read

Ahhh, which v8's klmd doesn't do, since it updates the parameter block
before doing the last block. Is this a requirement of the spec? If
not, then it doesn't matter. But if so, v8's approach is truly
hopeless, and we'd be remiss to not go with your refactoring that
accomplishes this.

Jason


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-23 11:46                   ` Jason A. Donenfeld
@ 2022-09-23 12:05                     ` Thomas Huth
  2022-09-23 12:07                       ` Jason A. Donenfeld
  2022-09-23 12:13                     ` David Hildenbrand
  1 sibling, 1 reply; 29+ messages in thread
From: Thomas Huth @ 2022-09-23 12:05 UTC (permalink / raw)
  To: Jason A. Donenfeld, David Hildenbrand
  Cc: qemu-devel, qemu-s390x, Christian Borntraeger, Richard Henderson,
	Cornelia Huck, Harald Freudenberger, Holger Dengler

On 23/09/2022 13.46, Jason A. Donenfeld wrote:
> Hi David,
> 
> On Fri, Sep 23, 2022 at 1:35 PM David Hildenbrand <david@redhat.com> wrote:
>>
>> On 23.09.22 13:19, Jason A. Donenfeld wrote:
>>> On Fri, Sep 23, 2022 at 12:47 PM David Hildenbrand <david@redhat.com> wrote:
>>>> You must be fortunate if "one afternoon" is not a significant time
>>>> investment. For me it is a significant investment.
>>>
>>> For me too, to say the least of the multiple afternoons I've spent on
>>> this patch set. Getting back to technical content:
>>>
>>>> and sort out the remaining issues. I thought we (Thomas included) had an
>>>> agreement that that's the way we are going to do it. Apparently I was wrong.
>>>> Most probably I lived in the kernel space too long such that we don't
>>>> rush something upstream. For that reason *I* sent out a patch with
>>>> Here I am, getting told by Thomas that we now do it differently now.
>>>> What I really tried to express here is: if Thomas wants to commit things
>>>> differently now, maybe he can just separate the cleanup parts. I really
>>>> *don't want* to send out a multi-patch series to cleanup individual
>>>> parts -- that takes significantly more time. Especially not if something
>>>> is not committed yet.
>>>
>>> To recap what's been fixed in your v8.1, versus what's been refactored
>>> out of style preference:
>>>
>>> 1) It handles the machine versioning.
>>> 2) It throws an exception on length alignment in KIMD mode:
>>> +    /* KIMD: length has to be properly aligned. */
>>> +    if (type == S390_FEAT_TYPE_KIMD && !QEMU_IS_ALIGNED(len, 128)) {
>>> +        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
>>> +    }
>>> 3) It asserts if type is neither KIMD vs KLMD, with:
>>> +    g_assert(type == S390_FEAT_TYPE_KIMD || type == S390_FEAT_TYPE_KLMD);
>>>
>>
>> One important part is
>>
>> 4) No memory modifications before all inputs were read
> 
> Ahhh, which v8's klmd doesn't do, since it updates the parameter block
> before doing the last block. Is this a requirement of the spec? If
> not, then it doesn't matter. But if so, v8's approach is truly
> hopeless, and we'd be remiss to not go with your refactoring that
> accomplishes this.

Ok, great, if you're fine with the rework, I'll go with David's v8.1 
instead. (keeping an entry on my TODO list to rework that ugly generic "msa" 
helper function one day - this really kept me being confused for much of my 
patch review time)

  Thomas



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-23 12:05                     ` Thomas Huth
@ 2022-09-23 12:07                       ` Jason A. Donenfeld
  2022-09-23 12:45                         ` Thomas Huth
  0 siblings, 1 reply; 29+ messages in thread
From: Jason A. Donenfeld @ 2022-09-23 12:07 UTC (permalink / raw)
  To: Thomas Huth
  Cc: David Hildenbrand, qemu-devel, qemu-s390x, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On Fri, Sep 23, 2022 at 2:05 PM Thomas Huth <thuth@redhat.com> wrote:
>
> On 23/09/2022 13.46, Jason A. Donenfeld wrote:
> > Hi David,
> >
> > On Fri, Sep 23, 2022 at 1:35 PM David Hildenbrand <david@redhat.com> wrote:
> >>
> >> On 23.09.22 13:19, Jason A. Donenfeld wrote:
> >>> On Fri, Sep 23, 2022 at 12:47 PM David Hildenbrand <david@redhat.com> wrote:
> >>>> You must be fortunate if "one afternoon" is not a significant time
> >>>> investment. For me it is a significant investment.
> >>>
> >>> For me too, to say the least of the multiple afternoons I've spent on
> >>> this patch set. Getting back to technical content:
> >>>
> >>>> and sort out the remaining issues. I thought we (Thomas included) had an
> >>>> agreement that that's the way we are going to do it. Apparently I was wrong.
> >>>> Most probably I lived in the kernel space too long such that we don't
> >>>> rush something upstream. For that reason *I* sent out a patch with
> >>>> Here I am, getting told by Thomas that we now do it differently now.
> >>>> What I really tried to express here is: if Thomas wants to commit things
> >>>> differently now, maybe he can just separate the cleanup parts. I really
> >>>> *don't want* to send out a multi-patch series to cleanup individual
> >>>> parts -- that takes significantly more time. Especially not if something
> >>>> is not committed yet.
> >>>
> >>> To recap what's been fixed in your v8.1, versus what's been refactored
> >>> out of style preference:
> >>>
> >>> 1) It handles the machine versioning.
> >>> 2) It throws an exception on length alignment in KIMD mode:
> >>> +    /* KIMD: length has to be properly aligned. */
> >>> +    if (type == S390_FEAT_TYPE_KIMD && !QEMU_IS_ALIGNED(len, 128)) {
> >>> +        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
> >>> +    }
> >>> 3) It asserts if type is neither KIMD vs KLMD, with:
> >>> +    g_assert(type == S390_FEAT_TYPE_KIMD || type == S390_FEAT_TYPE_KLMD);
> >>>
> >>
> >> One important part is
> >>
> >> 4) No memory modifications before all inputs were read
> >
> > Ahhh, which v8's klmd doesn't do, since it updates the parameter block
> > before doing the last block. Is this a requirement of the spec? If
> > not, then it doesn't matter. But if so, v8's approach is truly
> > hopeless, and we'd be remiss to not go with your refactoring that
> > accomplishes this.
>
> Ok, great, if you're fine with the rework, I'll go with David's v8.1
> instead. (keeping an entry on my TODO list to rework that ugly generic "msa"
> helper function one day - this really kept me being confused for much of my
> patch review time)

Okay, sure. Can one of you just look to see if that g_assert() is
going to be a DoS vector, though, or if it'll never be reached if the
prior code goes well? That's the one remaining thing I'm not sure
about.

Do you want me to rebase 2/2 on top of v8.1?

Jason


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-23 11:46                   ` Jason A. Donenfeld
  2022-09-23 12:05                     ` Thomas Huth
@ 2022-09-23 12:13                     ` David Hildenbrand
  1 sibling, 0 replies; 29+ messages in thread
From: David Hildenbrand @ 2022-09-23 12:13 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: Thomas Huth, qemu-devel, qemu-s390x, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On 23.09.22 13:46, Jason A. Donenfeld wrote:
> Hi David,
> 
> On Fri, Sep 23, 2022 at 1:35 PM David Hildenbrand <david@redhat.com> wrote:
>>
>> On 23.09.22 13:19, Jason A. Donenfeld wrote:
>>> On Fri, Sep 23, 2022 at 12:47 PM David Hildenbrand <david@redhat.com> wrote:
>>>> You must be fortunate if "one afternoon" is not a significant time
>>>> investment. For me it is a significant investment.
>>>
>>> For me too, to say the least of the multiple afternoons I've spent on
>>> this patch set. Getting back to technical content:
>>>
>>>> and sort out the remaining issues. I thought we (Thomas included) had an
>>>> agreement that that's the way we are going to do it. Apparently I was wrong.
>>>> Most probably I lived in the kernel space too long such that we don't
>>>> rush something upstream. For that reason *I* sent out a patch with
>>>> Here I am, getting told by Thomas that we now do it differently now.
>>>> What I really tried to express here is: if Thomas wants to commit things
>>>> differently now, maybe he can just separate the cleanup parts. I really
>>>> *don't want* to send out a multi-patch series to cleanup individual
>>>> parts -- that takes significantly more time. Especially not if something
>>>> is not committed yet.
>>>
>>> To recap what's been fixed in your v8.1, versus what's been refactored
>>> out of style preference:
>>>
>>> 1) It handles the machine versioning.
>>> 2) It throws an exception on length alignment in KIMD mode:
>>> +    /* KIMD: length has to be properly aligned. */
>>> +    if (type == S390_FEAT_TYPE_KIMD && !QEMU_IS_ALIGNED(len, 128)) {
>>> +        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
>>> +    }
>>> 3) It asserts if type is neither KIMD vs KLMD, with:
>>> +    g_assert(type == S390_FEAT_TYPE_KIMD || type == S390_FEAT_TYPE_KLMD);
>>>
>>
>> One important part is
>>
>> 4) No memory modifications before all inputs were read
> 
> Ahhh, which v8's klmd doesn't do, since it updates the parameter block
> before doing the last block. Is this a requirement of the spec? If

Right, and not only the parameter block, but also registers IIRC.


It depend on the instruction and exception. IIRC, exceptions can be 
nullifying, terminating, suppressing, and partially-completing ...

Suppression: no state modification. PSW updated. Exception triggered. 
"The contents of any result fields, including the condition code, are 
not changed."

Nullification: no state modification. PSW not incremented. Exception 
triggered.

Termination: state partially changed. Bad (e.g., machine check). 
Exception triggered.

Partial completion only applies to "Interruptible Instructions". Instead 
of triggering an exception, the instruction exits (with CC=3 or so) and 
modified the state accordingly. There are only a handful of such 
instructions.



There is a chapter called "Types of Instruction Ending" in the PoP 
that's an interesting read.


So in case of Suppression/Nullification, the expectation is that no 
state (memory, register content) was updated when the exception triggers.


Depending on the way the instruction is used and how exceptions are 
handled, that can be a real issue, for example, if the program assumes 
that registers were not updated, or that memory wasn't updated -- but 
they actually were.

> not, then it doesn't matter. But if so, v8's approach is truly
> hopeless, and we'd be remiss to not go with your refactoring that
> accomplishes this.
I wouldn't call it hopeless, but that was the real reason why a 
restructured your code at all and why I had to play with the code myself 
to see if it can be done any better. All the moving stuff into other 
functions were just attempts to keep the code readable when unifying 
both functions :)

As I said, handling state update is non-trivial, and that instruction is 
especially hard to implement.

I *assume* that we never fail that way in the Linux kernel use case, 
because we don't rely on exceptions at all. So one might argue that v8 
is "good enough" for that.

-- 
Thanks,

David / dhildenb



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8.1 1/2] target/s390x: support SHA-512 extensions
  2022-09-23 12:07                       ` Jason A. Donenfeld
@ 2022-09-23 12:45                         ` Thomas Huth
  0 siblings, 0 replies; 29+ messages in thread
From: Thomas Huth @ 2022-09-23 12:45 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: David Hildenbrand, qemu-devel, qemu-s390x, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On 23/09/2022 14.07, Jason A. Donenfeld wrote:
> On Fri, Sep 23, 2022 at 2:05 PM Thomas Huth <thuth@redhat.com> wrote:
>>
>> On 23/09/2022 13.46, Jason A. Donenfeld wrote:
>>> Hi David,
>>>
>>> On Fri, Sep 23, 2022 at 1:35 PM David Hildenbrand <david@redhat.com> wrote:
>>>>
>>>> On 23.09.22 13:19, Jason A. Donenfeld wrote:
>>>>> On Fri, Sep 23, 2022 at 12:47 PM David Hildenbrand <david@redhat.com> wrote:
>>>>>> You must be fortunate if "one afternoon" is not a significant time
>>>>>> investment. For me it is a significant investment.
>>>>>
>>>>> For me too, to say the least of the multiple afternoons I've spent on
>>>>> this patch set. Getting back to technical content:
>>>>>
>>>>>> and sort out the remaining issues. I thought we (Thomas included) had an
>>>>>> agreement that that's the way we are going to do it. Apparently I was wrong.
>>>>>> Most probably I lived in the kernel space too long such that we don't
>>>>>> rush something upstream. For that reason *I* sent out a patch with
>>>>>> Here I am, getting told by Thomas that we now do it differently now.
>>>>>> What I really tried to express here is: if Thomas wants to commit things
>>>>>> differently now, maybe he can just separate the cleanup parts. I really
>>>>>> *don't want* to send out a multi-patch series to cleanup individual
>>>>>> parts -- that takes significantly more time. Especially not if something
>>>>>> is not committed yet.
>>>>>
>>>>> To recap what's been fixed in your v8.1, versus what's been refactored
>>>>> out of style preference:
>>>>>
>>>>> 1) It handles the machine versioning.
>>>>> 2) It throws an exception on length alignment in KIMD mode:
>>>>> +    /* KIMD: length has to be properly aligned. */
>>>>> +    if (type == S390_FEAT_TYPE_KIMD && !QEMU_IS_ALIGNED(len, 128)) {
>>>>> +        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
>>>>> +    }
>>>>> 3) It asserts if type is neither KIMD vs KLMD, with:
>>>>> +    g_assert(type == S390_FEAT_TYPE_KIMD || type == S390_FEAT_TYPE_KLMD);
>>>>>
>>>>
>>>> One important part is
>>>>
>>>> 4) No memory modifications before all inputs were read
>>>
>>> Ahhh, which v8's klmd doesn't do, since it updates the parameter block
>>> before doing the last block. Is this a requirement of the spec? If
>>> not, then it doesn't matter. But if so, v8's approach is truly
>>> hopeless, and we'd be remiss to not go with your refactoring that
>>> accomplishes this.
>>
>> Ok, great, if you're fine with the rework, I'll go with David's v8.1
>> instead. (keeping an entry on my TODO list to rework that ugly generic "msa"
>> helper function one day - this really kept me being confused for much of my
>> patch review time)
> 
> Okay, sure. Can one of you just look to see if that g_assert() is
> going to be a DoS vector, though, or if it'll never be reached if the
> prior code goes well? That's the one remaining thing I'm not sure
> about.
> 
> Do you want me to rebase 2/2 on top of v8.1?

Thanks, but I think it's a trivial contextual conflict only - I can fix that 
up when picking up the patches.

  Thomas



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8 2/2] target/s390x: support PRNO_TRNG instruction
  2022-09-21 10:07 ` [PATCH v8 2/2] target/s390x: support PRNO_TRNG instruction Jason A. Donenfeld
  2022-09-22 13:09   ` David Hildenbrand
  2022-09-22 15:40   ` Thomas Huth
@ 2022-09-26 15:11   ` Thomas Huth
  2022-09-26 15:19     ` Jason A. Donenfeld
  2 siblings, 1 reply; 29+ messages in thread
From: Thomas Huth @ 2022-09-26 15:11 UTC (permalink / raw)
  To: Jason A. Donenfeld, qemu-s390x, qemu-devel
  Cc: David Hildenbrand, Christian Borntraeger, Richard Henderson,
	Cornelia Huck, Harald Freudenberger, Holger Dengler

On 21/09/2022 12.07, Jason A. Donenfeld wrote:
> In order for hosts running inside of TCG to initialize the kernel's
> random number generator, we should support the PRNO_TRNG instruction,
> backed in the usual way with the qemu_guest_getrandom helper. This is
> confirmed working on Linux 5.19.
> 
> Cc: Thomas Huth <thuth@redhat.com>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
> Cc: Richard Henderson <richard.henderson@linaro.org>
> Cc: Cornelia Huck <cohuck@redhat.com>
> Cc: Harald Freudenberger <freude@linux.ibm.com>
> Cc: Holger Dengler <dengler@linux.ibm.com>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> ---
>   target/s390x/gen-features.c      |  1 +
>   target/s390x/tcg/crypto_helper.c | 30 ++++++++++++++++++++++++++++++
>   2 files changed, 31 insertions(+)

Seems like this is even working fine with older Linux kernels ... your patch accidentally broke test_s390x_devices in tests/avocado/machine_s390_ccw_virtio.py: This test adds two virtio-rng devices to the guest, then ejects them to see whether /dev/hwrng will be gone ... which does not happen anymore with the prno-trng feature enabled :-)

I'm going to squash this one-liner to fix this issue:

diff a/tests/avocado/machine_s390_ccw_virtio.py b/tests/avocado/machine_s390_ccw_virtio.py
--- a/tests/avocado/machine_s390_ccw_virtio.py
+++ b/tests/avocado/machine_s390_ccw_virtio.py
@@ -66,6 +66,7 @@ def test_s390x_devices(self):
                           '-kernel', kernel_path,
                           '-initrd', initrd_path,
                           '-append', kernel_command_line,
+                         '-cpu', 'max,prno-trng=off',
                           '-device', 'virtio-net-ccw,devno=fe.1.1111',
                           '-device',
                           'virtio-rng-ccw,devno=fe.2.0000,max_revision=0,id=rn1',

  Thomas



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v8 2/2] target/s390x: support PRNO_TRNG instruction
  2022-09-26 15:11   ` Thomas Huth
@ 2022-09-26 15:19     ` Jason A. Donenfeld
  0 siblings, 0 replies; 29+ messages in thread
From: Jason A. Donenfeld @ 2022-09-26 15:19 UTC (permalink / raw)
  To: Thomas Huth
  Cc: qemu-s390x, qemu-devel, David Hildenbrand, Christian Borntraeger,
	Richard Henderson, Cornelia Huck, Harald Freudenberger,
	Holger Dengler

On Mon, Sep 26, 2022 at 5:11 PM Thomas Huth <thuth@redhat.com> wrote:
> Seems like this is even working fine with older Linux kernels ...

Oh good!


 your patch accidentally broke test_s390x_devices in
tests/avocado/machine_s390_ccw_virtio.py: This test adds two
virtio-rng devices to the guest, then ejects them to see whether
/dev/hwrng will be gone ... which does not happen anymore with the
prno-trng feature enabled :-)
>
> I'm going to squash this one-liner to fix this issue:

Seems reasonable. Thanks.

Jason


^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2022-09-26 15:27 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-21 10:07 [PATCH v8 1/2] target/s390x: support SHA-512 extensions Jason A. Donenfeld
2022-09-21 10:07 ` [PATCH v8 2/2] target/s390x: support PRNO_TRNG instruction Jason A. Donenfeld
2022-09-22 13:09   ` David Hildenbrand
2022-09-22 15:40   ` Thomas Huth
2022-09-26 15:11   ` Thomas Huth
2022-09-26 15:19     ` Jason A. Donenfeld
2022-09-22 13:07 ` [PATCH v8 1/2] target/s390x: support SHA-512 extensions David Hildenbrand
2022-09-22 14:35   ` Jason A. Donenfeld
2022-09-22 14:51     ` David Hildenbrand
2022-09-22 15:11       ` David Hildenbrand
2022-09-22 15:36       ` Thomas Huth
2022-09-22 15:37         ` David Hildenbrand
2022-09-23 10:14   ` Alex Bennée
2022-09-22 15:38 ` [PATCH v8.1 " David Hildenbrand
2022-09-22 15:55   ` Thomas Huth
2022-09-22 16:35     ` Jason A. Donenfeld
2022-09-23  6:25       ` Thomas Huth
2022-09-22 17:18     ` David Hildenbrand
2022-09-23  6:23       ` Thomas Huth
2022-09-23  6:37         ` David Hildenbrand
2022-09-23  9:19           ` Jason A. Donenfeld
2022-09-23 10:47             ` David Hildenbrand
2022-09-23 11:19               ` Jason A. Donenfeld
2022-09-23 11:35                 ` David Hildenbrand
2022-09-23 11:46                   ` Jason A. Donenfeld
2022-09-23 12:05                     ` Thomas Huth
2022-09-23 12:07                       ` Jason A. Donenfeld
2022-09-23 12:45                         ` Thomas Huth
2022-09-23 12:13                     ` David Hildenbrand

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).