qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Cornelia Huck <cohuck@redhat.com>
To: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-s390x@nongnu.org,
	Richard Henderson <richard.henderson@linaro.org>,
	qemu-devel@nongnu.org, David Hildenbrand <david@redhat.com>
Subject: [Qemu-devel] [PULL 16/34] s390x/tcg: Fault-safe memset
Date: Thu, 19 Sep 2019 14:40:57 +0200	[thread overview]
Message-ID: <20190919124115.11510-17-cohuck@redhat.com> (raw)
In-Reply-To: <20190919124115.11510-1-cohuck@redhat.com>

From: David Hildenbrand <david@redhat.com>

Replace fast_memset() by access_memset(), that first tries to probe
access to all affected pages (maximum is two). We'll use the same
mechanism for other types of accesses soon.

Only in very rare cases (especially TLB_NOTDIRTY), we'll have to
fallback to ld/st helpers. Try to speed up that case as suggested by
Richard.

We'll rework most involved handlers soon to do all accesses via new
fault-safe helpers, especially MVC.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/mem_helper.c | 123 +++++++++++++++++++++++++++++++-------
 1 file changed, 103 insertions(+), 20 deletions(-)

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index a24506676b8d..dd5da7074610 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -117,27 +117,95 @@ static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
     }
 }
 
-static void fast_memset(CPUS390XState *env, uint64_t dest, uint8_t byte,
-                        uint32_t l, uintptr_t ra)
+/* An access covers at most 4096 bytes and therefore at most two pages. */
+typedef struct S390Access {
+    target_ulong vaddr1;
+    target_ulong vaddr2;
+    char *haddr1;
+    char *haddr2;
+    uint16_t size1;
+    uint16_t size2;
+    /*
+     * If we can't access the host page directly, we'll have to do I/O access
+     * via ld/st helpers. These are internal details, so we store the
+     * mmu idx to do the access here instead of passing it around in the
+     * helpers. Maybe, one day we can get rid of ld/st access - once we can
+     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
+     * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
+     * pages, we might trigger a new MMU translation - very unlikely that
+     * the mapping changes in between and we would trigger a fault.
+     */
+    int mmu_idx;
+} S390Access;
+
+static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
+                                 MMUAccessType access_type, int mmu_idx,
+                                 uintptr_t ra)
 {
-    int mmu_idx = cpu_mmu_index(env, false);
+    S390Access access = {
+        .vaddr1 = vaddr,
+        .size1 = MIN(size, -(vaddr | TARGET_PAGE_MASK)),
+        .mmu_idx = mmu_idx,
+    };
 
-    while (l > 0) {
-        void *p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, mmu_idx);
-        if (p) {
-            /* Access to the whole page in write mode granted.  */
-            uint32_t l_adj = adj_len_to_page(l, dest);
-            memset(p, byte, l_adj);
-            dest += l_adj;
-            l -= l_adj;
+    g_assert(size > 0 && size <= 4096);
+    access.haddr1 = probe_access(env, access.vaddr1, access.size1, access_type,
+                                 mmu_idx, ra);
+
+    if (unlikely(access.size1 != size)) {
+        /* The access crosses page boundaries. */
+        access.vaddr2 = wrap_address(env, vaddr + access.size1);
+        access.size2 = size - access.size1;
+        access.haddr2 = probe_access(env, access.vaddr2, access.size2,
+                                     access_type, mmu_idx, ra);
+    }
+    return access;
+}
+
+/* Helper to handle memset on a single page. */
+static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
+                             uint8_t byte, uint16_t size, int mmu_idx,
+                             uintptr_t ra)
+{
+#ifdef CONFIG_USER_ONLY
+    g_assert(haddr);
+    memset(haddr, byte, size);
+#else
+    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
+    int i;
+
+    if (likely(haddr)) {
+        memset(haddr, byte, size);
+    } else {
+        /*
+         * Do a single access and test if we can then get access to the
+         * page. This is especially relevant to speed up TLB_NOTDIRTY.
+         */
+        g_assert(size > 0);
+        helper_ret_stb_mmu(env, vaddr, byte, oi, ra);
+        haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
+        if (likely(haddr)) {
+            memset(haddr + 1, byte, size - 1);
         } else {
-            /* We failed to get access to the whole page. The next write
-               access will likely fill the QEMU TLB for the next iteration.  */
-            cpu_stb_data_ra(env, dest, byte, ra);
-            dest++;
-            l--;
+            for (i = 1; i < size; i++) {
+                helper_ret_stb_mmu(env, vaddr + i, byte, oi, ra);
+            }
         }
     }
+#endif
+}
+
+static void access_memset(CPUS390XState *env, S390Access *desta,
+                          uint8_t byte, uintptr_t ra)
+{
+
+    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
+                     desta->mmu_idx, ra);
+    if (likely(!desta->size2)) {
+        return;
+    }
+    do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
+                     desta->mmu_idx, ra);
 }
 
 #ifndef CONFIG_USER_ONLY
@@ -259,15 +327,19 @@ uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
 {
+    const int mmu_idx = cpu_mmu_index(env, false);
+    S390Access desta;
     uint32_t i;
     uint8_t c = 0;
 
     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
                __func__, l, dest, src);
 
+    desta = access_prepare(env, dest, l + 1, MMU_DATA_STORE, mmu_idx, ra);
+
     /* xor with itself is the same as memset(0) */
     if (src == dest) {
-        fast_memset(env, dest, 0, l + 1, ra);
+        access_memset(env, &desta, 0, ra);
         return 0;
     }
 
@@ -315,6 +387,8 @@ uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
 static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                               uint64_t src, uintptr_t ra)
 {
+    const int mmu_idx = cpu_mmu_index(env, false);
+    S390Access desta;
     uint32_t i;
 
     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
@@ -323,13 +397,15 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
     /* MVC always copies one more byte than specified - maximum is 256 */
     l++;
 
+    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
+
     /*
      * "When the operands overlap, the result is obtained as if the operands
      * were processed one byte at a time". Only non-destructive overlaps
      * behave like memmove().
      */
     if (dest == src + 1) {
-        fast_memset(env, dest, cpu_ldub_data_ra(env, src, ra), l, ra);
+        access_memset(env, &desta, cpu_ldub_data_ra(env, src, ra), ra);
     } else if (!is_destructive_overlap(env, dest, src, l)) {
         fast_memmove(env, dest, src, l, ra);
     } else {
@@ -775,7 +851,9 @@ static inline uint32_t do_mvcl(CPUS390XState *env,
                                uint64_t *src, uint64_t *srclen,
                                uint16_t pad, int wordsize, uintptr_t ra)
 {
+    const int mmu_idx = cpu_mmu_index(env, false);
     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
+    S390Access desta;
     int i, cc;
 
     if (*destlen == *srclen) {
@@ -805,7 +883,8 @@ static inline uint32_t do_mvcl(CPUS390XState *env,
     } else if (wordsize == 1) {
         /* Pad the remaining area */
         *destlen -= len;
-        fast_memset(env, *dest, pad, len, ra);
+        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
+        access_memset(env, &desta, pad, ra);
         *dest = wrap_address(env, *dest + len);
     } else {
         /* The remaining length selects the padding byte. */
@@ -825,6 +904,7 @@ static inline uint32_t do_mvcl(CPUS390XState *env,
 /* move long */
 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
 {
+    const int mmu_idx = cpu_mmu_index(env, false);
     uintptr_t ra = GETPC();
     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
     uint64_t dest = get_address(env, r1);
@@ -832,6 +912,7 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
     uint64_t src = get_address(env, r2);
     uint8_t pad = env->regs[r2 + 1] >> 24;
     uint32_t cc, cur_len;
+    S390Access desta;
 
     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
         cc = 3;
@@ -859,7 +940,9 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
     while (destlen) {
         cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
         if (!srclen) {
-            fast_memset(env, dest, pad, cur_len, ra);
+            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
+                                   ra);
+            access_memset(env, &desta, pad, ra);
         } else {
             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
 
-- 
2.20.1



  parent reply	other threads:[~2019-09-19 13:18 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-19 12:40 [Qemu-devel] [PULL 00/34] s390x update Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 01/34] s390x/tcg: Reset exception_index to -1 instead of 0 Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 02/34] s390x/tcg: MVCL: Zero out unused bits of address Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 03/34] s390x/tcg: MVCL: Detect destructive overlaps Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 04/34] s390x/tcg: MVCL: Process max 4k bytes at a time Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 05/34] s390x/tcg: MVC: Increment the length once Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 06/34] s390x/tcg: MVC: Use is_destructive_overlap() Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 07/34] s390x/tcg: MVPG: Check for specification exceptions Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 08/34] s390x/tcg: MVPG: Properly wrap the addresses Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 09/34] s390x/tcg: MVCLU/MVCLE: Process max 4k bytes at a time Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 10/34] s390x/tcg: MVCS/MVCP: Check for special operation exceptions Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 11/34] s390x/tcg: MVCOS: Lengths are 32 bit in 24/31-bit mode Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 12/34] s390x/tcg: MVCS/MVCP: Properly wrap the length Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 13/34] s390x/tcg: MVST: Check for specification exceptions Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 14/34] s390x/tcg: MVST: Fix storing back the addresses to registers Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 15/34] s390x/tcg: Always use MMU_USER_IDX for CONFIG_USER_ONLY Cornelia Huck
2019-09-19 12:40 ` Cornelia Huck [this message]
2019-09-19 12:40 ` [Qemu-devel] [PULL 17/34] s390x/tcg: Fault-safe memmove Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 18/34] s390x/tcg: MVCS/MVCP: Use access_memmove() Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 19/34] s390x/tcg: MVC: Fault-safe handling on destructive overlaps Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 20/34] s390x/tcg: MVCLU: Fault-safe handling Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 21/34] s390x/tcg: OC: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 22/34] s390x/tcg: XC: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 23/34] s390x/tcg: NC: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 24/34] s390x/tcg: MVCIN: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 25/34] s390x/tcg: MVN: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 26/34] s390x/tcg: MVZ: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 27/34] s390x/tcg: MVST: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 28/34] s390x/tcg: MVO: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 29/34] tests/tcg: target/s390x: Test MVO Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 30/34] pc-bios/s390-ccw: Do not pre-initialize empty array Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 31/34] pc-bios/s390-ccw/net: fix a possible memory leak in get_uuid() Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 32/34] pc-bios/s390-ccw: Rebuild the s390-netboot.img firmware image Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 33/34] s390x/kvm: Officially require at least kernel 3.15 Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 34/34] s390x/cpumodel: Add the z15 name to the description of gen15a Cornelia Huck
2019-09-20 10:45 ` [PULL 00/34] s390x update Peter Maydell
2019-09-20 11:00   ` Cornelia Huck
2019-09-20 11:51     ` David Hildenbrand
2019-09-20 11:59       ` David Hildenbrand
2019-09-20 13:32         ` Cornelia Huck
2019-09-20 13:41           ` Peter Maydell
2019-09-20 15:34             ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190919124115.11510-17-cohuck@redhat.com \
    --to=cohuck@redhat.com \
    --cc=david@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-s390x@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).