* [Qemu-devel] [PULL 0/4] tlb fixes for self-modifying code
@ 2016-07-08 20:38 Richard Henderson
  2016-07-08 20:38 ` [Qemu-devel] [PULL 1/4] cputlb: Move VICTIM_TLB_HIT out of line Richard Henderson
                   ` (4 more replies)
  0 siblings, 5 replies; 7+ messages in thread
From: Richard Henderson @ 2016-07-08 20:38 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell

Synchronicity?  Both changes look good to me.

I took the opportunity to tidy up softmmu_template.h a tiny bit at
the same time, to avoid too much of an increase in code size on cold
code paths.  The final result is 4k smaller than the original.


r~


The following changes since commit 4f4a9ca4a4386c137301b3662faba076455ff15a:

  Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20160707' into staging (2016-07-07 14:49:38 +0100)

are available in the git repository at:

  git://github.com/rth7680/qemu.git tags/pull-tcg-20160708

for you to fetch changes up to 7399a337e4126f7c8c8af3336726f001378c4798:

  translate-all: Fix user-mode self-modifying code in 2 page long TB (2016-07-08 13:17:38 -0700)

----------------------------------------------------------------
two self-modifying code fixes

----------------------------------------------------------------
Richard Henderson (1):
      cputlb: Move VICTIM_TLB_HIT out of line

Samuel Damashek (2):
      cputlb: Add address parameter to VICTIM_TLB_HIT
      cputlb: Fix for self-modifying writes across page boundaries

Stanislav Shmarov (1):
      translate-all: Fix user-mode self-modifying code in 2 page long TB

 cputlb.c           | 29 ++++++++++++++++++++
 softmmu_template.h | 79 +++++++++++++++++++++++++++---------------------------
 translate-all.c    | 10 +++----
 3 files changed, 74 insertions(+), 44 deletions(-)


* [Qemu-devel] [PULL 1/4] cputlb: Move VICTIM_TLB_HIT out of line
  2016-07-08 20:38 [Qemu-devel] [PULL 0/4] tlb fixes for self-modifying code Richard Henderson
@ 2016-07-08 20:38 ` Richard Henderson
  2016-07-08 20:38 ` [Qemu-devel] [PULL 2/4] cputlb: Add address parameter to VICTIM_TLB_HIT Richard Henderson
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2016-07-08 20:38 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell

There are currently 22 invocations of this function,
and we're about to increase that number.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 cputlb.c           | 29 +++++++++++++++++++++++++++++
 softmmu_template.h | 25 -------------------------
 2 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index 079e497..7518544 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -498,6 +498,35 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
     return qemu_ram_addr_from_host_nofail(p);
 }
 
+/* Return true if ADDR is present in the victim tlb, and has been copied
+   back to the main tlb.  */
+static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
+                           size_t elt_ofs, target_ulong page)
+{
+    size_t vidx;
+    for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
+        CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
+        target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
+
+        if (cmp == page) {
+            /* Found entry in victim tlb, swap tlb and iotlb.  */
+            CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
+            CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
+            CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
+
+            tmptlb = *tlb; *tlb = *vtlb; *vtlb = tmptlb;
+            tmpio = *io; *io = *vio; *vio = tmpio;
+            return true;
+        }
+    }
+    return false;
+}
+
+/* Macro to call the above, with local variables from the use context.  */
+#define VICTIM_TLB_HIT(TY) \
+  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
+                 addr & TARGET_PAGE_MASK)
+
 #define MMUSUFFIX _mmu
 
 #define SHIFT 0
diff --git a/softmmu_template.h b/softmmu_template.h
index 4d378ca..405ba35 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -116,31 +116,6 @@
 # define helper_te_st_name  helper_le_st_name
 #endif
 
-/* macro to check the victim tlb */
-#define VICTIM_TLB_HIT(ty)                                                    \
-({                                                                            \
-    /* we are about to do a page table walk. our last hope is the             \
-     * victim tlb. try to refill from the victim tlb before walking the       \
-     * page table. */                                                         \
-    int vidx;                                                                 \
-    CPUIOTLBEntry tmpiotlb;                                                   \
-    CPUTLBEntry tmptlb;                                                       \
-    for (vidx = CPU_VTLB_SIZE-1; vidx >= 0; --vidx) {                         \
-        if (env->tlb_v_table[mmu_idx][vidx].ty == (addr & TARGET_PAGE_MASK)) {\
-            /* found entry in victim tlb, swap tlb and iotlb */               \
-            tmptlb = env->tlb_table[mmu_idx][index];                          \
-            env->tlb_table[mmu_idx][index] = env->tlb_v_table[mmu_idx][vidx]; \
-            env->tlb_v_table[mmu_idx][vidx] = tmptlb;                         \
-            tmpiotlb = env->iotlb[mmu_idx][index];                            \
-            env->iotlb[mmu_idx][index] = env->iotlb_v[mmu_idx][vidx];         \
-            env->iotlb_v[mmu_idx][vidx] = tmpiotlb;                           \
-            break;                                                            \
-        }                                                                     \
-    }                                                                         \
-    /* return true when there is a vtlb hit, i.e. vidx >=0 */                 \
-    vidx >= 0;                                                                \
-})
-
 #ifndef SOFTMMU_CODE_ACCESS
 static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
                                               CPUIOTLBEntry *iotlbentry,
-- 
2.7.4


* [Qemu-devel] [PULL 2/4] cputlb: Add address parameter to VICTIM_TLB_HIT
  2016-07-08 20:38 [Qemu-devel] [PULL 0/4] tlb fixes for self-modifying code Richard Henderson
  2016-07-08 20:38 ` [Qemu-devel] [PULL 1/4] cputlb: Move VICTIM_TLB_HIT out of line Richard Henderson
@ 2016-07-08 20:38 ` Richard Henderson
  2016-07-08 20:38 ` [Qemu-devel] [PULL 3/4] cputlb: Fix for self-modifying writes across page boundaries Richard Henderson
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2016-07-08 20:38 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, Samuel Damashek

From: Samuel Damashek <samuel.damashek@invincea.com>

[rth: Split out from the original patch.]

Signed-off-by: Samuel Damashek <samuel.damashek@invincea.com>
Message-Id: <20160706182652.16190-1-samuel.damashek@invincea.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 cputlb.c           |  4 ++--
 softmmu_template.h | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index 7518544..d068ee5 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -523,9 +523,9 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
 }
 
 /* Macro to call the above, with local variables from the use context.  */
-#define VICTIM_TLB_HIT(TY) \
+#define VICTIM_TLB_HIT(TY, ADDR) \
   victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
-                 addr & TARGET_PAGE_MASK)
+                 (ADDR) & TARGET_PAGE_MASK)
 
 #define MMUSUFFIX _mmu
 
diff --git a/softmmu_template.h b/softmmu_template.h
index 405ba35..aeab016 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -161,7 +161,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
     /* If the TLB entry is for a different page, reload and try again.  */
     if ((addr & TARGET_PAGE_MASK)
          != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        if (!VICTIM_TLB_HIT(ADDR_READ)) {
+        if (!VICTIM_TLB_HIT(ADDR_READ, addr)) {
             tlb_fill(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
                      mmu_idx, retaddr);
         }
@@ -235,7 +235,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
     /* If the TLB entry is for a different page, reload and try again.  */
     if ((addr & TARGET_PAGE_MASK)
          != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        if (!VICTIM_TLB_HIT(ADDR_READ)) {
+        if (!VICTIM_TLB_HIT(ADDR_READ, addr)) {
             tlb_fill(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
                      mmu_idx, retaddr);
         }
@@ -345,7 +345,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
     /* If the TLB entry is for a different page, reload and try again.  */
     if ((addr & TARGET_PAGE_MASK)
         != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        if (!VICTIM_TLB_HIT(addr_write)) {
+        if (!VICTIM_TLB_HIT(addr_write, addr)) {
             tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
         }
         tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
@@ -415,7 +415,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
     /* If the TLB entry is for a different page, reload and try again.  */
     if ((addr & TARGET_PAGE_MASK)
         != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        if (!VICTIM_TLB_HIT(addr_write)) {
+        if (!VICTIM_TLB_HIT(addr_write, addr)) {
             tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
         }
         tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
@@ -477,7 +477,7 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
     if ((addr & TARGET_PAGE_MASK)
         != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         /* TLB entry is for a different page */
-        if (!VICTIM_TLB_HIT(addr_write)) {
+        if (!VICTIM_TLB_HIT(addr_write, addr)) {
             tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
         }
     }
-- 
2.7.4


* [Qemu-devel] [PULL 3/4] cputlb: Fix for self-modifying writes across page boundaries
  2016-07-08 20:38 [Qemu-devel] [PULL 0/4] tlb fixes for self-modifying code Richard Henderson
  2016-07-08 20:38 ` [Qemu-devel] [PULL 1/4] cputlb: Move VICTIM_TLB_HIT out of line Richard Henderson
  2016-07-08 20:38 ` [Qemu-devel] [PULL 2/4] cputlb: Add address parameter to VICTIM_TLB_HIT Richard Henderson
@ 2016-07-08 20:38 ` Richard Henderson
  2016-07-29  2:50   ` TeLeMan
  2016-07-08 20:38 ` [Qemu-devel] [PULL 4/4] translate-all: Fix user-mode self-modifying code in 2 page long TB Richard Henderson
  2016-07-11 17:46 ` [Qemu-devel] [PULL 0/4] tlb fixes for self-modifying code Peter Maydell
  4 siblings, 1 reply; 7+ messages in thread
From: Richard Henderson @ 2016-07-08 20:38 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, Samuel Damashek

From: Samuel Damashek <samuel.damashek@invincea.com>

As it currently stands, QEMU does not properly handle self-modifying code
when the write is unaligned and crosses a page boundary. The procedure
for handling a write to the current translation block is to write-protect
the current translation block, catch the write, split up the translation
block into the current instruction (which remains write-protected so that
the current instruction is not modified) and the remaining instructions
in the translation block, and then restore the CPU state to before the
write occurred so the write will be retried and successfully executed.
However, since unaligned writes across pages are split into one-byte
writes for simplicity, writes to the second page (which is not the
current TB) may succeed before a write to the current TB is attempted,
and since these writes are not invalidated before resuming state after
splitting the TB, these writes will be performed a second time, thus
corrupting the second page. Credit goes to Patrick Hulin for
discovering this.

In recent 64-bit versions of Windows running in emulated mode, this
results in either being very unstable (a BSOD after a couple minutes of
uptime), or being entirely unable to boot. Windows performs one or more
8-byte unaligned self-modifying writes (xors) which intersect the end
of the current TB and the beginning of the next TB, which runs into the
aforementioned issue. This commit fixes that issue by making the
unaligned write loop perform the writes in forwards order, instead of
reverse order. This way, QEMU immediately tries to write to the current
TB, and splits the TB before any write to the second page is executed.
The write then proceeds as intended. With this patch applied, I am able
to boot and use Windows 7 64-bit and Windows 10 64-bit in QEMU without
KVM.

Per Richard Henderson's input, this patch also ensures the second page
is in the TLB before executing the write loop, to ensure the second
page is mapped.

The original discussion of the issue is located at
http://lists.nongnu.org/archive/html/qemu-devel/2014-08/msg02161.html.
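
For illustration, a made-up walkthrough (assuming 4 KiB pages; the
addresses are hypothetical, not taken from a trace).  The guest
executes an 8-byte read-modify-write (e.g. one of the Windows xors
described above) whose destination spans the current,
write-protected TB page:

    bytes 0x0ffa..0x0fff -> page 1 (current TB, write-protected)
    bytes 0x1000..0x1001 -> page 2 (writable)

With the old reverse-order loop (i = DATA_SIZE-1 down to 0), the
bytes at 0x1001 and 0x1000 are written first and succeed; the write
to 0x0fff then faults, the TB is split, and CPU state is rolled back
to before the instruction.  When the xor re-executes, it re-reads a
destination whose page-2 bytes already hold the xored value, so
those bytes are xored twice and page 2 is corrupted.  With the
forward loop, the very first byte (0x0ffa) faults, so the TB is
split before any byte of page 2 has been touched.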

Signed-off-by: Samuel Damashek <samuel.damashek@invincea.com>
Message-Id: <20160706182652.16190-1-samuel.damashek@invincea.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 softmmu_template.h | 44 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/softmmu_template.h b/softmmu_template.h
index aeab016..284ab2c 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -370,12 +370,25 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
     if (DATA_SIZE > 1
         && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
                      >= TARGET_PAGE_SIZE)) {
-        int i;
+        int i, index2;
+        target_ulong page2, tlb_addr2;
     do_unaligned_access:
-        /* XXX: not efficient, but simple */
-        /* Note: relies on the fact that tlb_fill() does not remove the
-         * previous page from the TLB cache.  */
-        for (i = DATA_SIZE - 1; i >= 0; i--) {
+        /* Ensure the second page is in the TLB.  Note that the first page
+           is already guaranteed to be filled, and that the second page
+           cannot evict the first.  */
+        page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
+        index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+        tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write;
+        if (page2 != (tlb_addr2 & (TARGET_PAGE_MASK | TLB_INVALID_MASK))
+            && !VICTIM_TLB_HIT(addr_write, page2)) {
+            tlb_fill(ENV_GET_CPU(env), page2, MMU_DATA_STORE,
+                     mmu_idx, retaddr);
+        }
+
+        /* XXX: not efficient, but simple.  */
+        /* This loop must go in the forward direction to avoid issues
+           with self-modifying code in Windows 64-bit.  */
+        for (i = 0; i < DATA_SIZE; ++i) {
             /* Little-endian extract.  */
             uint8_t val8 = val >> (i * 8);
             /* Note the adjustment at the beginning of the function.
@@ -440,12 +453,25 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
     if (DATA_SIZE > 1
         && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
                      >= TARGET_PAGE_SIZE)) {
-        int i;
+        int i, index2;
+        target_ulong page2, tlb_addr2;
     do_unaligned_access:
+        /* Ensure the second page is in the TLB.  Note that the first page
+           is already guaranteed to be filled, and that the second page
+           cannot evict the first.  */
+        page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
+        index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+        tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write;
+        if (page2 != (tlb_addr2 & (TARGET_PAGE_MASK | TLB_INVALID_MASK))
+            && !VICTIM_TLB_HIT(addr_write, page2)) {
+            tlb_fill(ENV_GET_CPU(env), page2, MMU_DATA_STORE,
+                     mmu_idx, retaddr);
+        }
+
         /* XXX: not efficient, but simple */
-        /* Note: relies on the fact that tlb_fill() does not remove the
-         * previous page from the TLB cache.  */
-        for (i = DATA_SIZE - 1; i >= 0; i--) {
+        /* This loop must go in the forward direction to avoid issues
+           with self-modifying code.  */
+        for (i = 0; i < DATA_SIZE; ++i) {
             /* Big-endian extract.  */
             uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
             /* Note the adjustment at the beginning of the function.
-- 
2.7.4


* [Qemu-devel] [PULL 4/4] translate-all: Fix user-mode self-modifying code in 2 page long TB
  2016-07-08 20:38 [Qemu-devel] [PULL 0/4] tlb fixes for self-modifying code Richard Henderson
                   ` (2 preceding siblings ...)
  2016-07-08 20:38 ` [Qemu-devel] [PULL 3/4] cputlb: Fix for self-modifying writes across page boundaries Richard Henderson
@ 2016-07-08 20:38 ` Richard Henderson
  2016-07-11 17:46 ` [Qemu-devel] [PULL 0/4] tlb fixes for self-modifying code Peter Maydell
  4 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2016-07-08 20:38 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, Stanislav Shmarov

From: Stanislav Shmarov <snarpix@gmail.com>

In user-mode emulation a Translation Block can span 2 guest pages.
In that case QEMU also mprotects the 2 host pages that back the
guest memory containing the instructions.  QEMU detects
self-modifying code by processing the resulting SEGFAULT signals.

If an instruction in the 1st page modifies memory in the 2nd page
(or vice versa), QEMU will mark the 2nd page with PAGE_WRITE,
invalidate the TB, generate a new TB containing 1 guest instruction,
and exit to the CPU loop.  QEMU won't call mprotect, so the new TB
will cause the same SEGFAULT.  The page will then carry both the
PAGE_WRITE_ORG and PAGE_WRITE flags, so QEMU will treat the signal
as a problem in the guest binary and exit with a guest SEGFAULT.

The solution is as follows: even if the current TB has been
invalidated, keep invalidating TBs on the remaining guest pages and
marking those pages PAGE_WRITE; only after that, disable the host
page protection with mprotect.  Then, if the current TB was
invalidated, longjmp to the main loop.  This is also more efficient,
since we won't take another SEGFAULT when executing the new TB.
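
In sketch form, the return-value contract this preserves, seen from
a hypothetical caller (the real caller is the user-mode signal
handler; the helper names below are illustrative assumptions, not
QEMU API):

    switch (page_unprotect(guest_addr, pc)) {
    case 0: /* page was not write-protected by QEMU: real guest fault */
        deliver_guest_segv();               /* hypothetical helper */
        break;
    case 1: /* page unprotected; current TB still valid: retry write */
        break;
    case 2: /* current TB invalidated: longjmp back to the main loop */
        resume_from_main_loop();            /* hypothetical helper */
        break;
    }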

Reviewed-by: Sergey Fedorov <sergey.fedorov@linaro.org>
Signed-off-by: Stanislav Shmarov <snarpix@gmail.com>
Message-Id: <1467880392-1043630-1-git-send-email-snarpix@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 translate-all.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/translate-all.c b/translate-all.c
index eaa95e4..0d47c1c 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -2000,6 +2000,7 @@ int page_check_range(target_ulong start, target_ulong len, int flags)
 int page_unprotect(target_ulong address, uintptr_t pc)
 {
     unsigned int prot;
+    bool current_tb_invalidated;
     PageDesc *p;
     target_ulong host_start, host_end, addr;
 
@@ -2021,6 +2022,7 @@ int page_unprotect(target_ulong address, uintptr_t pc)
         host_end = host_start + qemu_host_page_size;
 
         prot = 0;
+        current_tb_invalidated = false;
         for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
             p = page_find(addr >> TARGET_PAGE_BITS);
             p->flags |= PAGE_WRITE;
@@ -2028,10 +2030,7 @@ int page_unprotect(target_ulong address, uintptr_t pc)
 
             /* and since the content will be modified, we must invalidate
                the corresponding translated code. */
-            if (tb_invalidate_phys_page(addr, pc)) {
-                mmap_unlock();
-                return 2;
-            }
+            current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
 #ifdef DEBUG_TB_CHECK
             tb_invalidate_check(addr);
 #endif
@@ -2040,7 +2039,8 @@ int page_unprotect(target_ulong address, uintptr_t pc)
                  prot & PAGE_BITS);
 
         mmap_unlock();
-        return 1;
+        /* If current TB was invalidated return to main loop */
+        return current_tb_invalidated ? 2 : 1;
     }
     mmap_unlock();
     return 0;
-- 
2.7.4


* Re: [Qemu-devel] [PULL 0/4] tlb fixes for self-modifying code
  2016-07-08 20:38 [Qemu-devel] [PULL 0/4] tlb fixes for self-modifying code Richard Henderson
                   ` (3 preceding siblings ...)
  2016-07-08 20:38 ` [Qemu-devel] [PULL 4/4] translate-all: Fix user-mode self-modifying code in 2 page long TB Richard Henderson
@ 2016-07-11 17:46 ` Peter Maydell
  4 siblings, 0 replies; 7+ messages in thread
From: Peter Maydell @ 2016-07-11 17:46 UTC (permalink / raw)
  To: Richard Henderson; +Cc: QEMU Developers

On 8 July 2016 at 21:38, Richard Henderson <rth@twiddle.net> wrote:
> Synchronicity?  Both changes look good to me.
>
> I took the opportunity to tidy up softmmu_template.h a tiny bit at
> the same time, to avoid too much of an increase in code size on cold
> code paths.  The final result is 4k smaller than the original.
>
>
> r~
>
>
> The following changes since commit 4f4a9ca4a4386c137301b3662faba076455ff15a:
>
>   Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20160707' into staging (2016-07-07 14:49:38 +0100)
>
> are available in the git repository at:
>
>   git://github.com/rth7680/qemu.git tags/pull-tcg-20160708
>
> for you to fetch changes up to 7399a337e4126f7c8c8af3336726f001378c4798:
>
>   translate-all: Fix user-mode self-modifying code in 2 page long TB (2016-07-08 13:17:38 -0700)
>
> ----------------------------------------------------------------
> two self-modifying code fixes
>

Applied, thanks.

-- PMM


* Re: [Qemu-devel] [PULL 3/4] cputlb: Fix for self-modifying writes across page boundaries
  2016-07-08 20:38 ` [Qemu-devel] [PULL 3/4] cputlb: Fix for self-modifying writes across page boundaries Richard Henderson
@ 2016-07-29  2:50   ` TeLeMan
  0 siblings, 0 replies; 7+ messages in thread
From: TeLeMan @ 2016-07-29  2:50 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, Peter Maydell, Samuel Damashek

On Sat, Jul 9, 2016 at 4:38 AM, Richard Henderson <rth@twiddle.net> wrote:
> From: Samuel Damashek <samuel.damashek@invincea.com>
>
> As it currently stands, QEMU does not properly handle self-modifying code
> when the write is unaligned and crosses a page boundary. The procedure
> for handling a write to the current translation block is to write-protect
> the current translation block, catch the write, split up the translation
> block into the current instruction (which remains write-protected so that
> the current instruction is not modified) and the remaining instructions
> in the translation block, and then restore the CPU state to before the
> write occurred so the write will be retried and successfully executed.
> However, since unaligned writes across pages are split into one-byte
> writes for simplicity, writes to the second page (which is not the
> current TB) may succeed before a write to the current TB is attempted,
> and since these writes are not invalidated before resuming state after
> splitting the TB, these writes will be performed a second time, thus
> corrupting the second page. Credit goes to Patrick Hulin for
> discovering this.
>
> In recent 64-bit versions of Windows running in emulated mode, this
> results in either being very unstable (a BSOD after a couple minutes of
> uptime), or being entirely unable to boot. Windows performs one or more
> 8-byte unaligned self-modifying writes (xors) which intersect the end
> of the current TB and the beginning of the next TB, which runs into the
> aforementioned issue. This commit fixes that issue by making the
> unaligned write loop perform the writes in forwards order, instead of
> reverse order. This way, QEMU immediately tries to write to the current
> TB, and splits the TB before any write to the second page is executed.
> The write then proceeds as intended. With this patch applied, I am able
> to boot and use Windows 7 64-bit and Windows 10 64-bit in QEMU without
> KVM.
>
> Per Richard Henderson's input, this patch also ensures the second page
> is in the TLB before executing the write loop, to ensure the second
> page is mapped.
>
> The original discussion of the issue is located at
> http://lists.nongnu.org/archive/html/qemu-devel/2014-08/msg02161.html.
>
> Signed-off-by: Samuel Damashek <samuel.damashek@invincea.com>
> Message-Id: <20160706182652.16190-1-samuel.damashek@invincea.com>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  softmmu_template.h | 44 +++++++++++++++++++++++++++++++++++---------
>  1 file changed, 35 insertions(+), 9 deletions(-)
>
> diff --git a/softmmu_template.h b/softmmu_template.h
> index aeab016..284ab2c 100644
> --- a/softmmu_template.h
> +++ b/softmmu_template.h
> @@ -370,12 +370,25 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>      if (DATA_SIZE > 1
>          && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
>                       >= TARGET_PAGE_SIZE)) {
> -        int i;
> +        int i, index2;
> +        target_ulong page2, tlb_addr2;
>      do_unaligned_access:
> -        /* XXX: not efficient, but simple */
> -        /* Note: relies on the fact that tlb_fill() does not remove the
> -         * previous page from the TLB cache.  */
> -        for (i = DATA_SIZE - 1; i >= 0; i--) {
> +        /* Ensure the second page is in the TLB.  Note that the first page
> +           is already guaranteed to be filled, and that the second page
> +           cannot evict the first.  */
> +        page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;

Should that be (addr + DATA_SIZE - 1)?
A minor optimization: we could first compare the second page with the
first page.
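
For example (assuming 4 KiB pages; the address is made up), for an
8-byte store at 0x0ffa both expressions name the same second page:

    (0x0ffa + 8)     & ~0xfff = 0x1000
    (0x0ffa + 8 - 1) & ~0xfff = 0x1000

the '- 1' form just makes explicit that the page being tested is the
one holding the last byte written.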

> +        index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
> +        tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write;
> +        if (page2 != (tlb_addr2 & (TARGET_PAGE_MASK | TLB_INVALID_MASK))
> +            && !VICTIM_TLB_HIT(addr_write, page2)) {
> +            tlb_fill(ENV_GET_CPU(env), page2, MMU_DATA_STORE,
> +                     mmu_idx, retaddr);
> +        }
> +
> +        /* XXX: not efficient, but simple.  */
> +        /* This loop must go in the forward direction to avoid issues
> +           with self-modifying code in Windows 64-bit.  */
> +        for (i = 0; i < DATA_SIZE; ++i) {
>              /* Little-endian extract.  */
>              uint8_t val8 = val >> (i * 8);
>              /* Note the adjustment at the beginning of the function.
> @@ -440,12 +453,25 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>      if (DATA_SIZE > 1
>          && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
>                       >= TARGET_PAGE_SIZE)) {
> -        int i;
> +        int i, index2;
> +        target_ulong page2, tlb_addr2;
>      do_unaligned_access:
> +        /* Ensure the second page is in the TLB.  Note that the first page
> +           is already guaranteed to be filled, and that the second page
> +           cannot evict the first.  */
> +        page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;

The same as above.

> +        index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
> +        tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write;
> +        if (page2 != (tlb_addr2 & (TARGET_PAGE_MASK | TLB_INVALID_MASK))
> +            && !VICTIM_TLB_HIT(addr_write, page2)) {
> +            tlb_fill(ENV_GET_CPU(env), page2, MMU_DATA_STORE,
> +                     mmu_idx, retaddr);
> +        }
> +
>          /* XXX: not efficient, but simple */
> -        /* Note: relies on the fact that tlb_fill() does not remove the
> -         * previous page from the TLB cache.  */
> -        for (i = DATA_SIZE - 1; i >= 0; i--) {
> +        /* This loop must go in the forward direction to avoid issues
> +           with self-modifying code.  */
> +        for (i = 0; i < DATA_SIZE; ++i) {
>              /* Big-endian extract.  */
>              uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
>              /* Note the adjustment at the beginning of the function.
> --
> 2.7.4
>
>

