From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
To: "Emilio G. Cota" <cota@braap.org>,
	"Alex Bennée" <alex.bennee@linaro.org>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH v3 0/3] softmmu demacro
Date: Mon, 25 Feb 2019 22:27:19 +0000	[thread overview]
Message-ID: <01acc60d-5f00-20ed-4631-6da98ae4da7e@ilande.co.uk> (raw)
In-Reply-To: <20190219182528.GA19925@flamenco>

On 19/02/2019 18:25, Emilio G. Cota wrote:

> On Tue, Feb 19, 2019 at 14:22:37 +0000, Alex Bennée wrote:
>> Emilio,
>>
>> Any chance you could run it through your benchmark suite?
> 
> Something isn't quite right. For instance, gcc in SPEC doesn't
> complete; it fails after 2s with some errors about the syntax of
> the source being compiled. Before the patches the error isn't
> there (the test completes in ~7s, as usual), so I suspect
> some guest memory accesses aren't going to the right place.
> 
> I am too busy right now to try to debug this, but if you have
> patches to test, I can run them. The patches I tested are in
> your v3 branch on github, i.e. with 430f28154b5 at the head.

I spent a few hours yesterday and today testing this patchset against my OpenBIOS
test images and found that all of them were able to boot, except for one MacOS image
which started exhibiting flashing blocks on a progress bar during boot. Tracing this
back, I found the problem was still present when just the first patch "accel/tcg:
demacro cputlb" was applied.

First of all, there were a couple of typos in load_helper() and store_helper(), which can be fixed up with the following diff:


diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 351f579fed..f3cc2dd0d9 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1251,7 +1251,7 @@ static tcg_target_ulong load_helper(CPUArchState *env, target_ulong addr,
         }

         tmp = io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
-                       addr & tlb_addr & TLB_RECHECK,
+                       tlb_addr & TLB_RECHECK,
                        code_read ? MMU_INST_FETCH : MMU_DATA_LOAD, size);
         return handle_bswap(tmp, size, big_endian);
     }
@@ -1405,14 +1405,14 @@ tcg_target_ulong
 helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
                    uintptr_t retaddr)
 {
-    return (int32_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
+    return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
 }

 tcg_target_ulong
 helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
                    uintptr_t retaddr)
 {
-    return (int32_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
+    return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
 }

 /*
@@ -1427,6 +1427,7 @@ static void store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
     uintptr_t index = tlb_index(env, mmu_idx, addr);
     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
     target_ulong tlb_addr = tlb_addr_write(entry);
+    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
     unsigned a_bits = get_alignment_bits(get_memop(oi));
     uintptr_t haddr;

@@ -1438,7 +1439,8 @@ static void store_helper(CPUArchState *env, target_ulong addr, uint64_t val,

     /* If the TLB entry is for a different page, reload and try again.  */
     if (!tlb_hit(tlb_addr, addr)) {
-        if (!VICTIM_TLB_HIT(addr_write, addr)) {
+        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
+            addr & TARGET_PAGE_MASK)) {
             tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
                      mmu_idx, retaddr);
             index = tlb_index(env, mmu_idx, addr);
@@ -1466,7 +1468,6 @@ static void store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
         && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                      >= TARGET_PAGE_SIZE)) {
         int i;
-        uintptr_t index2;
         CPUTLBEntry *entry2;
         target_ulong page2, tlb_addr2;
     do_unaligned_access:
@@ -1476,15 +1477,13 @@ static void store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
          * cannot evict the first.
          */
         page2 = (addr + size) & TARGET_PAGE_MASK;
-        index2 = tlb_index(env, mmu_idx, page2);
-        entry2 = tlb_entry(env, mmu_idx, index2);
+        entry2 = tlb_entry(env, mmu_idx, page2);
         tlb_addr2 = tlb_addr_write(entry2);
         if (!tlb_hit_page(tlb_addr2, page2)
-            && !VICTIM_TLB_HIT(addr_write, page2)) {
+            && !victim_tlb_hit(env, mmu_idx, index, tlb_off,
+                               page2 & TARGET_PAGE_MASK)) {
             tlb_fill(ENV_GET_CPU(env), page2, size, MMU_DATA_STORE,
                      mmu_idx, retaddr);
-            index2 = tlb_index(env, mmu_idx, page2);
-            entry2 = tlb_entry(env, mmu_idx, index2);
         }

         /*


Even with this patch applied I was still seeing issues with the progress bar, so I
ended up manually unrolling softmmu_template.h until I could see at what point things
were breaking.

The culprit turned out to be the change of the type of res in load_helper() from the
size-specific uint8_t/uint16_t/uint32_t/uint64_t to tcg_target_ulong in the slow
path, as seen in my locally unrolled version:


WORKING:

tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
{
    ....

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (4 > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + 4 - 1
                    >= TARGET_PAGE_SIZE)) {
        target_ulong addr1, addr2;
        uint32_t res1, res2;
        ^^^^^^^^
        unsigned shift;
    do_unaligned_access:
        addr1 = addr & ~(4 - 1);
        addr2 = addr1 + 4;
        res1 = helper_le_ldul_mmu(env, addr1, oi, retaddr);
        res2 = helper_le_ldul_mmu(env, addr2, oi, retaddr);
        shift = (addr & (4 - 1)) * 8;

        /* Little-endian combine.  */
        res = (res1 >> shift) | (res2 << ((4 * 8) - shift));
        return res;
    }

}

tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
{
    ....

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (4 > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + 4 - 1
                    >= TARGET_PAGE_SIZE)) {
        target_ulong addr1, addr2;
        uint32_t res1, res2;
        ^^^^^^^^
        unsigned shift;
    do_unaligned_access:
        addr1 = addr & ~(4 - 1);
        addr2 = addr1 + 4;
        res1 = helper_be_ldul_mmu(env, addr1, oi, retaddr);
        res2 = helper_be_ldul_mmu(env, addr2, oi, retaddr);
        shift = (addr & (4 - 1)) * 8;

        /* Big-endian combine.  */
        res = (res1 << shift) | (res2 >> ((4 * 8) - shift));
        return res;
    }
}


CORRUPTED:

tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
{
    ....

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (4 > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + 4 - 1
                    >= TARGET_PAGE_SIZE)) {
        target_ulong addr1, addr2;
        tcg_target_ulong res1, res2;
        ^^^^^^^^^^^^^^^^
        unsigned shift;
    do_unaligned_access:
        addr1 = addr & ~(4 - 1);
        addr2 = addr1 + 4;
        res1 = helper_le_ldul_mmu(env, addr1, oi, retaddr);
        res2 = helper_le_ldul_mmu(env, addr2, oi, retaddr);
        shift = (addr & (4 - 1)) * 8;

        /* Little-endian combine.  */
        res = (res1 >> shift) | (res2 << ((4 * 8) - shift));
        return res;
    }

}

tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
{
    ....

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (4 > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + 4 - 1
                    >= TARGET_PAGE_SIZE)) {
        target_ulong addr1, addr2;
        tcg_target_ulong res1, res2;
        ^^^^^^^^^^^^^^^^
        unsigned shift;
    do_unaligned_access:
        addr1 = addr & ~(4 - 1);
        addr2 = addr1 + 4;
        res1 = helper_be_ldul_mmu(env, addr1, oi, retaddr);
        res2 = helper_be_ldul_mmu(env, addr2, oi, retaddr);
        shift = (addr & (4 - 1)) * 8;

        /* Big-endian combine.  */
        res = (res1 << shift) | (res2 >> ((4 * 8) - shift));
        return res;
    }
}


Presumably the issue here is that with the wider type the shifted halves keep high
bits that the size-specific type would have discarded? Also, during my tests the
visual corruption was only present for 32-bit accesses, but presumably all the
access sizes will need a similar fix.
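
To illustrate the suspicion, here is a minimal standalone C program (my own sketch,
not QEMU code) comparing the little-endian combine with 32-bit and with 64-bit
intermediates for a misaligned 4-byte access at addr & 3 == 1:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    unsigned shift = 8;   /* (addr & (4 - 1)) * 8 with addr & 3 == 1 */
    uint32_t res1_32 = 0x11223344, res2_32 = 0x55667788;
    uint64_t res1_64 = res1_32, res2_64 = res2_32;

    /* Combine with the original size-specific 32-bit type: bits shifted
       above bit 31 are discarded. */
    uint32_t narrow = (res1_32 >> shift) | (res2_32 << ((4 * 8) - shift));

    /* Combine with 64-bit (tcg_target_ulong-sized) intermediates: bits of
       res2 shifted above bit 31 survive in the result. */
    uint64_t wide = (res1_64 >> shift) | (res2_64 << ((4 * 8) - shift));

    printf("narrow = 0x%08" PRIx32 "\n", narrow);  /* 0x88112233 */
    printf("wide   = 0x%016" PRIx64 "\n", wide);   /* 0x0055667788112233 */
    return 0;
}

The 32-bit combine gives the expected 0x88112233, while the 64-bit combine hands
back extra high bits, which could leak through to the guest if the caller assumes
a zero-extended 32-bit result.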


ATB,

Mark.
