From: "Emilio G. Cota" <cota@braap.org>
To: qemu-devel@nongnu.org
Cc: "Paolo Bonzini" <pbonzini@redhat.com>,
"Richard Henderson" <rth@twiddle.net>,
"Alex Bennée" <alex.bennee@linaro.org>
Subject: [Qemu-devel] [PATCH v2 4/4] cputlb: read CPUTLBEntry.addr_write atomically
Date: Wed, 3 Oct 2018 16:04:54 -0400 [thread overview]
Message-ID: <20181003200454.18384-5-cota@braap.org> (raw)
In-Reply-To: <20181003200454.18384-1-cota@braap.org>
Updates can come from other threads, so readers that do not
take tlb_lock must use atomic_read to avoid undefined
behaviour (UB).
This and the previous commit result in a small performance decrease,
but this is a fair price for removing UB.
Host: Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz
- Before:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7482.981146 task-clock (msec) # 0.998 CPUs utilized ( +- 0.09% )
31,565,219,958 cycles # 4.218 GHz ( +- 0.09% )
57,102,517,194 instructions # 1.81 insns per cycle ( +- 0.07% )
10,255,768,012 branches # 1370.546 M/sec ( +- 0.07% )
172,980,542 branch-misses # 1.69% of all branches ( +- 0.11% )
7.494710830 seconds time elapsed ( +- 0.09% )
- After:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7649.735155 task-clock (msec) # 0.999 CPUs utilized ( +- 0.13% )
32,262,593,483 cycles # 4.217 GHz ( +- 0.13% )
58,487,065,236 instructions # 1.81 insns per cycle ( +- 0.06% )
10,561,549,557 branches # 1380.643 M/sec ( +- 0.06% )
173,995,793 branch-misses # 1.65% of all branches ( +- 0.12% )
7.660611466 seconds time elapsed ( +- 0.13% )
That is, a ~2% slowdown for the aarch64 bootup+shutdown test.
Signed-off-by: Emilio G. Cota <cota@braap.org>
---
accel/tcg/softmmu_template.h | 16 ++++++++++------
include/exec/cpu_ldst.h | 2 +-
include/exec/cpu_ldst_template.h | 2 +-
accel/tcg/cputlb.c | 15 +++++++++------
4 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h
index f060a693d4..1e50263871 100644
--- a/accel/tcg/softmmu_template.h
+++ b/accel/tcg/softmmu_template.h
@@ -277,7 +277,8 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
{
unsigned mmu_idx = get_mmuidx(oi);
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+ target_ulong tlb_addr =
+ atomic_read(&env->tlb_table[mmu_idx][index].addr_write);
unsigned a_bits = get_alignment_bits(get_memop(oi));
uintptr_t haddr;
@@ -292,7 +293,8 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
mmu_idx, retaddr);
}
- tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK;
+ tlb_addr = atomic_read(&env->tlb_table[mmu_idx][index].addr_write) &
+ ~TLB_INVALID_MASK;
}
/* Handle an IO access. */
@@ -321,7 +323,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
cannot evict the first. */
page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write;
+ tlb_addr2 = atomic_read(&env->tlb_table[mmu_idx][index2].addr_write);
if (!tlb_hit_page(tlb_addr2, page2)
&& !VICTIM_TLB_HIT(addr_write, page2)) {
tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
@@ -354,7 +356,8 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
{
unsigned mmu_idx = get_mmuidx(oi);
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+ target_ulong tlb_addr =
+ atomic_read(&env->tlb_table[mmu_idx][index].addr_write);
unsigned a_bits = get_alignment_bits(get_memop(oi));
uintptr_t haddr;
@@ -369,7 +372,8 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
mmu_idx, retaddr);
}
- tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK;
+ tlb_addr = atomic_read(&env->tlb_table[mmu_idx][index].addr_write) &
+ ~TLB_INVALID_MASK;
}
/* Handle an IO access. */
@@ -398,7 +402,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
cannot evict the first. */
page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write;
+ tlb_addr2 = atomic_read(&env->tlb_table[mmu_idx][index2].addr_write);
if (!tlb_hit_page(tlb_addr2, page2)
&& !VICTIM_TLB_HIT(addr_write, page2)) {
tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index 41ed0526e2..9581587ce1 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -426,7 +426,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
tlb_addr = tlbentry->addr_read;
break;
case 1:
- tlb_addr = tlbentry->addr_write;
+ tlb_addr = atomic_read(&tlbentry->addr_write);
break;
case 2:
tlb_addr = tlbentry->addr_code;
diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h
index 4db2302962..ba7a11123c 100644
--- a/include/exec/cpu_ldst_template.h
+++ b/include/exec/cpu_ldst_template.h
@@ -176,7 +176,7 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
addr = ptr;
page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
mmu_idx = CPU_MMU_INDEX;
- if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
+ if (unlikely(atomic_read(&env->tlb_table[mmu_idx][page_index].addr_write) !=
(addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
oi = make_memop_idx(SHIFT, mmu_idx);
glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi,
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 142a9cdf9e..adbeda0d3b 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -257,7 +257,7 @@ static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
target_ulong page)
{
return tlb_hit_page(tlb_entry->addr_read, page) ||
- tlb_hit_page(tlb_entry->addr_write, page) ||
+ tlb_hit_page(atomic_read(&tlb_entry->addr_write), page) ||
tlb_hit_page(tlb_entry->addr_code, page);
}
@@ -863,7 +863,7 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+ tlb_addr = atomic_read(&env->tlb_table[mmu_idx][index].addr_write);
if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
/* RAM access */
uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend;
@@ -912,7 +912,9 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
assert_cpu_is_self(ENV_GET_CPU(env));
for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
- target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
+ /* elt_ofs might correspond to .addr_write, so use atomic_read */
+ target_ulong cmp =
+ atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
if (cmp == page) {
/* Found entry in victim tlb, swap tlb and iotlb. */
@@ -984,7 +986,8 @@ void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
uintptr_t retaddr)
{
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+ target_ulong tlb_addr =
+ atomic_read(&env->tlb_table[mmu_idx][index].addr_write);
if (!tlb_hit(tlb_addr, addr)) {
/* TLB entry is for a different page */
@@ -1004,7 +1007,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
size_t mmu_idx = get_mmuidx(oi);
size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index];
- target_ulong tlb_addr = tlbe->addr_write;
+ target_ulong tlb_addr = atomic_read(&tlbe->addr_write);
TCGMemOp mop = get_memop(oi);
int a_bits = get_alignment_bits(mop);
int s_bits = mop & MO_SIZE;
@@ -1035,7 +1038,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE,
mmu_idx, retaddr);
}
- tlb_addr = tlbe->addr_write & ~TLB_INVALID_MASK;
+ tlb_addr = atomic_read(&tlbe->addr_write) & ~TLB_INVALID_MASK;
}
/* Notice an IO access or a needs-MMU-lookup access */
--
2.17.1
next prev parent reply other threads:[~2018-10-03 20:05 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-10-03 20:04 [Qemu-devel] [PATCH v2 0/4] per-TLB lock Emilio G. Cota
2018-10-03 20:04 ` [Qemu-devel] [PATCH v2 1/4] exec: introduce tlb_init Emilio G. Cota
2018-10-04 11:08 ` Alex Bennée
2018-10-03 20:04 ` [Qemu-devel] [PATCH v2 2/4] cputlb: fix assert_cpu_is_self macro Emilio G. Cota
2018-10-03 20:23 ` Richard Henderson
2018-10-04 10:16 ` Alex Bennée
2018-10-03 20:04 ` [Qemu-devel] [PATCH v2 3/4] cputlb: serialize tlb updates with env->tlb_lock Emilio G. Cota
2018-10-04 11:07 ` Alex Bennée
2018-10-03 20:04 ` Emilio G. Cota [this message]
2018-10-04 4:01 ` [Qemu-devel] [PATCH v2 4/4] cputlb: read CPUTLBEntry.addr_write atomically Emilio G. Cota
2018-10-04 4:03 ` Emilio G. Cota
2018-10-04 20:15 ` [Qemu-devel] [PATCH v2 0/4] per-TLB lock Alex Bennée
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181003200454.18384-5-cota@braap.org \
--to=cota@braap.org \
--cc=alex.bennee@linaro.org \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.