* [Qemu-devel] [PATCH 1/6] exec: Move user-only watchpoint stubs inline
2019-08-24 21:34 [Qemu-devel] [PATCH 0/6] exec: Cleanup watchpoints Richard Henderson
@ 2019-08-24 21:34 ` Richard Henderson
2019-08-26 7:45 ` David Hildenbrand
2019-08-24 21:34 ` [Qemu-devel] [PATCH 2/6] exec: Factor out core logic of check_watchpoint() Richard Henderson
` (5 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Richard Henderson @ 2019-08-24 21:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, david
Let the user-only watchpoint stubs resolve to empty inline functions.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/hw/core/cpu.h | 23 +++++++++++++++++++++++
exec.c | 26 ++------------------------
2 files changed, 25 insertions(+), 24 deletions(-)
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 77fca95a40..6de688059d 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -1070,12 +1070,35 @@ static inline bool cpu_breakpoint_test(CPUState *cpu, vaddr pc, int mask)
return false;
}
+#ifdef CONFIG_USER_ONLY
+static inline int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
+ int flags, CPUWatchpoint **watchpoint)
+{
+ return -ENOSYS;
+}
+
+static inline int cpu_watchpoint_remove(CPUState *cpu, vaddr addr,
+ vaddr len, int flags)
+{
+ return -ENOSYS;
+}
+
+static inline void cpu_watchpoint_remove_by_ref(CPUState *cpu,
+ CPUWatchpoint *wp)
+{
+}
+
+static inline void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
+{
+}
+#else
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
int flags, CPUWatchpoint **watchpoint);
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr,
vaddr len, int flags);
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint);
void cpu_watchpoint_remove_all(CPUState *cpu, int mask);
+#endif
/**
* cpu_get_address_space:
diff --git a/exec.c b/exec.c
index 53a15b7ad7..31fb75901f 100644
--- a/exec.c
+++ b/exec.c
@@ -1062,28 +1062,7 @@ static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
}
#endif
-#if defined(CONFIG_USER_ONLY)
-void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
-
-{
-}
-
-int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
- int flags)
-{
- return -ENOSYS;
-}
-
-void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
-{
-}
-
-int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
- int flags, CPUWatchpoint **watchpoint)
-{
- return -ENOSYS;
-}
-#else
+#ifndef CONFIG_USER_ONLY
/* Add a watchpoint. */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
int flags, CPUWatchpoint **watchpoint)
@@ -1173,8 +1152,7 @@ static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
return !(addr > wpend || wp->vaddr > addrend);
}
-
-#endif
+#endif /* !CONFIG_USER_ONLY */
/* Add a breakpoint. */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
--
2.17.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH 1/6] exec: Move user-only watchpoint stubs inline
2019-08-24 21:34 ` [Qemu-devel] [PATCH 1/6] exec: Move user-only watchpoint stubs inline Richard Henderson
@ 2019-08-26 7:45 ` David Hildenbrand
0 siblings, 0 replies; 14+ messages in thread
From: David Hildenbrand @ 2019-08-26 7:45 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell
On 24.08.19 23:34, Richard Henderson wrote:
> Let the user-only watchpoint stubs resolve to empty inline functions.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> include/hw/core/cpu.h | 23 +++++++++++++++++++++++
> exec.c | 26 ++------------------------
> 2 files changed, 25 insertions(+), 24 deletions(-)
>
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index 77fca95a40..6de688059d 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -1070,12 +1070,35 @@ static inline bool cpu_breakpoint_test(CPUState *cpu, vaddr pc, int mask)
> return false;
> }
>
> +#ifdef CONFIG_USER_ONLY
> +static inline int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
> + int flags, CPUWatchpoint **watchpoint)
> +{
> + return -ENOSYS;
> +}
> +
> +static inline int cpu_watchpoint_remove(CPUState *cpu, vaddr addr,
> + vaddr len, int flags)
> +{
> + return -ENOSYS;
> +}
> +
> +static inline void cpu_watchpoint_remove_by_ref(CPUState *cpu,
> + CPUWatchpoint *wp)
> +{
> +}
> +
> +static inline void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
> +{
> +}
> +#else
> int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
> int flags, CPUWatchpoint **watchpoint);
> int cpu_watchpoint_remove(CPUState *cpu, vaddr addr,
> vaddr len, int flags);
> void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint);
> void cpu_watchpoint_remove_all(CPUState *cpu, int mask);
> +#endif
>
> /**
> * cpu_get_address_space:
> diff --git a/exec.c b/exec.c
> index 53a15b7ad7..31fb75901f 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1062,28 +1062,7 @@ static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
> }
> #endif
>
> -#if defined(CONFIG_USER_ONLY)
> -void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
> -
> -{
> -}
> -
> -int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
> - int flags)
> -{
> - return -ENOSYS;
> -}
> -
> -void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
> -{
> -}
> -
> -int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
> - int flags, CPUWatchpoint **watchpoint)
> -{
> - return -ENOSYS;
> -}
> -#else
> +#ifndef CONFIG_USER_ONLY
> /* Add a watchpoint. */
> int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
> int flags, CPUWatchpoint **watchpoint)
> @@ -1173,8 +1152,7 @@ static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
>
> return !(addr > wpend || wp->vaddr > addrend);
> }
> -
> -#endif
> +#endif /* !CONFIG_USER_ONLY */
>
> /* Add a breakpoint. */
> int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
>
Reviewed-by: David Hildenbrand <david@redhat.com>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 2/6] exec: Factor out core logic of check_watchpoint()
2019-08-24 21:34 [Qemu-devel] [PATCH 0/6] exec: Cleanup watchpoints Richard Henderson
2019-08-24 21:34 ` [Qemu-devel] [PATCH 1/6] exec: Move user-only watchpoint stubs inline Richard Henderson
@ 2019-08-24 21:34 ` Richard Henderson
2019-08-24 21:34 ` [Qemu-devel] [PATCH 3/6] cputlb: Fold TLB_RECHECK into TLB_INVALID_MASK Richard Henderson
` (4 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2019-08-24 21:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, david
From: David Hildenbrand <david@redhat.com>
We want to perform the same checks in probe_write() to trigger a cpu
exit before doing any modifications. We'll have to pass a PC.
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20190823100741.9621-9-david@redhat.com>
[rth: Use vaddr for len, like other watchpoint functions;
Move user-only stub to static inline.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/hw/core/cpu.h | 7 +++++++
exec.c | 26 ++++++++++++++++++--------
2 files changed, 25 insertions(+), 8 deletions(-)
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 6de688059d..7bd8bed5b2 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -1091,6 +1091,11 @@ static inline void cpu_watchpoint_remove_by_ref(CPUState *cpu,
static inline void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
}
+
+static inline void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
+ MemTxAttrs atr, int fl, uintptr_t ra)
+{
+}
#else
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
int flags, CPUWatchpoint **watchpoint);
@@ -1098,6 +1103,8 @@ int cpu_watchpoint_remove(CPUState *cpu, vaddr addr,
vaddr len, int flags);
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint);
void cpu_watchpoint_remove_all(CPUState *cpu, int mask);
+void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
+ MemTxAttrs attrs, int flags, uintptr_t ra);
#endif
/**
diff --git a/exec.c b/exec.c
index 31fb75901f..cb6f5763dc 100644
--- a/exec.c
+++ b/exec.c
@@ -2789,11 +2789,10 @@ static const MemoryRegionOps notdirty_mem_ops = {
};
/* Generate a debug exception if a watchpoint has been hit. */
-static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
+void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
+ MemTxAttrs attrs, int flags, uintptr_t ra)
{
- CPUState *cpu = current_cpu;
CPUClass *cc = CPU_GET_CLASS(cpu);
- target_ulong vaddr;
CPUWatchpoint *wp;
assert(tcg_enabled());
@@ -2804,17 +2803,17 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
return;
}
- vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
- vaddr = cc->adjust_watchpoint_address(cpu, vaddr, len);
+
+ addr = cc->adjust_watchpoint_address(cpu, addr, len);
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
- if (cpu_watchpoint_address_matches(wp, vaddr, len)
+ if (cpu_watchpoint_address_matches(wp, addr, len)
&& (wp->flags & flags)) {
if (flags == BP_MEM_READ) {
wp->flags |= BP_WATCHPOINT_HIT_READ;
} else {
wp->flags |= BP_WATCHPOINT_HIT_WRITE;
}
- wp->hitaddr = vaddr;
+ wp->hitaddr = MAX(addr, wp->vaddr);
wp->hitattrs = attrs;
if (!cpu->watchpoint_hit) {
if (wp->flags & BP_CPU &&
@@ -2829,11 +2828,14 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
cpu->exception_index = EXCP_DEBUG;
mmap_unlock();
- cpu_loop_exit(cpu);
+ cpu_loop_exit_restore(cpu, ra);
} else {
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | curr_cflags();
mmap_unlock();
+ if (ra) {
+ cpu_restore_state(cpu, ra, true);
+ }
cpu_loop_exit_noexc(cpu);
}
}
@@ -2843,6 +2845,14 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
}
}
+static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
+{
+ CPUState *cpu = current_cpu;
+ vaddr addr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
+
+ cpu_check_watchpoint(cpu, addr, len, attrs, flags, 0);
+}
+
/* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
so these check for a hit then pass through to the normal out-of-line
phys routines. */
--
2.17.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 3/6] cputlb: Fold TLB_RECHECK into TLB_INVALID_MASK
2019-08-24 21:34 [Qemu-devel] [PATCH 0/6] exec: Cleanup watchpoints Richard Henderson
2019-08-24 21:34 ` [Qemu-devel] [PATCH 1/6] exec: Move user-only watchpoint stubs inline Richard Henderson
2019-08-24 21:34 ` [Qemu-devel] [PATCH 2/6] exec: Factor out core logic of check_watchpoint() Richard Henderson
@ 2019-08-24 21:34 ` Richard Henderson
2019-08-26 8:36 ` David Hildenbrand
2019-08-24 21:34 ` [Qemu-devel] [PATCH 4/6] exec: Factor out cpu_watchpoint_address_matches Richard Henderson
` (3 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Richard Henderson @ 2019-08-24 21:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, david
We had two different mechanisms to force a recheck of the tlb.
Before TLB_RECHECK was introduced, we had a PAGE_WRITE_INV bit
that would immediately set TLB_INVALID_MASK, which automatically
means that a second check of the tlb entry fails.
We can use the same mechanism to handle small pages.
Conserve TLB_* bits by removing TLB_RECHECK.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/cpu-all.h | 5 +--
accel/tcg/cputlb.c | 86 +++++++++++-------------------------------
2 files changed, 24 insertions(+), 67 deletions(-)
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 8323094648..8d07ae23a5 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -329,14 +329,11 @@ CPUArchState *cpu_copy(CPUArchState *env);
#define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS - 2))
/* Set if TLB entry is an IO callback. */
#define TLB_MMIO (1 << (TARGET_PAGE_BITS - 3))
-/* Set if TLB entry must have MMU lookup repeated for every access */
-#define TLB_RECHECK (1 << (TARGET_PAGE_BITS - 4))
/* Use this mask to check interception with an alignment mask
* in a TCG backend.
*/
-#define TLB_FLAGS_MASK (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \
- | TLB_RECHECK)
+#define TLB_FLAGS_MASK (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO)
/**
* tlb_hit_page: return true if page aligned @addr is a hit against the
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index d9787cc893..c9576bebcf 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -732,11 +732,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
address = vaddr_page;
if (size < TARGET_PAGE_SIZE) {
- /*
- * Slow-path the TLB entries; we will repeat the MMU check and TLB
- * fill on every access.
- */
- address |= TLB_RECHECK;
+ /* Repeat the MMU check and TLB fill on every access. */
+ address |= TLB_INVALID_MASK;
}
if (attrs.byte_swap) {
/* Force the access through the I/O slow path. */
@@ -1026,10 +1023,15 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
(ADDR) & TARGET_PAGE_MASK)
-/* NOTE: this function can trigger an exception */
-/* NOTE2: the returned address is not exactly the physical address: it
- * is actually a ram_addr_t (in system mode; the user mode emulation
- * version of this function returns a guest virtual address).
+/*
+ * Return a ram_addr_t for the virtual address for execution.
+ *
+ * Return -1 if we can't translate and execute from an entire page
+ * of RAM. This will force us to execute by loading and translating
+ * one insn at a time, without caching.
+ *
+ * NOTE: This function will trigger an exception if the page is
+ * not executable.
*/
tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
{
@@ -1043,19 +1045,20 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
index = tlb_index(env, mmu_idx, addr);
entry = tlb_entry(env, mmu_idx, addr);
+
+ if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
+ /*
+ * The MMU protection covers a smaller range than a target
+ * page, so we must redo the MMU check for every insn.
+ */
+ return -1;
+ }
}
assert(tlb_hit(entry->addr_code, addr));
}
- if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) {
- /*
- * Return -1 if we can't translate and execute from an entire
- * page of RAM here, which will cause us to execute by loading
- * and translating one insn at a time, without caching:
- * - TLB_RECHECK: means the MMU protection covers a smaller range
- * than a target page, so we must redo the MMU check every insn
- * - TLB_MMIO: region is not backed by RAM
- */
+ if (unlikely(entry->addr_code & TLB_MMIO)) {
+ /* The region is not backed by RAM. */
return -1;
}
@@ -1180,7 +1183,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
}
/* Notice an IO access or a needs-MMU-lookup access */
- if (unlikely(tlb_addr & (TLB_MMIO | TLB_RECHECK))) {
+ if (unlikely(tlb_addr & TLB_MMIO)) {
/* There's really nothing that can be done to
support this apart from stop-the-world. */
goto stop_the_world;
@@ -1258,6 +1261,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
entry = tlb_entry(env, mmu_idx, addr);
}
tlb_addr = code_read ? entry->addr_code : entry->addr_read;
+ tlb_addr &= ~TLB_INVALID_MASK;
}
/* Handle an IO access. */
@@ -1265,27 +1269,6 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
if ((addr & (size - 1)) != 0) {
goto do_unaligned_access;
}
-
- if (tlb_addr & TLB_RECHECK) {
- /*
- * This is a TLB_RECHECK access, where the MMU protection
- * covers a smaller range than a target page, and we must
- * repeat the MMU check here. This tlb_fill() call might
- * longjump out if this access should cause a guest exception.
- */
- tlb_fill(env_cpu(env), addr, size,
- access_type, mmu_idx, retaddr);
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
-
- tlb_addr = code_read ? entry->addr_code : entry->addr_read;
- tlb_addr &= ~TLB_RECHECK;
- if (!(tlb_addr & ~TARGET_PAGE_MASK)) {
- /* RAM access */
- goto do_aligned_access;
- }
- }
-
return io_readx(env, &env_tlb(env)->d[mmu_idx].iotlb[index],
mmu_idx, addr, retaddr, access_type, op);
}
@@ -1314,7 +1297,6 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
return res & MAKE_64BIT_MASK(0, size * 8);
}
- do_aligned_access:
haddr = (void *)((uintptr_t)addr + entry->addend);
switch (op) {
case MO_UB:
@@ -1509,27 +1491,6 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
if ((addr & (size - 1)) != 0) {
goto do_unaligned_access;
}
-
- if (tlb_addr & TLB_RECHECK) {
- /*
- * This is a TLB_RECHECK access, where the MMU protection
- * covers a smaller range than a target page, and we must
- * repeat the MMU check here. This tlb_fill() call might
- * longjump out if this access should cause a guest exception.
- */
- tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
- mmu_idx, retaddr);
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
-
- tlb_addr = tlb_addr_write(entry);
- tlb_addr &= ~TLB_RECHECK;
- if (!(tlb_addr & ~TARGET_PAGE_MASK)) {
- /* RAM access */
- goto do_aligned_access;
- }
- }
-
io_writex(env, &env_tlb(env)->d[mmu_idx].iotlb[index], mmu_idx,
val, addr, retaddr, op);
return;
@@ -1579,7 +1540,6 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
return;
}
- do_aligned_access:
haddr = (void *)((uintptr_t)addr + entry->addend);
switch (op) {
case MO_UB:
--
2.17.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH 3/6] cputlb: Fold TLB_RECHECK into TLB_INVALID_MASK
2019-08-24 21:34 ` [Qemu-devel] [PATCH 3/6] cputlb: Fold TLB_RECHECK into TLB_INVALID_MASK Richard Henderson
@ 2019-08-26 8:36 ` David Hildenbrand
0 siblings, 0 replies; 14+ messages in thread
From: David Hildenbrand @ 2019-08-26 8:36 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell
On 24.08.19 23:34, Richard Henderson wrote:
> We had two different mechanisms to force a recheck of the tlb.
>
> Before TLB_RECHECK was introduced, we had a PAGE_WRITE_INV bit
> that would immediate set TLB_INVALID_MASK, which automatically
> means that a second check of the tlb entry fails.
>
> We can use the same mechanism to handle small pages.
> Conserve TLB_* bits by removing TLB_RECHECK.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> include/exec/cpu-all.h | 5 +--
> accel/tcg/cputlb.c | 86 +++++++++++-------------------------------
> 2 files changed, 24 insertions(+), 67 deletions(-)
>
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index 8323094648..8d07ae23a5 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -329,14 +329,11 @@ CPUArchState *cpu_copy(CPUArchState *env);
> #define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS - 2))
> /* Set if TLB entry is an IO callback. */
> #define TLB_MMIO (1 << (TARGET_PAGE_BITS - 3))
> -/* Set if TLB entry must have MMU lookup repeated for every access */
> -#define TLB_RECHECK (1 << (TARGET_PAGE_BITS - 4))
>
> /* Use this mask to check interception with an alignment mask
> * in a TCG backend.
> */
> -#define TLB_FLAGS_MASK (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \
> - | TLB_RECHECK)
> +#define TLB_FLAGS_MASK (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO)
>
> /**
> * tlb_hit_page: return true if page aligned @addr is a hit against the
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index d9787cc893..c9576bebcf 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -732,11 +732,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
>
> address = vaddr_page;
> if (size < TARGET_PAGE_SIZE) {
> - /*
> - * Slow-path the TLB entries; we will repeat the MMU check and TLB
> - * fill on every access.
> - */
> - address |= TLB_RECHECK;
> + /* Repeat the MMU check and TLB fill on every access. */
> + address |= TLB_INVALID_MASK;
> }
> if (attrs.byte_swap) {
> /* Force the access through the I/O slow path. */
> @@ -1026,10 +1023,15 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
> victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
> (ADDR) & TARGET_PAGE_MASK)
>
> -/* NOTE: this function can trigger an exception */
> -/* NOTE2: the returned address is not exactly the physical address: it
> - * is actually a ram_addr_t (in system mode; the user mode emulation
> - * version of this function returns a guest virtual address).
> +/*
> + * Return a ram_addr_t for the virtual address for execution.
> + *
> + * Return -1 if we can't translate and execute from an entire page
> + * of RAM. This will force us to execute by loading and translating
> + * one insn at a time, without caching.
> + *
> + * NOTE: This function will trigger an exception if the page is
> + * not executable.
> */
> tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
> {
> @@ -1043,19 +1045,20 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
> tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
> index = tlb_index(env, mmu_idx, addr);
> entry = tlb_entry(env, mmu_idx, addr);
> +
> + if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
> + /*
> + * The MMU protection covers a smaller range than a target
> + * page, so we must redo the MMU check for every insn.
> + */
> + return -1;
> + }
> }
> assert(tlb_hit(entry->addr_code, addr));
> }
>
> - if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) {
> - /*
> - * Return -1 if we can't translate and execute from an entire
> - * page of RAM here, which will cause us to execute by loading
> - * and translating one insn at a time, without caching:
> - * - TLB_RECHECK: means the MMU protection covers a smaller range
> - * than a target page, so we must redo the MMU check every insn
> - * - TLB_MMIO: region is not backed by RAM
> - */
> + if (unlikely(entry->addr_code & TLB_MMIO)) {
> + /* The region is not backed by RAM. */
> return -1;
> }
>
> @@ -1180,7 +1183,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
> }
>
> /* Notice an IO access or a needs-MMU-lookup access */
> - if (unlikely(tlb_addr & (TLB_MMIO | TLB_RECHECK))) {
> + if (unlikely(tlb_addr & TLB_MMIO)) {
> /* There's really nothing that can be done to
> support this apart from stop-the-world. */
> goto stop_the_world;
> @@ -1258,6 +1261,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
> entry = tlb_entry(env, mmu_idx, addr);
> }
> tlb_addr = code_read ? entry->addr_code : entry->addr_read;
> + tlb_addr &= ~TLB_INVALID_MASK;
> }
>
> /* Handle an IO access. */
> @@ -1265,27 +1269,6 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
> if ((addr & (size - 1)) != 0) {
> goto do_unaligned_access;
> }
> -
> - if (tlb_addr & TLB_RECHECK) {
> - /*
> - * This is a TLB_RECHECK access, where the MMU protection
> - * covers a smaller range than a target page, and we must
> - * repeat the MMU check here. This tlb_fill() call might
> - * longjump out if this access should cause a guest exception.
> - */
> - tlb_fill(env_cpu(env), addr, size,
> - access_type, mmu_idx, retaddr);
> - index = tlb_index(env, mmu_idx, addr);
> - entry = tlb_entry(env, mmu_idx, addr);
> -
> - tlb_addr = code_read ? entry->addr_code : entry->addr_read;
> - tlb_addr &= ~TLB_RECHECK;
> - if (!(tlb_addr & ~TARGET_PAGE_MASK)) {
> - /* RAM access */
> - goto do_aligned_access;
> - }
> - }
> -
> return io_readx(env, &env_tlb(env)->d[mmu_idx].iotlb[index],
> mmu_idx, addr, retaddr, access_type, op);
> }
> @@ -1314,7 +1297,6 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
> return res & MAKE_64BIT_MASK(0, size * 8);
> }
>
> - do_aligned_access:
> haddr = (void *)((uintptr_t)addr + entry->addend);
> switch (op) {
> case MO_UB:
> @@ -1509,27 +1491,6 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
> if ((addr & (size - 1)) != 0) {
> goto do_unaligned_access;
> }
> -
> - if (tlb_addr & TLB_RECHECK) {
> - /*
> - * This is a TLB_RECHECK access, where the MMU protection
> - * covers a smaller range than a target page, and we must
> - * repeat the MMU check here. This tlb_fill() call might
> - * longjump out if this access should cause a guest exception.
> - */
> - tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
> - mmu_idx, retaddr);
> - index = tlb_index(env, mmu_idx, addr);
> - entry = tlb_entry(env, mmu_idx, addr);
> -
> - tlb_addr = tlb_addr_write(entry);
> - tlb_addr &= ~TLB_RECHECK;
> - if (!(tlb_addr & ~TARGET_PAGE_MASK)) {
> - /* RAM access */
> - goto do_aligned_access;
> - }
> - }
> -
> io_writex(env, &env_tlb(env)->d[mmu_idx].iotlb[index], mmu_idx,
> val, addr, retaddr, op);
> return;
> @@ -1579,7 +1540,6 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
> return;
> }
>
> - do_aligned_access:
> haddr = (void *)((uintptr_t)addr + entry->addend);
> switch (op) {
> case MO_UB:
>
Much better
Reviewed-by: David Hildenbrand <david@redhat.com>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 4/6] exec: Factor out cpu_watchpoint_address_matches
2019-08-24 21:34 [Qemu-devel] [PATCH 0/6] exec: Cleanup watchpoints Richard Henderson
` (2 preceding siblings ...)
2019-08-24 21:34 ` [Qemu-devel] [PATCH 3/6] cputlb: Fold TLB_RECHECK into TLB_INVALID_MASK Richard Henderson
@ 2019-08-24 21:34 ` Richard Henderson
2019-08-26 8:41 ` David Hildenbrand
2019-08-24 21:34 ` [Qemu-devel] [PATCH 5/6] cputlb: Handle watchpoints via TLB_WATCHPOINT Richard Henderson
` (2 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Richard Henderson @ 2019-08-24 21:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, david
We want to move the check for watchpoints from
memory_region_section_get_iotlb to tlb_set_page_with_attrs.
Isolate the loop over watchpoints to an exported function.
Rename the existing cpu_watchpoint_address_matches to
watchpoint_address_matches, since it doesn't actually
have a cpu argument.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/hw/core/cpu.h | 7 +++++++
exec.c | 45 ++++++++++++++++++++++++++++---------------
2 files changed, 36 insertions(+), 16 deletions(-)
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 7bd8bed5b2..c7cda65c66 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -1096,6 +1096,12 @@ static inline void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
MemTxAttrs atr, int fl, uintptr_t ra)
{
}
+
+static inline int cpu_watchpoint_address_matches(CPUState *cpu,
+ vaddr addr, vaddr len)
+{
+ return 0;
+}
#else
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
int flags, CPUWatchpoint **watchpoint);
@@ -1105,6 +1111,7 @@ void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint);
void cpu_watchpoint_remove_all(CPUState *cpu, int mask);
void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
MemTxAttrs attrs, int flags, uintptr_t ra);
+int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len);
#endif
/**
diff --git a/exec.c b/exec.c
index cb6f5763dc..8575ce51ad 100644
--- a/exec.c
+++ b/exec.c
@@ -1138,9 +1138,8 @@ void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
* partially or completely with the address range covered by the
* access).
*/
-static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
- vaddr addr,
- vaddr len)
+static inline bool watchpoint_address_matches(CPUWatchpoint *wp,
+ vaddr addr, vaddr len)
{
/* We know the lengths are non-zero, but a little caution is
* required to avoid errors in the case where the range ends
@@ -1152,6 +1151,20 @@ static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
return !(addr > wpend || wp->vaddr > addrend);
}
+
+/* Return flags for watchpoints that match addr + prot. */
+int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len)
+{
+ CPUWatchpoint *wp;
+ int ret = 0;
+
+ QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
+ if (watchpoint_address_matches(wp, addr, TARGET_PAGE_SIZE)) {
+ ret |= wp->flags;
+ }
+ }
+ return ret;
+}
#endif /* !CONFIG_USER_ONLY */
/* Add a breakpoint. */
@@ -1459,7 +1472,7 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu,
target_ulong *address)
{
hwaddr iotlb;
- CPUWatchpoint *wp;
+ int flags, match;
if (memory_region_is_ram(section->mr)) {
/* Normal RAM. */
@@ -1477,17 +1490,17 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu,
iotlb += xlat;
}
- /* Make accesses to pages with watchpoints go via the
- watchpoint trap routines. */
- QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
- if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
- /* Avoid trapping reads of pages with a write breakpoint. */
- if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
- iotlb = PHYS_SECTION_WATCH + paddr;
- *address |= TLB_MMIO;
- break;
- }
- }
+ /* Avoid trapping reads of pages with a write breakpoint. */
+ match = (prot & PAGE_READ ? BP_MEM_READ : 0)
+ | (prot & PAGE_WRITE ? BP_MEM_WRITE : 0);
+ flags = cpu_watchpoint_address_matches(cpu, vaddr, TARGET_PAGE_SIZE);
+ if (flags & match) {
+ /*
+ * Make accesses to pages with watchpoints go via the
+ * watchpoint trap routines.
+ */
+ iotlb = PHYS_SECTION_WATCH + paddr;
+ *address |= TLB_MMIO;
}
return iotlb;
@@ -2806,7 +2819,7 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
addr = cc->adjust_watchpoint_address(cpu, addr, len);
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
- if (cpu_watchpoint_address_matches(wp, addr, len)
+ if (watchpoint_address_matches(wp, addr, len)
&& (wp->flags & flags)) {
if (flags == BP_MEM_READ) {
wp->flags |= BP_WATCHPOINT_HIT_READ;
--
2.17.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH 4/6] exec: Factor out cpu_watchpoint_address_matches
2019-08-24 21:34 ` [Qemu-devel] [PATCH 4/6] exec: Factor out cpu_watchpoint_address_matches Richard Henderson
@ 2019-08-26 8:41 ` David Hildenbrand
2019-08-28 21:28 ` Richard Henderson
0 siblings, 1 reply; 14+ messages in thread
From: David Hildenbrand @ 2019-08-26 8:41 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell
On 24.08.19 23:34, Richard Henderson wrote:
> We want to move the check for watchpoints from
> memory_region_section_get_iotlb to tlb_set_page_with_attrs.
> Isolate the loop over watchpoints to an exported function.
>
> Rename the existing cpu_watchpoint_address_matches to
> watchpoint_address_matches, since it doesn't actually
> have a cpu argument.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> include/hw/core/cpu.h | 7 +++++++
> exec.c | 45 ++++++++++++++++++++++++++++---------------
> 2 files changed, 36 insertions(+), 16 deletions(-)
>
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index 7bd8bed5b2..c7cda65c66 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -1096,6 +1096,12 @@ static inline void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
> MemTxAttrs atr, int fl, uintptr_t ra)
> {
> }
> +
> +static inline int cpu_watchpoint_address_matches(CPUState *cpu,
> + vaddr addr, vaddr len)
> +{
> + return 0;
> +}
> #else
> int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
> int flags, CPUWatchpoint **watchpoint);
> @@ -1105,6 +1111,7 @@ void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint);
> void cpu_watchpoint_remove_all(CPUState *cpu, int mask);
> void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
> MemTxAttrs attrs, int flags, uintptr_t ra);
> +int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len);
> #endif
>
> /**
> diff --git a/exec.c b/exec.c
> index cb6f5763dc..8575ce51ad 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1138,9 +1138,8 @@ void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
> * partially or completely with the address range covered by the
> * access).
> */
> -static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
> - vaddr addr,
> - vaddr len)
> +static inline bool watchpoint_address_matches(CPUWatchpoint *wp,
> + vaddr addr, vaddr len)
> {
> /* We know the lengths are non-zero, but a little caution is
> * required to avoid errors in the case where the range ends
> @@ -1152,6 +1151,20 @@ static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
>
> return !(addr > wpend || wp->vaddr > addrend);
> }
> +
> +/* Return flags for watchpoints that match addr + prot. */
> +int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len)
> +{
> + CPUWatchpoint *wp;
> + int ret = 0;
> +
> + QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
> + if (watchpoint_address_matches(wp, addr, TARGET_PAGE_SIZE)) {
> + ret |= wp->flags;
> + }
> + }
> + return ret;
> +}
> #endif /* !CONFIG_USER_ONLY */
>
> /* Add a breakpoint. */
> @@ -1459,7 +1472,7 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu,
> target_ulong *address)
> {
> hwaddr iotlb;
> - CPUWatchpoint *wp;
> + int flags, match;
>
> if (memory_region_is_ram(section->mr)) {
> /* Normal RAM. */
> @@ -1477,17 +1490,17 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu,
> iotlb += xlat;
> }
>
> - /* Make accesses to pages with watchpoints go via the
> - watchpoint trap routines. */
> - QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
> - if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
> - /* Avoid trapping reads of pages with a write breakpoint. */
> - if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
> - iotlb = PHYS_SECTION_WATCH + paddr;
> - *address |= TLB_MMIO;
> - break;
In the old code, we were able to break once we found a hit ...
> - }
> - }
> + /* Avoid trapping reads of pages with a write breakpoint. */
> + match = (prot & PAGE_READ ? BP_MEM_READ : 0)
> + | (prot & PAGE_WRITE ? BP_MEM_WRITE : 0);
> + flags = cpu_watchpoint_address_matches(cpu, vaddr, TARGET_PAGE_SIZE);
> + if (flags & match) {
... now you cannot break early anymore. Maybe pass in the match to
cpu_watchpoint_address_matches() ?
Anyhow, code seems to be correct, so
Reviewed-by: David Hildenbrand <david@redhat.com>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH 4/6] exec: Factor out cpu_watchpoint_address_matches
2019-08-26 8:41 ` David Hildenbrand
@ 2019-08-28 21:28 ` Richard Henderson
2019-08-28 21:54 ` David Hildenbrand
0 siblings, 1 reply; 14+ messages in thread
From: Richard Henderson @ 2019-08-28 21:28 UTC (permalink / raw)
To: David Hildenbrand, qemu-devel; +Cc: peter.maydell
On 8/26/19 1:41 AM, David Hildenbrand wrote:
>> - /* Make accesses to pages with watchpoints go via the
>> - watchpoint trap routines. */
>> - QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
>> - if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
>> - /* Avoid trapping reads of pages with a write breakpoint. */
>> - if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
>> - iotlb = PHYS_SECTION_WATCH + paddr;
>> - *address |= TLB_MMIO;
>> - break;
> In the old code, we were able to break once we found a hit ...
>
>> - }
>> - }
>> + /* Avoid trapping reads of pages with a write breakpoint. */
>> + match = (prot & PAGE_READ ? BP_MEM_READ : 0)
>> + | (prot & PAGE_WRITE ? BP_MEM_WRITE : 0);
>> + flags = cpu_watchpoint_address_matches(cpu, vaddr, TARGET_PAGE_SIZE);
>> + if (flags & match) {
> ... now you cannot break early anymore. Maybe pass in the match to
> cpu_watchpoint_address_matches() ?
Hmm, yes, perhaps.
OTOH, summing a bitmask is a very quick operation. Depending on the total
number of watchpoints, of course...
r~
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH 4/6] exec: Factor out cpu_watchpoint_address_matches
2019-08-28 21:28 ` Richard Henderson
@ 2019-08-28 21:54 ` David Hildenbrand
0 siblings, 0 replies; 14+ messages in thread
From: David Hildenbrand @ 2019-08-28 21:54 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell
On 28.08.19 23:28, Richard Henderson wrote:
> On 8/26/19 1:41 AM, David Hildenbrand wrote:
>>> - /* Make accesses to pages with watchpoints go via the
>>> - watchpoint trap routines. */
>>> - QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
>>> - if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
>>> - /* Avoid trapping reads of pages with a write breakpoint. */
>>> - if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
>>> - iotlb = PHYS_SECTION_WATCH + paddr;
>>> - *address |= TLB_MMIO;
>>> - break;
>> In the old code, we were able to break once we found a hit ...
>>
>>> - }
>>> - }
>>> + /* Avoid trapping reads of pages with a write breakpoint. */
>>> + match = (prot & PAGE_READ ? BP_MEM_READ : 0)
>>> + | (prot & PAGE_WRITE ? BP_MEM_WRITE : 0);
>>> + flags = cpu_watchpoint_address_matches(cpu, vaddr, TARGET_PAGE_SIZE);
>>> + if (flags & match) {
>> ... now you cannot break early anymore. Maybe pass in the match to
>> cpu_watchpoint_address_matches() ?
>
> Hmm, yes, perhaps.
>
> OTOH, summing a bitmask is a very quick operation. Depending on the total
> number of watchpoints, of course...
And for anything that is not a hit, we have to walk all watchpoints
either way, so the speedup would most probably be negligible.
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 5/6] cputlb: Handle watchpoints via TLB_WATCHPOINT
2019-08-24 21:34 [Qemu-devel] [PATCH 0/6] exec: Cleanup watchpoints Richard Henderson
` (3 preceding siblings ...)
2019-08-24 21:34 ` [Qemu-devel] [PATCH 4/6] exec: Factor out cpu_watchpoint_address_matches Richard Henderson
@ 2019-08-24 21:34 ` Richard Henderson
2019-08-28 22:00 ` David Hildenbrand
2019-08-24 21:34 ` [Qemu-devel] [PATCH 6/6] tcg: Check for watchpoints in probe_write() Richard Henderson
2019-08-28 21:47 ` [Qemu-devel] [PATCH 0/6] exec: Cleanup watchpoints Richard Henderson
6 siblings, 1 reply; 14+ messages in thread
From: Richard Henderson @ 2019-08-24 21:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, david
The raising of exceptions from check_watchpoint, buried inside
of the I/O subsystem, is fundamentally broken. We do not have
the helper return address with which we can unwind guest state.
Replace PHYS_SECTION_WATCH and io_mem_watch with TLB_WATCHPOINT.
Move the call to cpu_check_watchpoint into the cputlb helpers
where we do have the helper return address.
This also allows us to handle watchpoints on RAM to bypass the
full i/o access path.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/cpu-all.h | 5 +-
accel/tcg/cputlb.c | 83 +++++++++++++++++++++++++++---
exec.c | 114 +++--------------------------------------
3 files changed, 87 insertions(+), 115 deletions(-)
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 8d07ae23a5..d2d443c4f9 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -329,11 +329,14 @@ CPUArchState *cpu_copy(CPUArchState *env);
#define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS - 2))
/* Set if TLB entry is an IO callback. */
#define TLB_MMIO (1 << (TARGET_PAGE_BITS - 3))
+/* Set if TLB entry contains a watchpoint. */
+#define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS - 4))
/* Use this mask to check interception with an alignment mask
* in a TCG backend.
*/
-#define TLB_FLAGS_MASK (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO)
+#define TLB_FLAGS_MASK \
+ (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO | TLB_WATCHPOINT)
/**
* tlb_hit_page: return true if page aligned @addr is a hit against the
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index c9576bebcf..f7a414a131 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -710,6 +710,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
hwaddr iotlb, xlat, sz, paddr_page;
target_ulong vaddr_page;
int asidx = cpu_asidx_from_attrs(cpu, attrs);
+ int wp_flags;
assert_cpu_is_self(cpu);
@@ -752,6 +753,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
code_address = address;
iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
paddr_page, xlat, prot, &address);
+ wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
+ TARGET_PAGE_SIZE);
index = tlb_index(env, mmu_idx, vaddr_page);
te = tlb_entry(env, mmu_idx, vaddr_page);
@@ -805,6 +808,9 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
tn.addend = addend - vaddr_page;
if (prot & PAGE_READ) {
tn.addr_read = address;
+ if (wp_flags & BP_MEM_READ) {
+ tn.addr_read |= TLB_WATCHPOINT;
+ }
} else {
tn.addr_read = -1;
}
@@ -831,6 +837,9 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
if (prot & PAGE_WRITE_INV) {
tn.addr_write |= TLB_INVALID_MASK;
}
+ if (wp_flags & BP_MEM_WRITE) {
+ tn.addr_write |= TLB_WATCHPOINT;
+ }
}
copy_tlb_helper_locked(te, &tn);
@@ -1264,13 +1273,33 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
tlb_addr &= ~TLB_INVALID_MASK;
}
- /* Handle an IO access. */
+ /* Handle anything that isn't just a straight memory access. */
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+ CPUIOTLBEntry *iotlbentry;
+
+ /* For anything that is unaligned, recurse through full_load. */
if ((addr & (size - 1)) != 0) {
goto do_unaligned_access;
}
- return io_readx(env, &env_tlb(env)->d[mmu_idx].iotlb[index],
- mmu_idx, addr, retaddr, access_type, op);
+
+ iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+
+ /* Handle watchpoints. */
+ if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
+ /* On watchpoint hit, this will longjmp out. */
+ cpu_check_watchpoint(env_cpu(env), addr, size,
+ iotlbentry->attrs, BP_MEM_READ, retaddr);
+
+ /* The backing page may or may not require I/O. */
+ tlb_addr &= ~TLB_WATCHPOINT;
+ if ((tlb_addr & ~TARGET_PAGE_MASK) == 0) {
+ goto do_aligned_access;
+ }
+ }
+
+ /* Handle I/O access. */
+ return io_readx(env, iotlbentry, mmu_idx, addr,
+ retaddr, access_type, op);
}
/* Handle slow unaligned access (it spans two pages or IO). */
@@ -1297,6 +1326,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
return res & MAKE_64BIT_MASK(0, size * 8);
}
+ do_aligned_access:
haddr = (void *)((uintptr_t)addr + entry->addend);
switch (op) {
case MO_UB:
@@ -1486,13 +1516,32 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
}
- /* Handle an IO access. */
+ /* Handle anything that isn't just a straight memory access. */
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+ CPUIOTLBEntry *iotlbentry;
+
+ /* For anything that is unaligned, recurse through byte stores. */
if ((addr & (size - 1)) != 0) {
goto do_unaligned_access;
}
- io_writex(env, &env_tlb(env)->d[mmu_idx].iotlb[index], mmu_idx,
- val, addr, retaddr, op);
+
+ iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+
+ /* Handle watchpoints. */
+ if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
+ /* On watchpoint hit, this will longjmp out. */
+ cpu_check_watchpoint(env_cpu(env), addr, size,
+ iotlbentry->attrs, BP_MEM_WRITE, retaddr);
+
+ /* The backing page may or may not require I/O. */
+ tlb_addr &= ~TLB_WATCHPOINT;
+ if ((tlb_addr & ~TARGET_PAGE_MASK) == 0) {
+ goto do_aligned_access;
+ }
+ }
+
+ /* Handle I/O access. */
+ io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, op);
return;
}
@@ -1504,6 +1553,8 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
uintptr_t index2;
CPUTLBEntry *entry2;
target_ulong page2, tlb_addr2;
+ size_t size2;
+
do_unaligned_access:
/*
* Ensure the second page is in the TLB. Note that the first page
@@ -1511,16 +1562,33 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
* cannot evict the first.
*/
page2 = (addr + size) & TARGET_PAGE_MASK;
+ size2 = (addr + size) & ~TARGET_PAGE_MASK;
index2 = tlb_index(env, mmu_idx, page2);
entry2 = tlb_entry(env, mmu_idx, page2);
tlb_addr2 = tlb_addr_write(entry2);
if (!tlb_hit_page(tlb_addr2, page2)
&& !victim_tlb_hit(env, mmu_idx, index2, tlb_off,
page2 & TARGET_PAGE_MASK)) {
- tlb_fill(env_cpu(env), page2, size, MMU_DATA_STORE,
+ tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
mmu_idx, retaddr);
}
+ /*
+ * Handle watchpoints. Since this may trap, all checks
+ * must happen before any store.
+ */
+ if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
+ cpu_check_watchpoint(env_cpu(env), addr,
+ -(addr | TARGET_PAGE_MASK),
+ env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
+ BP_MEM_WRITE, retaddr);
+ }
+ if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
+ cpu_check_watchpoint(env_cpu(env), page2, size2,
+ env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
+ BP_MEM_WRITE, retaddr);
+ }
+
/*
* XXX: not efficient, but simple.
* This loop must go in the forward direction to avoid issues
@@ -1540,6 +1608,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
return;
}
+ do_aligned_access:
haddr = (void *)((uintptr_t)addr + entry->addend);
switch (op) {
case MO_UB:
diff --git a/exec.c b/exec.c
index 8575ce51ad..ad0f4a598f 100644
--- a/exec.c
+++ b/exec.c
@@ -193,15 +193,12 @@ typedef struct subpage_t {
#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
-#define PHYS_SECTION_WATCH 3
static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_log_global_after_sync(MemoryListener *listener);
static void tcg_commit(MemoryListener *listener);
-static MemoryRegion io_mem_watch;
-
/**
* CPUAddressSpace: all the information a CPU needs about an AddressSpace
* @cpu: the CPU whose AddressSpace this is
@@ -1472,7 +1469,6 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu,
target_ulong *address)
{
hwaddr iotlb;
- int flags, match;
if (memory_region_is_ram(section->mr)) {
/* Normal RAM. */
@@ -1490,19 +1486,6 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu,
iotlb += xlat;
}
- /* Avoid trapping reads of pages with a write breakpoint. */
- match = (prot & PAGE_READ ? BP_MEM_READ : 0)
- | (prot & PAGE_WRITE ? BP_MEM_WRITE : 0);
- flags = cpu_watchpoint_address_matches(cpu, vaddr, TARGET_PAGE_SIZE);
- if (flags & match) {
- /*
- * Make accesses to pages with watchpoints go via the
- * watchpoint trap routines.
- */
- iotlb = PHYS_SECTION_WATCH + paddr;
- *address |= TLB_MMIO;
- }
-
return iotlb;
}
#endif /* defined(CONFIG_USER_ONLY) */
@@ -2810,10 +2793,14 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
assert(tcg_enabled());
if (cpu->watchpoint_hit) {
- /* We re-entered the check after replacing the TB. Now raise
- * the debug interrupt so that is will trigger after the
- * current instruction. */
+ /*
+ * We re-entered the check after replacing the TB.
+ * Now raise the debug interrupt so that it will
+ * trigger after the current instruction.
+ */
+ qemu_mutex_lock_iothread();
cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
+ qemu_mutex_unlock_iothread();
return;
}
@@ -2858,88 +2845,6 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
}
}
-static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
-{
- CPUState *cpu = current_cpu;
- vaddr addr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
-
- cpu_check_watchpoint(cpu, addr, len, attrs, flags, 0);
-}
-
-/* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
- so these check for a hit then pass through to the normal out-of-line
- phys routines. */
-static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
- unsigned size, MemTxAttrs attrs)
-{
- MemTxResult res;
- uint64_t data;
- int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
- AddressSpace *as = current_cpu->cpu_ases[asidx].as;
-
- check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
- switch (size) {
- case 1:
- data = address_space_ldub(as, addr, attrs, &res);
- break;
- case 2:
- data = address_space_lduw(as, addr, attrs, &res);
- break;
- case 4:
- data = address_space_ldl(as, addr, attrs, &res);
- break;
- case 8:
- data = address_space_ldq(as, addr, attrs, &res);
- break;
- default: abort();
- }
- *pdata = data;
- return res;
-}
-
-static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
- uint64_t val, unsigned size,
- MemTxAttrs attrs)
-{
- MemTxResult res;
- int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
- AddressSpace *as = current_cpu->cpu_ases[asidx].as;
-
- check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
- switch (size) {
- case 1:
- address_space_stb(as, addr, val, attrs, &res);
- break;
- case 2:
- address_space_stw(as, addr, val, attrs, &res);
- break;
- case 4:
- address_space_stl(as, addr, val, attrs, &res);
- break;
- case 8:
- address_space_stq(as, addr, val, attrs, &res);
- break;
- default: abort();
- }
- return res;
-}
-
-static const MemoryRegionOps watch_mem_ops = {
- .read_with_attrs = watch_mem_read,
- .write_with_attrs = watch_mem_write,
- .endianness = DEVICE_NATIVE_ENDIAN,
- .valid = {
- .min_access_size = 1,
- .max_access_size = 8,
- .unaligned = false,
- },
- .impl = {
- .min_access_size = 1,
- .max_access_size = 8,
- .unaligned = false,
- },
-};
-
static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
MemTxAttrs attrs, uint8_t *buf, hwaddr len);
static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
@@ -3115,9 +3020,6 @@ static void io_mem_init(void)
memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
NULL, UINT64_MAX);
memory_region_clear_global_locking(&io_mem_notdirty);
-
- memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
- NULL, UINT64_MAX);
}
AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv)
@@ -3131,8 +3033,6 @@ AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv)
assert(n == PHYS_SECTION_NOTDIRTY);
n = dummy_section(&d->map, fv, &io_mem_rom);
assert(n == PHYS_SECTION_ROM);
- n = dummy_section(&d->map, fv, &io_mem_watch);
- assert(n == PHYS_SECTION_WATCH);
d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
--
2.17.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH 5/6] cputlb: Handle watchpoints via TLB_WATCHPOINT
2019-08-24 21:34 ` [Qemu-devel] [PATCH 5/6] cputlb: Handle watchpoints via TLB_WATCHPOINT Richard Henderson
@ 2019-08-28 22:00 ` David Hildenbrand
0 siblings, 0 replies; 14+ messages in thread
From: David Hildenbrand @ 2019-08-28 22:00 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell
On 24.08.19 23:34, Richard Henderson wrote:
> The raising of exceptions from check_watchpoint, buried inside
> of the I/O subsystem, is fundamentally broken. We do not have
> the helper return address with which we can unwind guest state.
>
> Replace PHYS_SECTION_WATCH and io_mem_watch with TLB_WATCHPOINT.
> Move the call to cpu_check_watchpoint into the cputlb helpers
> where we do have the helper return address.
>
> This also allows us to handle watchpoints on RAM to bypass the
> full i/o access path.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> include/exec/cpu-all.h | 5 +-
> accel/tcg/cputlb.c | 83 +++++++++++++++++++++++++++---
> exec.c | 114 +++--------------------------------------
> 3 files changed, 87 insertions(+), 115 deletions(-)
>
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index 8d07ae23a5..d2d443c4f9 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -329,11 +329,14 @@ CPUArchState *cpu_copy(CPUArchState *env);
> #define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS - 2))
> /* Set if TLB entry is an IO callback. */
> #define TLB_MMIO (1 << (TARGET_PAGE_BITS - 3))
> +/* Set if TLB entry contains a watchpoint. */
> +#define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS - 4))
>
> /* Use this mask to check interception with an alignment mask
> * in a TCG backend.
> */
> -#define TLB_FLAGS_MASK (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO)
> +#define TLB_FLAGS_MASK \
> + (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO | TLB_WATCHPOINT)
>
> /**
> * tlb_hit_page: return true if page aligned @addr is a hit against the
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index c9576bebcf..f7a414a131 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -710,6 +710,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> hwaddr iotlb, xlat, sz, paddr_page;
> target_ulong vaddr_page;
> int asidx = cpu_asidx_from_attrs(cpu, attrs);
> + int wp_flags;
>
> assert_cpu_is_self(cpu);
>
> @@ -752,6 +753,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> code_address = address;
> iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
> paddr_page, xlat, prot, &address);
> + wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
> + TARGET_PAGE_SIZE);
>
> index = tlb_index(env, mmu_idx, vaddr_page);
> te = tlb_entry(env, mmu_idx, vaddr_page);
> @@ -805,6 +808,9 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> tn.addend = addend - vaddr_page;
> if (prot & PAGE_READ) {
> tn.addr_read = address;
> + if (wp_flags & BP_MEM_READ) {
> + tn.addr_read |= TLB_WATCHPOINT;
> + }
> } else {
> tn.addr_read = -1;
> }
> @@ -831,6 +837,9 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
> if (prot & PAGE_WRITE_INV) {
> tn.addr_write |= TLB_INVALID_MASK;
> }
> + if (wp_flags & BP_MEM_WRITE) {
> + tn.addr_write |= TLB_WATCHPOINT;
> + }
> }
>
> copy_tlb_helper_locked(te, &tn);
> @@ -1264,13 +1273,33 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
> tlb_addr &= ~TLB_INVALID_MASK;
> }
>
> - /* Handle an IO access. */
> + /* Handle anything that isn't just a straight memory access. */
> if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
> + CPUIOTLBEntry *iotlbentry;
> +
> + /* For anything that is unaligned, recurse through full_load. */
> if ((addr & (size - 1)) != 0) {
> goto do_unaligned_access;
> }
> - return io_readx(env, &env_tlb(env)->d[mmu_idx].iotlb[index],
> - mmu_idx, addr, retaddr, access_type, op);
> +
> + iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
> +
> + /* Handle watchpoints. */
> + if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
> + /* On watchpoint hit, this will longjmp out. */
> + cpu_check_watchpoint(env_cpu(env), addr, size,
> + iotlbentry->attrs, BP_MEM_READ, retaddr);
> +
> + /* The backing page may or may not require I/O. */
> + tlb_addr &= ~TLB_WATCHPOINT;
> + if ((tlb_addr & ~TARGET_PAGE_MASK) == 0) {
> + goto do_aligned_access;
> + }
> + }
> +
> + /* Handle I/O access. */
> + return io_readx(env, iotlbentry, mmu_idx, addr,
> + retaddr, access_type, op);
> }
>
> /* Handle slow unaligned access (it spans two pages or IO). */
> @@ -1297,6 +1326,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
> return res & MAKE_64BIT_MASK(0, size * 8);
> }
>
> + do_aligned_access:
> haddr = (void *)((uintptr_t)addr + entry->addend);
> switch (op) {
> case MO_UB:
> @@ -1486,13 +1516,32 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
> tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
> }
>
> - /* Handle an IO access. */
> + /* Handle anything that isn't just a straight memory access. */
> if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
> + CPUIOTLBEntry *iotlbentry;
> +
> + /* For anything that is unaligned, recurse through byte stores. */
> if ((addr & (size - 1)) != 0) {
> goto do_unaligned_access;
> }
> - io_writex(env, &env_tlb(env)->d[mmu_idx].iotlb[index], mmu_idx,
> - val, addr, retaddr, op);
> +
> + iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
> +
> + /* Handle watchpoints. */
> + if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
> + /* On watchpoint hit, this will longjmp out. */
> + cpu_check_watchpoint(env_cpu(env), addr, size,
> + iotlbentry->attrs, BP_MEM_WRITE, retaddr);
> +
> + /* The backing page may or may not require I/O. */
> + tlb_addr &= ~TLB_WATCHPOINT;
> + if ((tlb_addr & ~TARGET_PAGE_MASK) == 0) {
> + goto do_aligned_access;
> + }
> + }
> +
> + /* Handle I/O access. */
> + io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, op);
> return;
> }
>
> @@ -1504,6 +1553,8 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
> uintptr_t index2;
> CPUTLBEntry *entry2;
> target_ulong page2, tlb_addr2;
> + size_t size2;
> +
> do_unaligned_access:
> /*
> * Ensure the second page is in the TLB. Note that the first page
> @@ -1511,16 +1562,33 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
> * cannot evict the first.
> */
> page2 = (addr + size) & TARGET_PAGE_MASK;
> + size2 = (addr + size) & ~TARGET_PAGE_MASK;
> index2 = tlb_index(env, mmu_idx, page2);
> entry2 = tlb_entry(env, mmu_idx, page2);
> tlb_addr2 = tlb_addr_write(entry2);
> if (!tlb_hit_page(tlb_addr2, page2)
> && !victim_tlb_hit(env, mmu_idx, index2, tlb_off,
> page2 & TARGET_PAGE_MASK)) {
> - tlb_fill(env_cpu(env), page2, size, MMU_DATA_STORE,
> + tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
> mmu_idx, retaddr);
This looks like a separate fix, want to split that into a separate patch?
> }
>
> + /*
> + * Handle watchpoints. Since this may trap, all checks
> + * must happen before any store.
> + */
> + if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
> + cpu_check_watchpoint(env_cpu(env), addr,
> + -(addr | TARGET_PAGE_MASK),
or "size - size2", not sure what's better. Probably a matter of taste.
> + env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
> + BP_MEM_WRITE, retaddr);
> + }
> + if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
> + cpu_check_watchpoint(env_cpu(env), page2, size2,
> + env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
> + BP_MEM_WRITE, retaddr);
> + }
> +
> /*
> * XXX: not efficient, but simple.
> * This loop must go in the forward direction to avoid issues
> @@ -1540,6 +1608,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
> return;
> }
>
> + do_aligned_access:
> haddr = (void *)((uintptr_t)addr + entry->addend);
> switch (op) {
> case MO_UB:
> diff --git a/exec.c b/exec.c
> index 8575ce51ad..ad0f4a598f 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -193,15 +193,12 @@ typedef struct subpage_t {
> #define PHYS_SECTION_UNASSIGNED 0
> #define PHYS_SECTION_NOTDIRTY 1
> #define PHYS_SECTION_ROM 2
> -#define PHYS_SECTION_WATCH 3
>
> static void io_mem_init(void);
> static void memory_map_init(void);
> static void tcg_log_global_after_sync(MemoryListener *listener);
> static void tcg_commit(MemoryListener *listener);
>
> -static MemoryRegion io_mem_watch;
> -
> /**
> * CPUAddressSpace: all the information a CPU needs about an AddressSpace
> * @cpu: the CPU whose AddressSpace this is
> @@ -1472,7 +1469,6 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu,
> target_ulong *address)
> {
> hwaddr iotlb;
> - int flags, match;
>
> if (memory_region_is_ram(section->mr)) {
> /* Normal RAM. */
> @@ -1490,19 +1486,6 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu,
> iotlb += xlat;
> }
>
> - /* Avoid trapping reads of pages with a write breakpoint. */
> - match = (prot & PAGE_READ ? BP_MEM_READ : 0)
> - | (prot & PAGE_WRITE ? BP_MEM_WRITE : 0);
> - flags = cpu_watchpoint_address_matches(cpu, vaddr, TARGET_PAGE_SIZE);
> - if (flags & match) {
> - /*
> - * Make accesses to pages with watchpoints go via the
> - * watchpoint trap routines.
> - */
> - iotlb = PHYS_SECTION_WATCH + paddr;
> - *address |= TLB_MMIO;
> - }
> -
> return iotlb;
> }
> #endif /* defined(CONFIG_USER_ONLY) */
> @@ -2810,10 +2793,14 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
>
> assert(tcg_enabled());
> if (cpu->watchpoint_hit) {
> - /* We re-entered the check after replacing the TB. Now raise
> - * the debug interrupt so that is will trigger after the
> - * current instruction. */
> + /*
> + * We re-entered the check after replacing the TB.
> + * Now raise the debug interrupt so that it will
> + * trigger after the current instruction.
> + */
No real doc change, but okay.
> + qemu_mutex_lock_iothread();
> cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
> + qemu_mutex_unlock_iothread();
> return;
> }
>
> @@ -2858,88 +2845,6 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
> }
> }
>
> -static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
> -{
> - CPUState *cpu = current_cpu;
> - vaddr addr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
> -
> - cpu_check_watchpoint(cpu, addr, len, attrs, flags, 0);
> -}
> -
> -/* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
> - so these check for a hit then pass through to the normal out-of-line
> - phys routines. */
> -static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
> - unsigned size, MemTxAttrs attrs)
> -{
> - MemTxResult res;
> - uint64_t data;
> - int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
> - AddressSpace *as = current_cpu->cpu_ases[asidx].as;
> -
> - check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
> - switch (size) {
> - case 1:
> - data = address_space_ldub(as, addr, attrs, &res);
> - break;
> - case 2:
> - data = address_space_lduw(as, addr, attrs, &res);
> - break;
> - case 4:
> - data = address_space_ldl(as, addr, attrs, &res);
> - break;
> - case 8:
> - data = address_space_ldq(as, addr, attrs, &res);
> - break;
> - default: abort();
> - }
> - *pdata = data;
> - return res;
> -}
> -
> -static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
> - uint64_t val, unsigned size,
> - MemTxAttrs attrs)
> -{
> - MemTxResult res;
> - int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
> - AddressSpace *as = current_cpu->cpu_ases[asidx].as;
> -
> - check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
> - switch (size) {
> - case 1:
> - address_space_stb(as, addr, val, attrs, &res);
> - break;
> - case 2:
> - address_space_stw(as, addr, val, attrs, &res);
> - break;
> - case 4:
> - address_space_stl(as, addr, val, attrs, &res);
> - break;
> - case 8:
> - address_space_stq(as, addr, val, attrs, &res);
> - break;
> - default: abort();
> - }
> - return res;
> -}
> -
> -static const MemoryRegionOps watch_mem_ops = {
> - .read_with_attrs = watch_mem_read,
> - .write_with_attrs = watch_mem_write,
> - .endianness = DEVICE_NATIVE_ENDIAN,
> - .valid = {
> - .min_access_size = 1,
> - .max_access_size = 8,
> - .unaligned = false,
> - },
> - .impl = {
> - .min_access_size = 1,
> - .max_access_size = 8,
> - .unaligned = false,
> - },
> -};
> -
> static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
> MemTxAttrs attrs, uint8_t *buf, hwaddr len);
> static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
> @@ -3115,9 +3020,6 @@ static void io_mem_init(void)
> memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
> NULL, UINT64_MAX);
> memory_region_clear_global_locking(&io_mem_notdirty);
> -
> - memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
> - NULL, UINT64_MAX);
> }
>
> AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv)
> @@ -3131,8 +3033,6 @@ AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv)
> assert(n == PHYS_SECTION_NOTDIRTY);
> n = dummy_section(&d->map, fv, &io_mem_rom);
> assert(n == PHYS_SECTION_ROM);
> - n = dummy_section(&d->map, fv, &io_mem_watch);
> - assert(n == PHYS_SECTION_WATCH);
>
> d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
>
>
Looks sane to me (and like a nice fix/cleanup)
Reviewed-by: David Hildenbrand <david@redhat.com>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 6/6] tcg: Check for watchpoints in probe_write()
2019-08-24 21:34 [Qemu-devel] [PATCH 0/6] exec: Cleanup watchpoints Richard Henderson
` (4 preceding siblings ...)
2019-08-24 21:34 ` [Qemu-devel] [PATCH 5/6] cputlb: Handle watchpoints via TLB_WATCHPOINT Richard Henderson
@ 2019-08-24 21:34 ` Richard Henderson
2019-08-28 21:47 ` [Qemu-devel] [PATCH 0/6] exec: Cleanup watchpoints Richard Henderson
6 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2019-08-24 21:34 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, david
From: David Hildenbrand <david@redhat.com>
Let size > 0 indicate a promise to write to those bytes.
Check for write watchpoints in the probed range.
Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20190823100741.9621-10-david@redhat.com>
[rth: Recompute index after tlb_fill; check TLB_WATCHPOINT.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/cputlb.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index f7a414a131..7fc7aa9482 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1086,13 +1086,24 @@ void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
{
uintptr_t index = tlb_index(env, mmu_idx, addr);
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
+ target_ulong tlb_addr = tlb_addr_write(entry);
- if (!tlb_hit(tlb_addr_write(entry), addr)) {
- /* TLB entry is for a different page */
+ if (unlikely(!tlb_hit(tlb_addr, addr))) {
if (!VICTIM_TLB_HIT(addr_write, addr)) {
tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
mmu_idx, retaddr);
+ /* TLB resize via tlb_fill may have moved the entry. */
+ index = tlb_index(env, mmu_idx, addr);
+ entry = tlb_entry(env, mmu_idx, addr);
}
+ tlb_addr = tlb_addr_write(entry);
+ }
+
+ /* Handle watchpoints. */
+ if ((tlb_addr & TLB_WATCHPOINT) && size > 0) {
+ cpu_check_watchpoint(env_cpu(env), addr, size,
+ env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
+ BP_MEM_WRITE, retaddr);
}
}
--
2.17.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH 0/6] exec: Cleanup watchpoints
2019-08-24 21:34 [Qemu-devel] [PATCH 0/6] exec: Cleanup watchpoints Richard Henderson
` (5 preceding siblings ...)
2019-08-24 21:34 ` [Qemu-devel] [PATCH 6/6] tcg: Check for watchpoints in probe_write() Richard Henderson
@ 2019-08-28 21:47 ` Richard Henderson
6 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2019-08-28 21:47 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, david
Ping for 5/6, as yet unreviewed.
r~
On 8/24/19 2:34 PM, Richard Henderson wrote:
> As discussed with David earlier this week, the current implementation
> of watchpoints cannot work, at least reliably. We are raising an
> exception out of the middle of the i/o access path which does not
> even attempt to unwind the guest cpu state, nor does it have the
> information required to do so.
>
> This moves the implementation to the cputlb helpers. This is a point
> at which we can and do raise exceptions properly.
>
> In addition, this fixes a bug in that unaligned stores were detecting
> watchpoints in the middle of the byte-by-byte operation, which means
> that we didn't signal the watchpoint early enough to avoid state change.
>
>
> r~
>
>
> David Hildenbrand (2):
> exec: Factor out core logic of check_watchpoint()
> tcg: Check for watchpoints in probe_write()
>
> Richard Henderson (4):
> exec: Move user-only watchpoint stubs inline
> cputlb: Fold TLB_RECHECK into TLB_INVALID_MASK
> exec: Factor out cpu_watchpoint_address_matches
> cputlb: Handle watchpoints via TLB_WATCHPOINT
>
> include/exec/cpu-all.h | 8 +-
> include/hw/core/cpu.h | 37 +++++++++
> accel/tcg/cputlb.c | 156 ++++++++++++++++++++++++--------------
> exec.c | 167 +++++++++--------------------------------
> 4 files changed, 173 insertions(+), 195 deletions(-)
>
^ permalink raw reply [flat|nested] 14+ messages in thread